root / TrouNoir / TrouNoir.py @ 201
Historique | Voir | Annoter | Télécharger (12,35 ko)
1 |
#!/usr/bin/env python
|
---|---|
2 |
#
|
3 |
# TrouNoir model using PyOpenCL
|
4 |
#
|
5 |
# CC BY-NC-SA 2011 : <emmanuel.quemener@ens-lyon.fr>
|
6 |
#
|
7 |
# Thanks to Andreas Klockner for PyOpenCL:
|
8 |
# http://mathema.tician.de/software/pyopencl
|
9 |
#
|
10 |
# Interesting links:
|
11 |
# http://viennacl.sourceforge.net/viennacl-documentation.html
|
12 |
# http://enja.org/2011/02/22/adventures-in-pyopencl-part-1-getting-started-with-python/
|
13 |
|
14 |
import pyopencl as cl |
15 |
import numpy |
16 |
from PIL import Image |
17 |
import time,string |
18 |
from numpy.random import randint as nprnd |
19 |
import sys |
20 |
import getopt |
21 |
import matplotlib.pyplot as plt |
22 |
|
23 |
KERNEL_CODE=string.Template("""
|
24 |
|
25 |
#define PI (float)3.14159265359
|
26 |
#define nbr 200
|
27 |
|
28 |
float atanp(float x,float y)
|
29 |
{
|
30 |
float angle;
|
31 |
|
32 |
angle=atan2(y,x);
|
33 |
|
34 |
if (angle<0)
|
35 |
{
|
36 |
angle+=(float)2.e0f*PI;
|
37 |
}
|
38 |
|
39 |
return angle;
|
40 |
}
|
41 |
|
42 |
float f(float v)
|
43 |
{
|
44 |
return v;
|
45 |
}
|
46 |
|
47 |
float g(float u,float m,float b)
|
48 |
{
|
49 |
return (3.*m/b*pow(u,2)-u);
|
50 |
}
|
51 |
|
52 |
|
53 |
void calcul(float *us,float *vs,float up,float vp,
|
54 |
float h,float m,float b)
|
55 |
{
|
56 |
float c0,c1,c2,c3,d0,d1,d2,d3;
|
57 |
|
58 |
c0=h*f(vp);
|
59 |
c1=h*f(vp+c0/2.);
|
60 |
c2=h*f(vp+c1/2.);
|
61 |
c3=h*f(vp+c2);
|
62 |
d0=h*g(up,m,b);
|
63 |
d1=h*g(up+d0/2.,m,b);
|
64 |
d2=h*g(up+d1/2.,m,b);
|
65 |
d3=h*g(up+d2,m,b);
|
66 |
|
67 |
*us=up+(c0+2.*c1+2.*c2+c3)/6.;
|
68 |
*vs=vp+(d0+2.*d1+2.*d2+d3)/6.;
|
69 |
}
|
70 |
|
71 |
void rungekutta(float *ps,float *us,float *vs,
|
72 |
float pp,float up,float vp,
|
73 |
float h,float m,float b)
|
74 |
{
|
75 |
calcul(us,vs,up,vp,h,m,b);
|
76 |
*ps=pp+h;
|
77 |
}
|
78 |
|
79 |
float decalage_spectral(float r,float b,float phi,
|
80 |
float tho,float m)
|
81 |
{
|
82 |
return (sqrt(1-3*m/r)/(1+sqrt(m/pow(r,3))*b*sin(tho)*sin(phi)));
|
83 |
}
|
84 |
|
85 |
float spectre(float rf,int q,float b,float db,
|
86 |
float h,float r,float m,float bss)
|
87 |
{
|
88 |
float flx;
|
89 |
|
90 |
flx=pow(r/m,q)*pow(rf,4)*b;
|
91 |
return(flx);
|
92 |
}
|
93 |
|
94 |
float spectre_cn(float rf,float b,float db,
|
95 |
float h,float r,float m,float bss)
|
96 |
{
|
97 |
double flx;
|
98 |
double nu_rec,nu_em,qu,v,temp,temp_em,flux_int,m_point,planck,c,k,psc2,psk;
|
99 |
int fi,posfreq;
|
100 |
|
101 |
planck=6.62e-34;
|
102 |
k=1.38e-23;
|
103 |
psk=5.38e-11;
|
104 |
psc2=7.35e-51;
|
105 |
temp=3.e7;
|
106 |
m_point=1.e14;
|
107 |
v=1.-3./r;
|
108 |
|
109 |
qu=1./sqrt(1.-3./r)/sqrt(r)*(sqrt(r)-sqrt(6.)+sqrt(3.)/2.*log((sqrt(r)+sqrt(3.))/(sqrt(r)-sqrt(3.))*(sqrt(6.)-sqrt(3.))/(sqrt(6.)+sqrt(3.))));
|
110 |
|
111 |
temp_em=temp*sqrt(m)*exp(0.25*log(m_point))*exp(-0.75*log(r))*exp(-0.125*log(v))*exp(0.25*log(qu));
|
112 |
|
113 |
flux_int=0;
|
114 |
flx=0;
|
115 |
|
116 |
for (fi=0;fi<nbr;fi++)
|
117 |
{
|
118 |
nu_em=bss*(float)fi/(float)nbr;
|
119 |
nu_rec=nu_em*rf;
|
120 |
posfreq=(int)(1./bss*nu_rec*(float)nbr);
|
121 |
if ((posfreq>0)&&(posfreq<nbr))
|
122 |
{
|
123 |
//flux_int=2*planck/9.e16f*pow(nu_em,3)/(exp(planck*nu_em/k/temp_em)-1.)*pow(rf,3)*b*db*h;
|
124 |
//flux_int=pow(nu_em,3)/(exp(planck*nu_em/k/temp_em)-1.)*pow(rf,3)*b;
|
125 |
flux_int=2.*psc2*pow(nu_em,3)/(exp(psk*nu_em/temp_em)-1.)*pow(rf,3)*b;
|
126 |
flx+=flux_int;
|
127 |
}
|
128 |
}
|
129 |
//return(pow(rf,3)*b*temp_em);
|
130 |
return((float)flx);
|
131 |
return(flx);
|
132 |
}
|
133 |
|
134 |
void impact(float phi,float r,float b,float tho,float m,
|
135 |
float *zp,float *fp,
|
136 |
int q,float db,
|
137 |
float h,float bss,int raie)
|
138 |
{
|
139 |
float flx,rf;
|
140 |
|
141 |
rf=decalage_spectral(r,b,phi,tho,m);
|
142 |
|
143 |
if (raie==0)
|
144 |
{
|
145 |
flx=spectre(rf,q,b,db,h,r,m,bss);
|
146 |
}
|
147 |
else
|
148 |
{
|
149 |
flx=spectre_cn(rf,b,db,h,r,m,bss);
|
150 |
}
|
151 |
|
152 |
*zp=1./rf;
|
153 |
*fp=flx;
|
154 |
|
155 |
}
|
156 |
|
157 |
__kernel void EachPixel(__global float *zImage,__global float *fImage)
|
158 |
{
|
159 |
uint xi=(uint)get_global_id(0);
|
160 |
uint yi=(uint)get_global_id(1);
|
161 |
uint sizex=(uint)get_global_size(0);
|
162 |
uint sizey=(uint)get_global_size(1);
|
163 |
|
164 |
// Perform trajectory for each pixel
|
165 |
|
166 |
float m,rs,ri,re,tho,ro;
|
167 |
int q;
|
168 |
|
169 |
float bss,stp;
|
170 |
int nmx,dim;
|
171 |
float d,bmx,db,b,h;
|
172 |
float up,vp,pp;
|
173 |
float us,vs,ps;
|
174 |
float rp[2000];
|
175 |
float zmx,fmx,zen,fen;
|
176 |
float flux_tot,impc_tot;
|
177 |
float phi,thi,thx,phd,php,nr,r;
|
178 |
int ni,ii,i,imx,j,n,tst,dist,raie,pc,fcl,zcl;
|
179 |
int nh;
|
180 |
int ExitOnImpact;
|
181 |
float zp,fp;
|
182 |
|
183 |
m=1.;
|
184 |
rs=2.*m;
|
185 |
ri=3.*rs;
|
186 |
re=12.;
|
187 |
ro=100.;
|
188 |
//tho=PI/180.*80;
|
189 |
tho=PI/180.*80.;
|
190 |
q=-2;
|
191 |
dist=0;
|
192 |
raie=0;
|
193 |
|
194 |
nmx=(float)sizex/2.;
|
195 |
stp=0.5;
|
196 |
// Autosize for image
|
197 |
bmx=1.25*re;
|
198 |
b=0.;
|
199 |
thx=asin(bmx/ro);
|
200 |
pc=0;
|
201 |
|
202 |
if (raie==0)
|
203 |
{
|
204 |
bss=2.;
|
205 |
}
|
206 |
else
|
207 |
{
|
208 |
bss=1.e6;
|
209 |
}
|
210 |
|
211 |
h=PI/5.e2f;
|
212 |
|
213 |
// set origin as center of image
|
214 |
float x=(float)xi-(float)(sizex/2)+(float)5e-1f;
|
215 |
float y=(float)yi-(float)(sizey/2)+(float)5e-1f;
|
216 |
// angle extracted from cylindric symmetry
|
217 |
phi=atanp(x,y);
|
218 |
phd=atanp(cos(phi)*sin(tho),cos(tho));
|
219 |
|
220 |
if (dist==1)
|
221 |
{
|
222 |
db=bmx/nmx;
|
223 |
// impact parameter
|
224 |
b=sqrt(x*x+y*y)*(float)2.e0f/(float)sizex*bmx;
|
225 |
up=0.;
|
226 |
vp=1.;
|
227 |
}
|
228 |
else
|
229 |
{
|
230 |
b=sqrt(x*x+y*y)*(float)2.e0f/(float)sizex*bmx;
|
231 |
thi=asin(b/ro);
|
232 |
db=bmx/nmx;
|
233 |
up=sin(thi);
|
234 |
vp=cos(thi);
|
235 |
}
|
236 |
|
237 |
pp=0.;
|
238 |
nh=0;
|
239 |
|
240 |
rungekutta(&ps,&us,&vs,pp,up,vp,h,m,b);
|
241 |
|
242 |
rp[nh]=fabs(b/us);
|
243 |
|
244 |
ExitOnImpact=0;
|
245 |
|
246 |
do
|
247 |
{
|
248 |
nh++;
|
249 |
pp=ps;
|
250 |
up=us;
|
251 |
vp=vs;
|
252 |
rungekutta(&ps,&us,&vs,pp,up,vp,h,m,b);
|
253 |
rp[nh]=fabs(b/us);
|
254 |
ExitOnImpact = ((fmod(pp,PI)<fmod(phd,PI))&&(fmod(ps,PI)>fmod(phd,PI)))&&(rp[nh]>ri)&&(rp[nh]<re)?1:0;
|
255 |
|
256 |
} while ((rp[nh]>=rs)&&(rp[nh]<=rp[0])&&(ExitOnImpact==0));
|
257 |
|
258 |
if (ExitOnImpact==1) {
|
259 |
impact(phi,rp[nh-1],b,tho,m,&zp,&fp,q,db,h,bss,raie);
|
260 |
}
|
261 |
else
|
262 |
{
|
263 |
zp=0.;
|
264 |
fp=0.;
|
265 |
}
|
266 |
|
267 |
//
|
268 |
zImage[yi+sizex*xi]=(float)zp;
|
269 |
fImage[yi+sizex*xi]=(float)fp;
|
270 |
|
271 |
barrier(CLK_GLOBAL_MEM_FENCE);
|
272 |
|
273 |
}
|
274 |
""")
|
275 |
|
276 |
def ImageOutput(sigma,prefix): |
277 |
Max=sigma.max() |
278 |
Min=sigma.min() |
279 |
|
280 |
# Normalize value as 8bits Integer
|
281 |
SigmaInt=(255*(sigma-Min)/(Max-Min)).astype('uint8') |
282 |
image = Image.fromarray(SigmaInt) |
283 |
image.save("%s.jpg" % prefix)
|
284 |
|
285 |
def BlackHole(zImage,fImage,Device): |
286 |
|
287 |
kernel_params = {} |
288 |
|
289 |
# Je detecte un peripherique GPU dans la liste des peripheriques
|
290 |
Id=0
|
291 |
HasXPU=False
|
292 |
for platform in cl.get_platforms(): |
293 |
for device in platform.get_devices(): |
294 |
if Id==Device:
|
295 |
XPU=device |
296 |
print "CPU/GPU selected: ",device.name.lstrip() |
297 |
HasXPU=True
|
298 |
Id+=1
|
299 |
|
300 |
if HasXPU==False: |
301 |
print "No XPU #%i found in all of %i devices, sorry..." % (Device,Id-1) |
302 |
sys.exit() |
303 |
|
304 |
ctx = cl.Context([XPU]) |
305 |
queue = cl.CommandQueue(ctx, |
306 |
properties=cl.command_queue_properties.PROFILING_ENABLE) |
307 |
|
308 |
BlackHoleCL = cl.Program(ctx,KERNEL_CODE.substitute(kernel_params)).build() |
309 |
|
310 |
# Je recupere les flag possibles pour les buffers
|
311 |
mf = cl.mem_flags |
312 |
|
313 |
print(zImage) |
314 |
|
315 |
# Program based on Kernel2
|
316 |
zImageCL = cl.Buffer(ctx, mf.WRITE_ONLY | mf.COPY_HOST_PTR, hostbuf=zImage) |
317 |
fImageCL = cl.Buffer(ctx, mf.WRITE_ONLY | mf.COPY_HOST_PTR, hostbuf=fImage) |
318 |
|
319 |
start_time=time.time() |
320 |
|
321 |
CLLaunch=BlackHoleCL.EachPixel(queue,(numpy.int32(zImage.shape[0]),numpy.int32(zImage.shape[1])),None,zImageCL,fImageCL) |
322 |
|
323 |
CLLaunch.wait() |
324 |
elapsed = time.time()-start_time |
325 |
print("Elapsed %f: " % elapsed)
|
326 |
|
327 |
cl.enqueue_copy(queue,zImage,zImageCL).wait() |
328 |
cl.enqueue_copy(queue,fImage,fImageCL).wait() |
329 |
|
330 |
print(zImage.max()) |
331 |
print(fImage.max()) |
332 |
zImageCL.release() |
333 |
fImageCL.release() |
334 |
|
335 |
ImageOutput(zImage,"TrouNoirZ")
|
336 |
ImageOutput(fImage,"TrouNoirF")
|
337 |
return(elapsed)
|
338 |
|
339 |
if __name__=='__main__': |
340 |
|
341 |
GpuStyle = 'OpenCL'
|
342 |
Mass = 1.
|
343 |
# Internal Radius 3 times de Schwarzschild Radius
|
344 |
InternalRadius=6.*Mass
|
345 |
#
|
346 |
ExternalRadius=12.
|
347 |
#
|
348 |
ObserverDistance=100.
|
349 |
# Angle with normal to disc 10 degrees
|
350 |
Angle = numpy.pi/180.*(90.-10.) |
351 |
# Radiation of disc : BlackBody or Monochromatic
|
352 |
BlackBody = True
|
353 |
# Camera : Angular Camera or plate with the size of camera
|
354 |
AngularCamera = True
|
355 |
# Size of image
|
356 |
Size=256
|
357 |
# Variable Type
|
358 |
VariableType='FP32'
|
359 |
# ?
|
360 |
q=-2
|
361 |
|
362 |
HowToUse='%s -h [Help] -b [BlackBodyEmission] -c [AngularCamera] -s <SizeInPixels> -m <Mass> -i <DiscInternalRadius> -x <DiscExternalRadius> -o <ObservatorDistance> -a <AngleAboveDisc> -d <DeviceId> -g <CUDA/OpenCL> -t <FP32/FP64>'
|
363 |
|
364 |
try:
|
365 |
opts, args = getopt.getopt(sys.argv[1:],"hbcs:m:i:x:o:a:d:g:t:",["blackbody","camera","size=","mass=","internal=","external=","observer=","angle=","device=","gpustyle=","variabletype="]) |
366 |
except getopt.GetoptError:
|
367 |
print(HowToUse % sys.argv[0])
|
368 |
sys.exit(2)
|
369 |
|
370 |
# List of Devices
|
371 |
Devices=[] |
372 |
Alu={} |
373 |
|
374 |
for opt, arg in opts: |
375 |
if opt == '-h': |
376 |
print(HowToUse % sys.argv[0])
|
377 |
|
378 |
print("\nInformations about devices detected under OpenCL API:")
|
379 |
# For PyOpenCL import
|
380 |
try:
|
381 |
import pyopencl as cl |
382 |
Id=0
|
383 |
for platform in cl.get_platforms(): |
384 |
for device in platform.get_devices(): |
385 |
#deviceType=cl.device_type.to_string(device.type)
|
386 |
deviceType="xPU"
|
387 |
print("Device #%i from %s of type %s : %s" % (Id,platform.vendor.lstrip(),deviceType,device.name.lstrip()))
|
388 |
Id=Id+1
|
389 |
|
390 |
except:
|
391 |
print("Your platform does not seem to support OpenCL")
|
392 |
|
393 |
print("\nInformations about devices detected under CUDA API:")
|
394 |
# For PyCUDA import
|
395 |
try:
|
396 |
import pycuda.driver as cuda |
397 |
cuda.init() |
398 |
for Id in range(cuda.Device.count()): |
399 |
device=cuda.Device(Id) |
400 |
print("Device #%i of type GPU : %s" % (Id,device.name()))
|
401 |
print
|
402 |
except:
|
403 |
print("Your platform does not seem to support CUDA")
|
404 |
|
405 |
sys.exit() |
406 |
|
407 |
elif opt in ("-d", "--device"): |
408 |
# Devices.append(int(arg))
|
409 |
Device=int(arg)
|
410 |
elif opt in ("-g", "--gpustyle"): |
411 |
GpuStyle = arg |
412 |
elif opt in ("-t", "--variabletype"): |
413 |
VariableType = arg |
414 |
elif opt in ("-s", "--size"): |
415 |
Size = int(arg)
|
416 |
elif opt in ("-m", "--mass"): |
417 |
Mass = float(arg)
|
418 |
elif opt in ("-i", "--internal"): |
419 |
InternalRadius = float(arg)
|
420 |
elif opt in ("-e", "--external"): |
421 |
ExternalRadius = float(arg)
|
422 |
elif opt in ("-o", "--observer"): |
423 |
ObserverDistance = float(arg)
|
424 |
elif opt in ("-a", "--angle"): |
425 |
Angle = numpy.pi/180.*(90.-float(arg)) |
426 |
elif opt in ("-b", "--blackbody"): |
427 |
BlackBody = True
|
428 |
elif opt in ("-c", "--camera"): |
429 |
AngularCamera = True
|
430 |
|
431 |
print("Device Identification selected : %s" % Device)
|
432 |
print("GpuStyle used : %s" % GpuStyle)
|
433 |
print("VariableType : %s" % VariableType)
|
434 |
print("Size : %i" % Size)
|
435 |
print("Mass : %f" % Mass)
|
436 |
print("Internal Radius : %f" % InternalRadius)
|
437 |
print("External Radius : %f" % ExternalRadius)
|
438 |
print("Observer Distance : %f" % ObserverDistance)
|
439 |
print("Angle with normal of (in radians) : %f" % Angle)
|
440 |
print("Black Body Disc Emission (monochromatic instead) : %s" % BlackBody)
|
441 |
print("Angular Camera (dimension of object instead) : %s" % AngularCamera)
|
442 |
|
443 |
if GpuStyle=='CUDA': |
444 |
print("\nSelection of CUDA device")
|
445 |
try:
|
446 |
# For PyCUDA import
|
447 |
import pycuda.driver as cuda |
448 |
|
449 |
cuda.init() |
450 |
for Id in range(cuda.Device.count()): |
451 |
device=cuda.Device(Id) |
452 |
print("Device #%i of type GPU : %s" % (Id,device.name()))
|
453 |
if Id in Devices: |
454 |
Alu[Id]='GPU'
|
455 |
|
456 |
except ImportError: |
457 |
print("Platform does not seem to support CUDA")
|
458 |
|
459 |
if GpuStyle=='OpenCL': |
460 |
print("\nSelection of OpenCL device")
|
461 |
try:
|
462 |
# For PyOpenCL import
|
463 |
import pyopencl as cl |
464 |
Id=0
|
465 |
for platform in cl.get_platforms(): |
466 |
for device in platform.get_devices(): |
467 |
#deviceType=cl.device_type.to_string(device.type)
|
468 |
deviceType="xPU"
|
469 |
print("Device #%i from %s of type %s : %s" % (Id,platform.vendor.lstrip().rstrip(),deviceType,device.name.lstrip().rstrip()))
|
470 |
|
471 |
if Id in Devices: |
472 |
# Set the Alu as detected Device Type
|
473 |
Alu[Id]=deviceType |
474 |
Id=Id+1
|
475 |
except ImportError: |
476 |
print("Platform does not seem to support OpenCL")
|
477 |
|
478 |
# print(Devices,Alu)
|
479 |
|
480 |
# MyImage=numpy.where(numpy.random.zeros(Size,Size)>0,1,-1).astype(numpy.float32)
|
481 |
zImage=numpy.zeros((Size,Size),dtype=numpy.float32) |
482 |
fImage=numpy.zeros((Size,Size),dtype=numpy.float32) |
483 |
|
484 |
print(zImage) |
485 |
|
486 |
duration=BlackHole(zImage,fImage,Device) |
487 |
|
488 |
|