Révision 199
TrouNoir/TrouNoir.py (revision 199) | ||
---|---|---|
1 |
#!/usr/bin/env python |
|
2 |
# |
|
3 |
# TrouNoir model using PyOpenCL |
|
4 |
# |
|
5 |
# CC BY-NC-SA 2011 : <emmanuel.quemener@ens-lyon.fr> |
|
6 |
# |
|
7 |
# Thanks to Andreas Klockner for PyOpenCL: |
|
8 |
# http://mathema.tician.de/software/pyopencl |
|
9 |
# |
|
10 |
# Interesting links: |
|
11 |
# http://viennacl.sourceforge.net/viennacl-documentation.html |
|
12 |
# http://enja.org/2011/02/22/adventures-in-pyopencl-part-1-getting-started-with-python/ |
|
13 |
|
|
14 |
import pyopencl as cl |
|
15 |
import numpy |
|
16 |
from PIL import Image |
|
17 |
import time,string |
|
18 |
from numpy.random import randint as nprnd |
|
19 |
import sys |
|
20 |
import getopt |
|
21 |
import matplotlib.pyplot as plt |
|
22 |
|
|
23 |
KERNEL_CODE=string.Template(""" |
|
24 |
|
|
25 |
#define PI (float)3.14159265359 |
|
26 |
#define nbr 200 |
|
27 |
|
|
28 |
float atanp(float x,float y) |
|
29 |
{ |
|
30 |
float angle; |
|
31 |
|
|
32 |
angle=atan2(y,x); |
|
33 |
|
|
34 |
if (angle<0) |
|
35 |
{ |
|
36 |
angle+=(float)2.e0f*PI; |
|
37 |
} |
|
38 |
|
|
39 |
return angle; |
|
40 |
} |
|
41 |
|
|
42 |
float f(float v) |
|
43 |
{ |
|
44 |
return v; |
|
45 |
} |
|
46 |
|
|
47 |
float g(float u,float m,float b) |
|
48 |
{ |
|
49 |
return (3.*m/b*pow(u,2)-u); |
|
50 |
} |
|
51 |
|
|
52 |
|
|
53 |
void calcul(float *us,float *vs,float up,float vp, |
|
54 |
float h,float m,float b) |
|
55 |
{ |
|
56 |
float c0,c1,c2,c3,d0,d1,d2,d3; |
|
57 |
|
|
58 |
c0=h*f(vp); |
|
59 |
c1=h*f(vp+c0/2.); |
|
60 |
c2=h*f(vp+c1/2.); |
|
61 |
c3=h*f(vp+c2); |
|
62 |
d0=h*g(up,m,b); |
|
63 |
d1=h*g(up+d0/2.,m,b); |
|
64 |
d2=h*g(up+d1/2.,m,b); |
|
65 |
d3=h*g(up+d2,m,b); |
|
66 |
|
|
67 |
*us=up+(c0+2.*c1+2.*c2+c3)/6.; |
|
68 |
*vs=vp+(d0+2.*d1+2.*d2+d3)/6.; |
|
69 |
} |
|
70 |
|
|
71 |
void rungekutta(float *ps,float *us,float *vs, |
|
72 |
float pp,float up,float vp, |
|
73 |
float h,float m,float b) |
|
74 |
{ |
|
75 |
calcul(us,vs,up,vp,h,m,b); |
|
76 |
*ps=pp+h; |
|
77 |
} |
|
78 |
|
|
79 |
float decalage_spectral(float r,float b,float phi, |
|
80 |
float tho,float m) |
|
81 |
{ |
|
82 |
return (sqrt(1-3*m/r)/(1+sqrt(m/pow(r,3))*b*sin(tho)*sin(phi))); |
|
83 |
} |
|
84 |
|
|
85 |
float spectre(float rf,int q,float b,float db, |
|
86 |
float h,float r,float m,float bss) |
|
87 |
{ |
|
88 |
float flx; |
|
89 |
int fi; |
|
90 |
|
|
91 |
fi=(int)(rf*nbr/bss); |
|
92 |
flx=pow(r/m,q)*pow(rf,4)*b*db*h; |
|
93 |
return(flx); |
|
94 |
} |
|
95 |
|
|
96 |
float spectre_cn(float rf,float b,float db, |
|
97 |
float h,float r,float m,float bss) |
|
98 |
{ |
|
99 |
float flx; |
|
100 |
float nu_rec,nu_em,qu,v,temp,temp_em,flux_int,m_point,planck,c,k; |
|
101 |
int fi,posfreq; |
|
102 |
|
|
103 |
planck=6.62e-34; |
|
104 |
k=1.38e-23; |
|
105 |
temp=3.e7; |
|
106 |
m_point=1.e14; |
|
107 |
v=1.-3./r; |
|
108 |
|
|
109 |
qu=1/sqrt(1-3./r)/sqrt(r)*(sqrt(r)-sqrt(6.)+sqrt(3.)/2.*log((sqrt(r)+sqrt(3.))/(sqrt(r)-sqrt(3.))*(sqrt(6.)-sqrt(3.))/(sqrt(6.)+sqrt(3.)))); |
|
110 |
|
|
111 |
temp_em=temp*sqrt(m)*exp(0.25*log(m_point))*exp(-0.75*log(r))* |
|
112 |
exp(-0.125*log(v))*exp(0.25*log(qu)); |
|
113 |
|
|
114 |
flux_int=0; |
|
115 |
flx=0; |
|
116 |
|
|
117 |
for (fi=1;fi<nbr;fi++) |
|
118 |
{ |
|
119 |
nu_em=bss*fi/nbr; |
|
120 |
nu_rec=nu_em*rf; |
|
121 |
posfreq=1./bss*nu_rec*nbr; |
|
122 |
if ((posfreq>0)&&(posfreq<nbr)) |
|
123 |
{ |
|
124 |
flux_int=2*planck/9.e16f*pow(nu_em,3)/(exp(planck*nu_em/k/temp_em)-1.)*pow(rf,3)*b*db*h; |
|
125 |
flx+=flux_int; |
|
126 |
} |
|
127 |
} |
|
128 |
return(flx); |
|
129 |
} |
|
130 |
|
|
131 |
void impact(float phi,float r,float b,float tho,float m, |
|
132 |
float *zp,float *fp, |
|
133 |
int q,float db, |
|
134 |
float h,float bss,int raie) |
|
135 |
{ |
|
136 |
float flx,rf; |
|
137 |
|
|
138 |
rf=decalage_spectral(r,b,phi,tho,m); |
|
139 |
|
|
140 |
if (raie==0) |
|
141 |
{ |
|
142 |
flx=spectre(rf,q,b,db,h,r,m,bss); |
|
143 |
} |
|
144 |
else |
|
145 |
{ |
|
146 |
flx=spectre_cn(rf,b,db,h,r,m,bss); |
|
147 |
} |
|
148 |
|
|
149 |
*zp=1./rf; |
|
150 |
*fp=flx; |
|
151 |
|
|
152 |
} |
|
153 |
|
|
154 |
__kernel void EachPixel(__global float *MyImage) |
|
155 |
{ |
|
156 |
uint xi=(uint)get_global_id(0); |
|
157 |
uint yi=(uint)get_global_id(1); |
|
158 |
uint sizex=(uint)get_global_size(0); |
|
159 |
uint sizey=(uint)get_global_size(1); |
|
160 |
|
|
161 |
// Perform trajectory for each pixel |
|
162 |
|
|
163 |
float m,rs,ri,re,tho,ro; |
|
164 |
int q; |
|
165 |
|
|
166 |
float bss,stp; |
|
167 |
int nmx,dim; |
|
168 |
float d,bmx,db,b,h; |
|
169 |
float up,vp,pp; |
|
170 |
float us,vs,ps; |
|
171 |
float rp[2000]; |
|
172 |
float zmx,fmx,zen,fen; |
|
173 |
float flux_tot,impc_tot; |
|
174 |
float phi,thi,thx,phd,php,nr,r; |
|
175 |
int ni,ii,i,imx,j,n,tst,dist,raie,pc,fcl,zcl; |
|
176 |
int nh; |
|
177 |
int ExitOnImpact; |
|
178 |
float zp,fp; |
|
179 |
|
|
180 |
m=1.; |
|
181 |
rs=2.*m; |
|
182 |
ri=3.*rs; |
|
183 |
re=12.; |
|
184 |
ro=100.; |
|
185 |
//tho=PI/180.*80; |
|
186 |
tho=PI/180.*0.; |
|
187 |
q=-2; |
|
188 |
dist=1; |
|
189 |
raie=0; |
|
190 |
|
|
191 |
nmx=(float)sizex/2.; |
|
192 |
stp=0.5; |
|
193 |
// Autosize for image |
|
194 |
bmx=1.25*re; |
|
195 |
b=0.; |
|
196 |
thx=asin(bmx/ro); |
|
197 |
pc=0; |
|
198 |
|
|
199 |
h=PI/5.e2f; |
|
200 |
|
|
201 |
// set origin as center of image |
|
202 |
float x=(float)xi-(float)(sizex/2)+(float)5e-1f; |
|
203 |
float y=(float)(sizey/2)-(float)yi-(float)5e-1f; |
|
204 |
// angle extracted from cylindric symmetry |
|
205 |
phi=atanp(x,y); |
|
206 |
phd=atanp(cos(phi)*sin(tho),cos(tho)); |
|
207 |
|
|
208 |
if (dist==1) |
|
209 |
{ |
|
210 |
db=bmx/nmx; |
|
211 |
// impact parameter |
|
212 |
b=sqrt(x*x+y*y)*(float)2.e0f/(float)sizex*bmx; |
|
213 |
up=0.; |
|
214 |
vp=1.; |
|
215 |
} |
|
216 |
else |
|
217 |
{ |
|
218 |
b=sqrt(x*x+y*y)*(float)2.e0f/(float)sizex*bmx; |
|
219 |
thi=asin(b/ro); |
|
220 |
db=bmx/nmx; |
|
221 |
up=sin(thi); |
|
222 |
vp=cos(thi); |
|
223 |
} |
|
224 |
|
|
225 |
pp=0.; |
|
226 |
nh=0; |
|
227 |
|
|
228 |
rungekutta(&ps,&us,&vs,pp,up,vp,h,m,b); |
|
229 |
|
|
230 |
rp[nh]=fabs(b/us); |
|
231 |
|
|
232 |
ExitOnImpact=0; |
|
233 |
|
|
234 |
do |
|
235 |
{ |
|
236 |
nh++; |
|
237 |
pp=ps; |
|
238 |
up=us; |
|
239 |
vp=vs; |
|
240 |
rungekutta(&ps,&us,&vs,pp,up,vp,h,m,b); |
|
241 |
rp[nh]=fabs(b/us); |
|
242 |
ExitOnImpact = (((fmod(pp,PI)<fmod(phd,PI))&&(fmod(ps,PI)>fmod(phd,PI)))||((fmod(ps,PI)<fmod(phd,PI))&&(fmod(pp,PI)>fmod(phd,PI))))&&(rp[nh]>ri)&&(rp[nh]<re)?1:0; |
|
243 |
|
|
244 |
} while ((rp[nh]>=rs)&&(rp[nh]<=rp[0])&&(ExitOnImpact==0)); |
|
245 |
|
|
246 |
if (ExitOnImpact==1) { |
|
247 |
impact(phi,rp[nh-1],b,tho,m,&zp,&fp,q,db,h,bss,raie); |
|
248 |
} |
|
249 |
else |
|
250 |
{ |
|
251 |
zp=0.; |
|
252 |
} |
|
253 |
|
|
254 |
// |
|
255 |
MyImage[yi+sizex*xi]=(float)zp; |
|
256 |
|
|
257 |
barrier(CLK_LOCAL_MEM_FENCE); |
|
258 |
|
|
259 |
} |
|
260 |
""") |
|
261 |
|
|
262 |
def ImageOutput(sigma,prefix): |
|
263 |
Max=sigma.max() |
|
264 |
Min=sigma.min() |
|
265 |
|
|
266 |
# Normalize value as 8bits Integer |
|
267 |
SigmaInt=(255*(sigma-Min)/(Max-Min)).astype('uint8') |
|
268 |
image = Image.fromarray(SigmaInt) |
|
269 |
image.save("%s.jpg" % prefix) |
|
270 |
|
|
271 |
def BlackHole(MyImage,Device): |
|
272 |
|
|
273 |
kernel_params = {} |
|
274 |
|
|
275 |
# Je detecte un peripherique GPU dans la liste des peripheriques |
|
276 |
Id=0 |
|
277 |
HasXPU=False |
|
278 |
for platform in cl.get_platforms(): |
|
279 |
for device in platform.get_devices(): |
|
280 |
if Id==Device: |
|
281 |
XPU=device |
|
282 |
print "CPU/GPU selected: ",device.name.lstrip() |
|
283 |
HasXPU=True |
|
284 |
Id+=1 |
|
285 |
|
|
286 |
if HasXPU==False: |
|
287 |
print "No XPU #%i found in all of %i devices, sorry..." % (Device,Id-1) |
|
288 |
sys.exit() |
|
289 |
|
|
290 |
ctx = cl.Context([XPU]) |
|
291 |
queue = cl.CommandQueue(ctx, |
|
292 |
properties=cl.command_queue_properties.PROFILING_ENABLE) |
|
293 |
|
|
294 |
BlackHoleCL = cl.Program(ctx,KERNEL_CODE.substitute(kernel_params)).build() |
|
295 |
|
|
296 |
# Je recupere les flag possibles pour les buffers |
|
297 |
mf = cl.mem_flags |
|
298 |
|
|
299 |
print(MyImage) |
|
300 |
|
|
301 |
# Program based on Kernel2 |
|
302 |
MyImageCL = cl.Buffer(ctx, mf.WRITE_ONLY | mf.COPY_HOST_PTR, hostbuf=MyImage) |
|
303 |
|
|
304 |
start_time=time.time() |
|
305 |
|
|
306 |
CLLaunch=BlackHoleCL.EachPixel(queue,(MyImage.shape[0],MyImage.shape[1]),None,MyImageCL) |
|
307 |
|
|
308 |
CLLaunch.wait() |
|
309 |
elapsed = time.time()-start_time |
|
310 |
print("Elapsed %f: " % elapsed) |
|
311 |
|
|
312 |
cl.enqueue_copy(queue,MyImage,MyImageCL).wait() |
|
313 |
|
|
314 |
print(MyImage) |
|
315 |
MyImageCL.release() |
|
316 |
|
|
317 |
ImageOutput(MyImage,"TrouNoir") |
|
318 |
return(elapsed) |
|
319 |
|
|
320 |
if __name__=='__main__': |
|
321 |
|
|
322 |
GpuStyle = 'OpenCL' |
|
323 |
Mass = 1. |
|
324 |
# Internal Radius 3 times de Schwarzschild Radius |
|
325 |
InternalRadius=6.*Mass |
|
326 |
# |
|
327 |
ExternalRadius=12. |
|
328 |
# |
|
329 |
ObserverDistance=100. |
|
330 |
# Angle with normal to disc 10 degrees |
|
331 |
Angle = numpy.pi/180.*(90.-10.) |
|
332 |
# Radiation of disc : BlackBody or Monochromatic |
|
333 |
BlackBody = True |
|
334 |
# Camera : Angular Camera or plate with the size of camera |
|
335 |
AngularCamera = True |
|
336 |
# Size of image |
|
337 |
Size=256 |
|
338 |
# Variable Type |
|
339 |
VariableType='FP32' |
|
340 |
# ? |
|
341 |
q=-2 |
|
342 |
|
|
343 |
HowToUse='%s -h [Help] -b [BlackBodyEmission] -c [AngularCamera] -s <SizeInPixels> -m <Mass> -i <DiscInternalRadius> -x <DiscExternalRadius> -o <ObservatorDistance> -a <AngleAboveDisc> -d <DeviceId> -g <CUDA/OpenCL> -t <FP32/FP64>' |
|
344 |
|
|
345 |
try: |
|
346 |
opts, args = getopt.getopt(sys.argv[1:],"hbcs:m:i:x:o:a:d:g:t:",["blackbody","camera","size=","mass=","internal=","external=","observer=","angle=","device=","gpustyle=","variabletype="]) |
|
347 |
except getopt.GetoptError: |
|
348 |
print(HowToUse % sys.argv[0]) |
|
349 |
sys.exit(2) |
|
350 |
|
|
351 |
# List of Devices |
|
352 |
Devices=[] |
|
353 |
Alu={} |
|
354 |
|
|
355 |
for opt, arg in opts: |
|
356 |
if opt == '-h': |
|
357 |
print(HowToUse % sys.argv[0]) |
|
358 |
|
|
359 |
print("\nInformations about devices detected under OpenCL API:") |
|
360 |
# For PyOpenCL import |
|
361 |
try: |
|
362 |
import pyopencl as cl |
|
363 |
Id=0 |
|
364 |
for platform in cl.get_platforms(): |
|
365 |
for device in platform.get_devices(): |
|
366 |
#deviceType=cl.device_type.to_string(device.type) |
|
367 |
deviceType="xPU" |
|
368 |
print("Device #%i from %s of type %s : %s" % (Id,platform.vendor.lstrip(),deviceType,device.name.lstrip())) |
|
369 |
Id=Id+1 |
|
370 |
|
|
371 |
except: |
|
372 |
print("Your platform does not seem to support OpenCL") |
|
373 |
|
|
374 |
print("\nInformations about devices detected under CUDA API:") |
|
375 |
# For PyCUDA import |
|
376 |
try: |
|
377 |
import pycuda.driver as cuda |
|
378 |
cuda.init() |
|
379 |
for Id in range(cuda.Device.count()): |
|
380 |
device=cuda.Device(Id) |
|
381 |
print("Device #%i of type GPU : %s" % (Id,device.name())) |
|
382 |
|
|
383 |
except: |
|
384 |
print("Your platform does not seem to support CUDA") |
|
385 |
|
|
386 |
sys.exit() |
|
387 |
|
|
388 |
elif opt in ("-d", "--device"): |
|
389 |
# Devices.append(int(arg)) |
|
390 |
Device=int(arg) |
|
391 |
elif opt in ("-g", "--gpustyle"): |
|
392 |
GpuStyle = arg |
|
393 |
elif opt in ("-t", "--variabletype"): |
|
394 |
VariableType = arg |
|
395 |
elif opt in ("-s", "--size"): |
|
396 |
Size = int(arg) |
|
397 |
elif opt in ("-m", "--mass"): |
|
398 |
Mass = float(arg) |
|
399 |
elif opt in ("-i", "--internal"): |
|
400 |
InternalRadius = float(arg) |
|
401 |
elif opt in ("-e", "--external"): |
|
402 |
ExternalRadius = float(arg) |
|
403 |
elif opt in ("-o", "--observer"): |
|
404 |
ObserverDistance = float(arg) |
|
405 |
elif opt in ("-a", "--angle"): |
|
406 |
Angle = numpy.pi/180.*(90.-float(arg)) |
|
407 |
elif opt in ("-b", "--blackbody"): |
|
408 |
BlackBody = True |
|
409 |
elif opt in ("-c", "--camera"): |
|
410 |
AngularCamera = True |
|
411 |
|
|
412 |
print("Device Identification selected : %s" % Device) |
|
413 |
print("GpuStyle used : %s" % GpuStyle) |
|
414 |
print("VariableType : %s" % VariableType) |
|
415 |
print("Size : %i" % Size) |
|
416 |
print("Mass : %f" % Mass) |
|
417 |
print("Internal Radius : %f" % InternalRadius) |
|
418 |
print("External Radius : %f" % ExternalRadius) |
|
419 |
print("Observer Distance : %f" % ObserverDistance) |
|
420 |
print("Angle with normal of (in radians) : %f" % Angle) |
|
421 |
print("Black Body Disc Emission (monochromatic instead) : %s" % BlackBody) |
|
422 |
print("Angular Camera (dimension of object instead) : %s" % AngularCamera) |
|
423 |
|
|
424 |
if GpuStyle=='CUDA': |
|
425 |
print("\nSelection of CUDA device") |
|
426 |
try: |
|
427 |
# For PyCUDA import |
|
428 |
import pycuda.driver as cuda |
|
429 |
|
|
430 |
cuda.init() |
|
431 |
for Id in range(cuda.Device.count()): |
|
432 |
device=cuda.Device(Id) |
|
433 |
print("Device #%i of type GPU : %s" % (Id,device.name())) |
|
434 |
if Id in Devices: |
|
435 |
Alu[Id]='GPU' |
|
436 |
|
|
437 |
except ImportError: |
|
438 |
print("Platform does not seem to support CUDA") |
|
439 |
|
|
440 |
if GpuStyle=='OpenCL': |
|
441 |
print("\nSelection of OpenCL device") |
|
442 |
try: |
|
443 |
# For PyOpenCL import |
|
444 |
import pyopencl as cl |
|
445 |
Id=0 |
|
446 |
for platform in cl.get_platforms(): |
|
447 |
for device in platform.get_devices(): |
|
448 |
#deviceType=cl.device_type.to_string(device.type) |
|
449 |
deviceType="xPU" |
|
450 |
print("Device #%i from %s of type %s : %s" % (Id,platform.vendor.lstrip().rstrip(),deviceType,device.name.lstrip().rstrip())) |
|
451 |
|
|
452 |
if Id in Devices: |
|
453 |
# Set the Alu as detected Device Type |
|
454 |
Alu[Id]=deviceType |
|
455 |
Id=Id+1 |
|
456 |
except ImportError: |
|
457 |
print("Platform does not seem to support OpenCL") |
|
458 |
|
|
459 |
# print(Devices,Alu) |
|
460 |
|
|
461 |
# MyImage=numpy.where(numpy.random.zeros(Size,Size)>0,1,-1).astype(numpy.float32) |
|
462 |
MyImage=numpy.zeros((Size,Size),dtype=numpy.float32) |
|
463 |
|
|
464 |
print(MyImage) |
|
465 |
|
|
466 |
duration=BlackHole(MyImage,Device) |
|
467 |
|
|
468 |
|
|
0 | 469 |
Formats disponibles : Unified diff