root / TrouNoir / trou_noir_OpenACC.c @ 290
Historique | Voir | Annoter | Télécharger (10,99 ko)
1 |
/*
|
---|---|
2 |
Programme original realise en Fortran 77 en mars 1994
|
3 |
pour les Travaux Pratiques de Modelisation Numerique
|
4 |
DEA d'astrophysique et techniques spatiales de Meudon
|
5 |
|
6 |
par Herve Aussel et Emmanuel Quemener
|
7 |
|
8 |
Conversion en C par Emmanuel Quemener en aout 1997
|
9 |
Modification par Emmanuel Quemener en aout 2019
|
10 |
|
11 |
Licence CC BY-NC-SA Emmanuel QUEMENER <emmanuel.quemener@gmail.com>
|
12 |
|
13 |
Remerciements a :
|
14 |
|
15 |
- Herve Aussel pour sa procedure sur le spectre de corps noir
|
16 |
- Didier Pelat pour l'aide lors de ce travail
|
17 |
- Jean-Pierre Luminet pour son article de 1979
|
18 |
- Le Numerical Recipies pour ses recettes de calcul
|
19 |
- Luc Blanchet pour sa disponibilite lors de mes interrogations en RG
|
20 |
|
21 |
Compilation sous gcc ( Compilateur GNU sous Linux ) :
|
22 |
|
23 |
Version FP32 : gcc -fopenmp -O3 -ffast-math -DFP32 -o trou_noir_OMP_FP32 trou_noir_OpenMP.c -lm -lgomp
|
24 |
Version FP64 : gcc -fopenmp -O3 -ffast-math -FP64 -o trou_noir_OMP_FP64 trou_noir_OpenMP.c -lm -lgomp
|
25 |
|
26 |
Version FP32 : gcc -O3 -fopenacc -ffast-math -DFP32 -foffload=nvptx-none -foffload="-O3 -misa=sm_35 -lm" -o trou_noir_openacc_FP32 trou_noir_OpenACC.c -lm
|
27 |
Version FP64 : gcc -O3 -fopenacc -ffast-math -DFP64 -foffload=nvptx-none -foffload="-O3 -misa=sm_35 -lm" -o trou_noir_openacc_FP64 trou_noir_OpenACC.c -lm
|
28 |
|
29 |
*/
|
30 |
|
31 |
#include <stdio.h> |
32 |
#include <math.h> |
33 |
#include <stdlib.h> |
34 |
#include <string.h> |
35 |
#include <sys/time.h> |
36 |
#include <time.h> |
37 |
|
38 |
#define nbr 256 /* Nombre de colonnes du spectre */ |
39 |
|
40 |
#define PI 3.14159265359 |
41 |
|
42 |
#define TRACKPOINTS 2048 |
43 |
|
44 |
#if TYPE == FP64
|
45 |
#define MYFLOAT float |
46 |
#else
|
47 |
#define MYFLOAT double |
48 |
#endif
|
49 |
|
50 |
#pragma acc routine
|
51 |
MYFLOAT atanp(MYFLOAT x,MYFLOAT y) |
52 |
{ |
53 |
MYFLOAT angle; |
54 |
|
55 |
angle=atan2(y,x); |
56 |
|
57 |
if (angle<0) |
58 |
{ |
59 |
angle+=2*PI;
|
60 |
} |
61 |
|
62 |
return angle;
|
63 |
} |
64 |
|
65 |
#pragma acc routine
|
66 |
MYFLOAT f(MYFLOAT v) |
67 |
{ |
68 |
return v;
|
69 |
} |
70 |
|
71 |
#pragma acc routine
|
72 |
MYFLOAT g(MYFLOAT u,MYFLOAT m,MYFLOAT b) |
73 |
{ |
74 |
return (3.*m/b*pow(u,2)-u); |
75 |
} |
76 |
|
77 |
#pragma acc routine
|
78 |
void calcul(MYFLOAT *us,MYFLOAT *vs,MYFLOAT up,MYFLOAT vp,
|
79 |
MYFLOAT h,MYFLOAT m,MYFLOAT b) |
80 |
{ |
81 |
MYFLOAT c[4],d[4]; |
82 |
|
83 |
c[0]=h*f(vp);
|
84 |
c[1]=h*f(vp+c[0]/2.); |
85 |
c[2]=h*f(vp+c[1]/2.); |
86 |
c[3]=h*f(vp+c[2]); |
87 |
d[0]=h*g(up,m,b);
|
88 |
d[1]=h*g(up+d[0]/2.,m,b); |
89 |
d[2]=h*g(up+d[1]/2.,m,b); |
90 |
d[3]=h*g(up+d[2],m,b); |
91 |
|
92 |
*us=up+(c[0]+2.*c[1]+2.*c[2]+c[3])/6.; |
93 |
*vs=vp+(d[0]+2.*d[1]+2.*d[2]+d[3])/6.; |
94 |
} |
95 |
|
96 |
#pragma acc routine
|
97 |
void rungekutta(MYFLOAT *ps,MYFLOAT *us,MYFLOAT *vs,
|
98 |
MYFLOAT pp,MYFLOAT up,MYFLOAT vp, |
99 |
MYFLOAT h,MYFLOAT m,MYFLOAT b) |
100 |
{ |
101 |
calcul(us,vs,up,vp,h,m,b); |
102 |
*ps=pp+h; |
103 |
} |
104 |
|
105 |
#pragma acc routine
|
106 |
MYFLOAT decalage_spectral(MYFLOAT r,MYFLOAT b,MYFLOAT phi, |
107 |
MYFLOAT tho,MYFLOAT m) |
108 |
{ |
109 |
return (sqrt(1-3*m/r)/(1+sqrt(m/pow(r,3))*b*sin(tho)*sin(phi))); |
110 |
} |
111 |
|
112 |
#pragma acc routine
|
113 |
MYFLOAT spectre(MYFLOAT rf,MYFLOAT q,MYFLOAT b,MYFLOAT db, |
114 |
MYFLOAT h,MYFLOAT r,MYFLOAT m,MYFLOAT bss) |
115 |
{ |
116 |
MYFLOAT flx; |
117 |
|
118 |
flx=exp(q*log(r/m))*pow(rf,4)*b*db*h;
|
119 |
return(flx);
|
120 |
} |
121 |
|
122 |
#pragma acc routine
|
123 |
MYFLOAT spectre_cn(MYFLOAT rf,MYFLOAT b,MYFLOAT db, |
124 |
MYFLOAT h,MYFLOAT r,MYFLOAT m,MYFLOAT bss) |
125 |
{ |
126 |
|
127 |
MYFLOAT flx; |
128 |
MYFLOAT nu_rec,nu_em,qu,temp_em,flux_int; |
129 |
int fi,posfreq;
|
130 |
|
131 |
#define planck 6.62e-34 |
132 |
#define k 1.38e-23 |
133 |
#define c2 9.e16 |
134 |
#define temp 3.e7 |
135 |
#define m_point 1. |
136 |
|
137 |
#define lplanck (log(6.62)-34.*log(10.)) |
138 |
#define lk (log(1.38)-23.*log(10.)) |
139 |
#define lc2 (log(9.)+16.*log(10.)) |
140 |
|
141 |
MYFLOAT v=1.-3./r; |
142 |
|
143 |
qu=1./sqrt((1.-3./r)*r)*(sqrt(r)-sqrt(6.)+sqrt(3.)/2.*log((sqrt(r)+sqrt(3.))/(sqrt(r)-sqrt(3.))* 0.17157287525380988 )); |
144 |
|
145 |
temp_em=temp*sqrt(m)*exp(0.25*log(m_point)-0.75*log(r)-0.125*log(v)+0.25*log(fabs(qu))); |
146 |
|
147 |
flux_int=0.;
|
148 |
flx=0.;
|
149 |
|
150 |
for (fi=0;fi<nbr;fi++) |
151 |
{ |
152 |
nu_em=bss*(MYFLOAT)fi/(MYFLOAT)nbr; |
153 |
nu_rec=nu_em*rf; |
154 |
posfreq=(int)(nu_rec*(MYFLOAT)nbr/bss);
|
155 |
if ((posfreq>0)&&(posfreq<nbr)) |
156 |
{ |
157 |
flux_int=2.*planck/c2*pow(nu_em,3)/(exp(planck*nu_em/(k*temp_em))-1.)*exp(3.*log(rf))*b*db*h; |
158 |
flx+=flux_int; |
159 |
} |
160 |
} |
161 |
|
162 |
return((MYFLOAT)flx);
|
163 |
} |
164 |
|
165 |
#pragma acc routine
|
166 |
void impact(MYFLOAT d,MYFLOAT phi,int dim,MYFLOAT r,MYFLOAT b,MYFLOAT tho,MYFLOAT m, |
167 |
MYFLOAT *zp,MYFLOAT *fp, |
168 |
MYFLOAT q,MYFLOAT db, |
169 |
MYFLOAT h,MYFLOAT bss,int raie)
|
170 |
{ |
171 |
MYFLOAT xe,ye; |
172 |
int xi,yi;
|
173 |
MYFLOAT flx,rf; |
174 |
xe=d*sin(phi); |
175 |
ye=-d*cos(phi); |
176 |
|
177 |
xi=(int)xe+dim/2; |
178 |
yi=(int)ye+dim/2; |
179 |
|
180 |
rf=decalage_spectral(r,b,phi,tho,m); |
181 |
|
182 |
if (raie==0) |
183 |
{ |
184 |
bss=1.e19;
|
185 |
flx=spectre_cn(rf,b,db,h,r,m,bss); |
186 |
} |
187 |
else
|
188 |
{ |
189 |
bss=2.;
|
190 |
flx=spectre(rf,q,b,db,h,r,m,bss); |
191 |
} |
192 |
|
193 |
if (zp[xi+dim*yi]==0.) |
194 |
{ |
195 |
zp[xi+dim*yi]=1./rf;
|
196 |
} |
197 |
|
198 |
if (fp[xi+dim*yi]==0.) |
199 |
{ |
200 |
fp[xi+dim*yi]=flx; |
201 |
} |
202 |
|
203 |
} |
204 |
|
205 |
void sauvegarde_pgm(char nom[24],unsigned int *image,int dim) |
206 |
{ |
207 |
FILE *sortie; |
208 |
unsigned long i,j; |
209 |
|
210 |
sortie=fopen(nom,"w");
|
211 |
|
212 |
fprintf(sortie,"P5\n");
|
213 |
fprintf(sortie,"%i %i\n",dim,dim);
|
214 |
fprintf(sortie,"255\n");
|
215 |
|
216 |
for (j=0;j<dim;j++) for (i=0;i<dim;i++) |
217 |
{ |
218 |
fputc(image[i+dim*j],sortie); |
219 |
} |
220 |
|
221 |
fclose(sortie); |
222 |
} |
223 |
|
224 |
int main(int argc,char *argv[]) |
225 |
{ |
226 |
|
227 |
MYFLOAT m,rs,ri,re,tho; |
228 |
int q;
|
229 |
|
230 |
MYFLOAT bss,stp; |
231 |
int nmx,dim;
|
232 |
MYFLOAT bmx; |
233 |
MYFLOAT *zp,*fp; |
234 |
unsigned int *izp,*ifp; |
235 |
MYFLOAT zmx,fmx; |
236 |
int zimx=0,zjmx=0,fimx=0,fjmx=0; |
237 |
int raie,fcl,zcl;
|
238 |
struct timeval tv1,tv2;
|
239 |
MYFLOAT elapsed,cputime,epoch; |
240 |
int mtv1,mtv2;
|
241 |
unsigned int epoch1,epoch2; |
242 |
|
243 |
/* Variables used inside pragma need to be placed inside loop ! */
|
244 |
|
245 |
/* MYFLOAT d,db,b,nh,h,up,vp,pp,us,vs,ps; */
|
246 |
/* MYFLOAT phi,phd,php,nr,r; */
|
247 |
/* int ni,ii,imx,tst; */
|
248 |
/* MYFLOAT rp[TRACKPOINTS]; */
|
249 |
|
250 |
if (argc==2) |
251 |
{ |
252 |
if (strcmp(argv[1],"-aide")==0) |
253 |
{ |
254 |
printf("\nSimulation d'un disque d'accretion autour d'un trou noir\n");
|
255 |
printf("\nParametres a definir :\n\n");
|
256 |
printf(" 1) Dimension de l'Image\n");
|
257 |
printf(" 2) Masse relative du trou noir\n");
|
258 |
printf(" 3) Dimension du disque exterieur\n");
|
259 |
printf(" 4) Inclinaison par rapport au disque (en degres)\n");
|
260 |
printf(" 5) Rayonnement de disque MONOCHROMATIQUE ou CORPS_NOIR\n");
|
261 |
printf(" 6) Impression des images NEGATIVE ou POSITIVE\n");
|
262 |
printf(" 7) Nom de l'image des Flux\n");
|
263 |
printf(" 8) Nom de l'image des decalages spectraux\n");
|
264 |
printf("\nSi aucun parametre defini, parametres par defaut :\n\n");
|
265 |
printf(" 1) Dimension de l'image : 1024 pixels de cote\n");
|
266 |
printf(" 2) Masse relative du trou noir : 1\n");
|
267 |
printf(" 3) Dimension du disque exterieur : 12 \n");
|
268 |
printf(" 4) Inclinaison par rapport au disque (en degres) : 10\n");
|
269 |
printf(" 5) Rayonnement de disque CORPS_NOIR\n");
|
270 |
printf(" 6) Impression des images NEGATIVE ou POSITIVE\n");
|
271 |
printf(" 7) Nom de l'image des flux : flux.pgm\n");
|
272 |
printf(" 8) Nom de l'image des z : z.pgm\n");
|
273 |
} |
274 |
} |
275 |
|
276 |
if ((argc==9)||(argc==7)) |
277 |
{ |
278 |
printf("# Utilisation les valeurs definies par l'utilisateur\n");
|
279 |
|
280 |
dim=atoi(argv[1]);
|
281 |
m=atof(argv[2]);
|
282 |
re=atof(argv[3]);
|
283 |
tho=PI/180.*(90-atof(argv[4])); |
284 |
|
285 |
rs=2.*m;
|
286 |
ri=3.*rs;
|
287 |
|
288 |
if (strcmp(argv[5],"CORPS_NOIR")==0) |
289 |
{ |
290 |
raie=0;
|
291 |
} |
292 |
else
|
293 |
{ |
294 |
raie=1;
|
295 |
} |
296 |
|
297 |
} |
298 |
else
|
299 |
{ |
300 |
printf("# Utilisation les valeurs par defaut\n");
|
301 |
|
302 |
dim=1024;
|
303 |
m=1.;
|
304 |
rs=2.*m;
|
305 |
ri=3.*rs;
|
306 |
re=12.; |
307 |
tho=PI/180.*80; |
308 |
// Corps noir
|
309 |
raie=0;
|
310 |
} |
311 |
|
312 |
if (raie==1) |
313 |
{ |
314 |
bss=2.;
|
315 |
q=-2;
|
316 |
} |
317 |
else
|
318 |
{ |
319 |
bss=1.e19;
|
320 |
q=-0.75; |
321 |
} |
322 |
|
323 |
printf("# Dimension de l'image : %i\n",dim);
|
324 |
printf("# Masse : %f\n",m);
|
325 |
printf("# Rayon singularite : %f\n",rs);
|
326 |
printf("# Rayon interne : %f\n",ri);
|
327 |
printf("# Rayon externe : %f\n",re);
|
328 |
printf("# Inclinaison a la normale en radian : %f\n",tho);
|
329 |
|
330 |
zp=(MYFLOAT*)calloc(dim*dim,sizeof(MYFLOAT));
|
331 |
fp=(MYFLOAT*)calloc(dim*dim,sizeof(MYFLOAT));
|
332 |
|
333 |
izp=(unsigned int*)calloc(dim*dim,sizeof(unsigned int)); |
334 |
ifp=(unsigned int*)calloc(dim*dim,sizeof(unsigned int)); |
335 |
|
336 |
nmx=dim; |
337 |
stp=dim/(2.*nmx);
|
338 |
bmx=1.25*re; |
339 |
|
340 |
// Set start timer
|
341 |
gettimeofday(&tv1, NULL);
|
342 |
mtv1=clock()*1000/CLOCKS_PER_SEC;
|
343 |
epoch1=time(NULL);
|
344 |
|
345 |
#pragma acc data copy(zp[0:dim*dim],fp[0:dim*dim]) |
346 |
#pragma acc parallel loop
|
347 |
for (int n=1;n<=nmx;n++) |
348 |
{ |
349 |
MYFLOAT d,db,b,nh,h,up,vp,pp,us,vs,ps; |
350 |
MYFLOAT phi,phd,php,nr,r; |
351 |
int ni,ii,imx,tst;
|
352 |
MYFLOAT rp[TRACKPOINTS]; |
353 |
|
354 |
h=4.*PI/(MYFLOAT)TRACKPOINTS;
|
355 |
d=stp*(MYFLOAT)n; |
356 |
|
357 |
db=bmx/(MYFLOAT)nmx; |
358 |
b=db*(MYFLOAT)n; |
359 |
up=0.;
|
360 |
vp=1.;
|
361 |
|
362 |
pp=0.;
|
363 |
nh=1;
|
364 |
|
365 |
rungekutta(&ps,&us,&vs,pp,up,vp,h,m,b); |
366 |
|
367 |
rp[(int)nh]=fabs(b/us);
|
368 |
|
369 |
do
|
370 |
{ |
371 |
nh++; |
372 |
pp=ps; |
373 |
up=us; |
374 |
vp=vs; |
375 |
rungekutta(&ps,&us,&vs,pp,up,vp,h,m,b); |
376 |
|
377 |
rp[(int)nh]=b/us;
|
378 |
|
379 |
} while ((rp[(int)nh]>=rs)&&(rp[(int)nh]<=rp[1])); |
380 |
|
381 |
for (int i=nh+1;i<TRACKPOINTS;i++) |
382 |
{ |
383 |
rp[i]=0.;
|
384 |
} |
385 |
|
386 |
imx=(int)(8*d); |
387 |
|
388 |
for (int i=0;i<=imx;i++) |
389 |
{ |
390 |
phi=2.*PI/(MYFLOAT)imx*(MYFLOAT)i;
|
391 |
phd=atanp(cos(phi)*sin(tho),cos(tho)); |
392 |
phd=fmod(phd,PI); |
393 |
ii=0;
|
394 |
tst=0;
|
395 |
|
396 |
do
|
397 |
{ |
398 |
php=phd+(MYFLOAT)ii*PI; |
399 |
nr=php/h; |
400 |
ni=(int)nr;
|
401 |
|
402 |
if ((MYFLOAT)ni<nh)
|
403 |
{ |
404 |
r=(rp[ni+1]-rp[ni])*(nr-ni*1.)+rp[ni]; |
405 |
} |
406 |
else
|
407 |
{ |
408 |
r=rp[ni]; |
409 |
} |
410 |
|
411 |
if ((r<=re)&&(r>=ri))
|
412 |
{ |
413 |
tst=1;
|
414 |
impact(d,phi,dim,r,b,tho,m,zp,fp,q,db,h,bss,raie); |
415 |
} |
416 |
|
417 |
ii++; |
418 |
} while ((ii<=2)&&(tst==0)); |
419 |
} |
420 |
} |
421 |
|
422 |
// Set stop timer
|
423 |
gettimeofday(&tv2, NULL);
|
424 |
mtv2=clock()*1000/CLOCKS_PER_SEC;
|
425 |
epoch2=time(NULL);
|
426 |
|
427 |
elapsed=(MYFLOAT)((tv2.tv_sec-tv1.tv_sec) * 1000000L +
|
428 |
(tv2.tv_usec-tv1.tv_usec))/1000000;
|
429 |
cputime=(MYFLOAT)((mtv2-mtv1)/1000.); |
430 |
epoch=(MYFLOAT)(epoch2-epoch1); |
431 |
|
432 |
fmx=fp[0];
|
433 |
zmx=zp[0];
|
434 |
|
435 |
for (int i=0;i<dim;i++) for (int j=0;j<dim;j++) |
436 |
{ |
437 |
if (fmx<fp[i+dim*j])
|
438 |
{ |
439 |
fimx=i; |
440 |
fjmx=j; |
441 |
fmx=fp[i+dim*j]; |
442 |
} |
443 |
|
444 |
if (zmx<zp[i+dim*j])
|
445 |
{ |
446 |
zimx=i; |
447 |
zjmx=j; |
448 |
zmx=zp[i+dim*j]; |
449 |
} |
450 |
} |
451 |
|
452 |
printf("\nElapsed Time : %lf",(double)elapsed); |
453 |
printf("\nCPU Time : %lf",(double)cputime); |
454 |
printf("\nEpoch Time : %lf",(double)epoch); |
455 |
printf("\nZ max @(%.6f,%.6f) : %.6f",
|
456 |
(float)zimx/(float)dim-0.5,0.5-(float)zjmx/(float)dim,zmx); |
457 |
printf("\nFlux max @(%.6f,%.6f) : %.6f\n\n",
|
458 |
(float)fimx/(float)dim-0.5,0.5-(float)fjmx/(float)dim,fmx); |
459 |
|
460 |
// If input parameters set without output files precised
|
461 |
if (argc!=7) { |
462 |
|
463 |
for (int i=0;i<dim;i++) |
464 |
for (int j=0;j<dim;j++) |
465 |
{ |
466 |
zcl=(int)(255/zmx*zp[i+dim*(dim-1-j)]); |
467 |
fcl=(int)(255/fmx*fp[i+dim*(dim-1-j)]); |
468 |
|
469 |
if (strcmp(argv[6],"NEGATIVE")==0) |
470 |
{ |
471 |
if (zcl>255) |
472 |
{ |
473 |
izp[i+dim*j]=0;
|
474 |
} |
475 |
else
|
476 |
{ |
477 |
izp[i+dim*j]=255-zcl;
|
478 |
} |
479 |
|
480 |
if (fcl>255) |
481 |
{ |
482 |
ifp[i+dim*j]=0;
|
483 |
} |
484 |
else
|
485 |
{ |
486 |
ifp[i+dim*j]=255-fcl;
|
487 |
} |
488 |
|
489 |
} |
490 |
else
|
491 |
{ |
492 |
if (zcl>255) |
493 |
{ |
494 |
izp[i+dim*j]=255;
|
495 |
} |
496 |
else
|
497 |
{ |
498 |
izp[i+dim*j]=zcl; |
499 |
} |
500 |
|
501 |
if (fcl>255) |
502 |
{ |
503 |
ifp[i+dim*j]=255;
|
504 |
} |
505 |
else
|
506 |
{ |
507 |
ifp[i+dim*j]=fcl; |
508 |
} |
509 |
|
510 |
} |
511 |
|
512 |
} |
513 |
|
514 |
if (argc==9) |
515 |
{ |
516 |
sauvegarde_pgm(argv[7],ifp,dim);
|
517 |
sauvegarde_pgm(argv[8],izp,dim);
|
518 |
} |
519 |
else
|
520 |
{ |
521 |
sauvegarde_pgm("z.pgm",izp,dim);
|
522 |
sauvegarde_pgm("flux.pgm",ifp,dim);
|
523 |
} |
524 |
} |
525 |
else
|
526 |
{ |
527 |
printf("No output file precised, useful for benchmarks...\n\n");
|
528 |
} |
529 |
|
530 |
free(zp); |
531 |
free(fp); |
532 |
|
533 |
free(izp); |
534 |
free(ifp); |
535 |
|
536 |
} |
537 |
|
538 |
|