Révision 282 Pi/C/OpenACC/Pi_OpenACC.c
Pi_OpenACC.c (revision 282) | ||
---|---|---|
1 | 1 |
// |
2 | 2 |
// Estimation of Pi using Monte Carlo exploration process |
3 | 3 |
// Cecill v2 Emmanuel QUEMENER <emmanuel.quemener@gmail.com> |
4 |
// Exploit OpenACC on Nvidia GPU |
|
5 |
// module load |
|
6 |
// icpc -std=c99 -O3 -o Pi_OpenACC Pi_OpenACC.c -lm |
|
4 |
// Tested on GCC-10 |
|
5 |
// gcc -fopenacc -foffload=nvptx-none -foffload="-O3 -misa=sm_35" Pi_OpenACC.c |
|
7 | 6 |
// |
8 | 7 |
|
9 | 8 |
#include <math.h> |
... | ... | |
139 | 138 |
LENGTH IterationsEach=((iterations%ParallelRate)==0)?iterations/ParallelRate:iterations/ParallelRate+1; |
140 | 139 |
|
141 | 140 |
inside=(LENGTH*)malloc(sizeof(LENGTH)*ParallelRate); |
142 |
|
|
143 |
#if _OPENACC |
|
144 |
acc_init(acc_device_nvidia); |
|
145 |
#endif |
|
146 | 141 |
|
147 | 142 |
gettimeofday(&tv1, &tz); |
148 |
#pragma omp parallel for shared(ParallelRate,inside) |
|
149 |
#pragma acc kernels loop |
|
150 |
for (int i=0 ; i<ParallelRate; i++) { |
|
151 |
inside[i]=MainLoopGlobal(IterationsEach,seed_w+i,seed_z+i); |
|
152 |
} |
|
153 | 143 |
|
144 |
#pragma acc kernels copy(inside[0:ParallelRate]) |
|
145 |
{ |
|
146 |
#pragma acc loop independent |
|
147 |
for (int i=0 ; i<ParallelRate; i++) { |
|
148 |
inside[i]=MainLoopGlobal(IterationsEach,seed_w+i,seed_z+i); |
|
149 |
} |
|
150 |
} |
|
154 | 151 |
gettimeofday(&tv2, &tz); |
155 | 152 |
|
156 | 153 |
for (int i=0 ; i<ParallelRate; i++) { |
... | ... | |
163 | 160 |
(tv2.tv_usec-tv1.tv_usec))/1000000; |
164 | 161 |
|
165 | 162 |
double itops=(double)(ParallelRate*IterationsEach)/elapsed; |
166 |
|
|
167 |
printf("ParallelRate %i\nElapsed Time %.2f\nItops %.0f\n",ParallelRate,elapsed,itops); |
|
168 | 163 |
|
164 |
printf("Inside/Total %ld %ld\nParallelRate %i\nElapsed Time %.2f\nItops %.0f\nLogItops %.2f\n",insides,ParallelRate*IterationsEach,ParallelRate,elapsed,itops,log10(itops)); |
|
165 |
|
|
169 | 166 |
free(inside); |
170 | 167 |
return(insides); |
171 | 168 |
} |
... | ... | |
200 | 197 |
|
201 | 198 |
LENGTH total=((iterations%ParallelRate)==0)?iterations:(iterations/ParallelRate+1)*ParallelRate; |
202 | 199 |
|
203 |
printf("Inside/Total %ld %ld\nPi estimation %f\n\n",insides,total,(4.*(float)insides/total)); |
|
204 |
|
|
200 |
printf("Pi estimation %f\n\n",(4.*(float)insides/total)); |
|
201 |
|
|
202 |
|
|
205 | 203 |
} |
Formats disponibles : Unified diff