1 |
1 |
//
|
2 |
2 |
// Estimation of Pi using Monte Carlo exploration process
|
3 |
3 |
// CeCILL v2 license, Emmanuel QUEMENER <emmanuel.quemener@gmail.com>
|
4 |
|
// Exploit OpenACC on Nvidia GPU
|
5 |
|
// module load
|
6 |
|
// icpc -std=c99 -O3 -o Pi_OpenACC Pi_OpenACC.c -lm
|
|
4 |
// Tested on GCC-10
|
|
5 |
// gcc -fopenacc -foffload=nvptx-none -foffload="-O3 -misa=sm_35" Pi_OpenACC.c
|
7 |
6 |
//
|
8 |
7 |
|
9 |
8 |
#include <math.h>
|
... | ... | |
139 |
138 |
LENGTH IterationsEach=((iterations%ParallelRate)==0)?iterations/ParallelRate:iterations/ParallelRate+1;
|
140 |
139 |
|
141 |
140 |
inside=(LENGTH*)malloc(sizeof(LENGTH)*ParallelRate);
|
142 |
|
|
143 |
|
#if _OPENACC
|
144 |
|
acc_init(acc_device_nvidia);
|
145 |
|
#endif
|
146 |
141 |
|
147 |
142 |
gettimeofday(&tv1, &tz);
|
148 |
|
#pragma omp parallel for shared(ParallelRate,inside)
|
149 |
|
#pragma acc kernels loop
|
150 |
|
for (int i=0 ; i<ParallelRate; i++) {
|
151 |
|
inside[i]=MainLoopGlobal(IterationsEach,seed_w+i,seed_z+i);
|
152 |
|
}
|
153 |
143 |
|
|
144 |
#pragma acc kernels copy(inside[0:ParallelRate])
|
|
145 |
{
|
|
146 |
#pragma acc independant
|
|
147 |
for (int i=0 ; i<ParallelRate; i++) {
|
|
148 |
inside[i]=MainLoopGlobal(IterationsEach,seed_w+i,seed_z+i);
|
|
149 |
}
|
|
150 |
}
|
154 |
151 |
gettimeofday(&tv2, &tz);
|
155 |
152 |
|
156 |
153 |
for (int i=0 ; i<ParallelRate; i++) {
|
... | ... | |
163 |
160 |
(tv2.tv_usec-tv1.tv_usec))/1000000;
|
164 |
161 |
|
165 |
162 |
double itops=(double)(ParallelRate*IterationsEach)/elapsed;
|
166 |
|
|
167 |
|
printf("ParallelRate %i\nElapsed Time %.2f\nItops %.0f\n",ParallelRate,elapsed,itops);
|
168 |
163 |
|
|
164 |
printf("Inside/Total %ld %ld\nParallelRate %i\nElapsed Time %.2f\nItops %.0f\nLogItops %.2f\n",insides,ParallelRate*IterationsEach,ParallelRate,elapsed,itops,log10(itops));
|
|
165 |
|
169 |
166 |
free(inside);
|
170 |
167 |
return(insides);
|
171 |
168 |
}
|
... | ... | |
200 |
197 |
|
201 |
198 |
LENGTH total=((iterations%ParallelRate)==0)?iterations:(iterations/ParallelRate+1)*ParallelRate;
|
202 |
199 |
|
203 |
|
printf("Inside/Total %ld %ld\nPi estimation %f\n\n",insides,total,(4.*(float)insides/total));
|
204 |
|
|
|
200 |
printf("Pi estimation %f\n\n",total,(4.*(float)insides/total));
|
|
201 |
|
|
202 |
|
205 |
203 |
}
|