11 |
11 |
#include <stdlib.h>
|
12 |
12 |
#include <omp.h>
|
13 |
13 |
#include <limits.h>
|
|
14 |
#include <sys/time.h>
|
14 |
15 |
|
15 |
16 |
// Marsaglia RNG very simple implementation
|
16 |
17 |
#define znew ((z=36969*(z&65535)+(z>>16))<<16)
|
... | ... | |
27 |
28 |
|
28 |
29 |
#define ITERATIONS 1000000000
|
29 |
30 |
|
30 |
|
#define PROCESS 4
|
|
31 |
#define PARALLELRATE 1024
|
31 |
32 |
|
32 |
33 |
#ifdef LONG
|
33 |
34 |
#define LENGTH long long
|
... | ... | |
36 |
37 |
#endif
|
37 |
38 |
|
38 |
39 |
#pragma omp declare target
|
39 |
|
LENGTH splitter(int,int,int,LENGTH);
|
|
40 |
LENGTH splitter(LENGTH,int,int,int);
|
40 |
41 |
|
41 |
42 |
LENGTH MainLoopGlobal(LENGTH iterations,unsigned int seed_w,unsigned int seed_z)
|
42 |
43 |
{
|
... | ... | |
129 |
130 |
return(total);
|
130 |
131 |
}
|
131 |
132 |
|
132 |
|
// Splits a run into ParallelRate independent streams, evaluates each one
// with MainLoopGlobal() inside an OpenMP target/teams/distribute region,
// and returns the sum of the per-stream results.
//
// Stream i is called with the seeds (seed_w+i, seed_z+i) and the same
// per-stream iteration count: iterations/ParallelRate, rounded up. The
// work actually performed is therefore ParallelRate*IterationsEach, which
// may be larger than 'iterations'.
//
// Parameters:
//   iterations    requested total number of iterations
//   seed_w,seed_z base seeds, offset by the stream index
//   ParallelRate  number of streams, must be >= 1
//
// Returns the sum of all per-stream results. Prints every per-stream
// result, then the stream count, the elapsed wall-clock time and the
// iteration rate. If ParallelRate < 1 or the result buffer cannot be
// allocated, a message is written to stderr and 0 is returned.
LENGTH splitter(LENGTH iterations,int seed_w,int seed_z,int ParallelRate) {

  // A non-positive stream count would divide by zero below.
  if (ParallelRate < 1) {
    fprintf(stderr,"splitter: ParallelRate must be >= 1 (got %i)\n",ParallelRate);
    return(0);
  }

  // One result slot per stream, sized from ParallelRate and kept on the
  // heap: a fixed multi-megabyte automatic array can overflow the stack
  // and silently caps the number of streams.
  LENGTH *inside=calloc((size_t)ParallelRate,sizeof *inside);
  if (inside == NULL) {
    fprintf(stderr,"splitter: cannot allocate %i result slots\n",ParallelRate);
    return(0);
  }

  // Ceiling division so that every requested iteration is covered.
  LENGTH IterationsEach=iterations/ParallelRate;
  if ((iterations%ParallelRate) != 0) {
    IterationsEach++;
  }

  struct timeval tv1,tv2;
  gettimeofday(&tv1, NULL);

  // 'inside' is a pointer, so the section to copy back from the device
  // has to be stated explicitly.
#pragma omp target device(0) map(from: inside[0:ParallelRate])
#pragma omp teams num_teams(60) thread_limit(4)
#pragma omp distribute
  for (int i=0 ; i<ParallelRate; i++) {
    inside[i]=MainLoopGlobal(IterationsEach,seed_w+i,seed_z+i);
  }

  // The reduction is part of the timed section, the printing is not.
  LENGTH insides=0;
  for (int i=0 ; i<ParallelRate; i++) {
    insides+=inside[i];
  }

  gettimeofday(&tv2, NULL);

  for (int i=0 ; i<ParallelRate; i++) {
    printf("\tFound %lld for process %i\n",(long long)inside[i],i);
  }
  printf("\n");

  free(inside);

  // Convert to double before scaling so neither the microsecond product
  // nor the iteration product can overflow an integer type.
  double elapsed=(double)(tv2.tv_sec-tv1.tv_sec)+
    (double)(tv2.tv_usec-tv1.tv_usec)/1000000.;

  // Report a rate of 0 when no time was measured instead of dividing by 0.
  double itops=0.;
  if (elapsed > 0.) {
    itops=(double)ParallelRate*(double)IterationsEach/elapsed;
  }

  printf("ParallelRate %i\nElapsed Time %.2f\nItops %.0f\n",ParallelRate,elapsed,itops);

  return(insides);
}
|
153 |
171 |
|
154 |
172 |
int main(int argc, char *argv[]) {
|
155 |
173 |
|
156 |
|
unsigned int seed_w=10,seed_z=10,process=PROCESS;
|
|
174 |
unsigned int seed_w=110271,seed_z=101008,ParallelRate=PARALLELRATE;
|
157 |
175 |
LENGTH iterations=ITERATIONS;
|
158 |
176 |
LENGTH insides=0;
|
159 |
177 |
|
160 |
178 |
if (argc > 1) {
|
161 |
179 |
iterations=(LENGTH)atoll(argv[1]);
|
162 |
|
process=atoi(argv[2]);
|
|
180 |
ParallelRate=atoi(argv[2]);
|
163 |
181 |
}
|
164 |
182 |
else {
|
165 |
183 |
printf("\n\tPi : Estimate Pi with Monte Carlo exploration\n\n");
|
166 |
184 |
printf("\t\t#1 : number of iterations (default 1 billion)\n");
|
167 |
|
printf("\t\t#2 : number of process (default 4)\n\n");
|
|
185 |
printf("\t\t#2 : ParallelRate (default 1024)\n\n");
|
168 |
186 |
}
|
169 |
187 |
|
170 |
188 |
printf ("\n\tInformation about architecture:\n\n");
|
... | ... | |
177 |
195 |
printf ("\tMax long = %ld\n", LONG_MAX);
|
178 |
196 |
printf ("\tMax long long = %lld\n\n", LLONG_MAX);
|
179 |
197 |
|
180 |
|
insides=splitter(process,seed_w,seed_z,iterations);
|
|
198 |
insides=splitter(iterations,seed_w,seed_z,ParallelRate);
|
181 |
199 |
|
182 |
|
float pi=4.*(float)insides/(float)iterations;
|
|
200 |
LENGTH total=((iterations%ParallelRate)==0)?iterations:(iterations/ParallelRate+1)*ParallelRate;
|
183 |
201 |
|
184 |
|
printf("\tPi=%f with error %f and %lld iterations\n\n",pi,
|
185 |
|
fabs(pi-4*atan(1))/pi,(long long)iterations);
|
|
202 |
printf("Inside/Total %ld %ld\nPi estimation %f\n\n",insides,total,(4.*(float)insides/total));
|
186 |
203 |
|
187 |
204 |
}
|