9 |
9 |
#include <stdlib.h>
|
10 |
10 |
#include <omp.h>
|
11 |
11 |
#include <limits.h>
|
|
12 |
#include <sys/time.h>
|
12 |
13 |
|
13 |
14 |
// Marsaglia RNG very simple implementation
|
14 |
15 |
#define znew ((z=36969*(z&65535)+(z>>16))<<16)
|
... | ... | |
25 |
26 |
|
26 |
27 |
#define ITERATIONS 1000000000
|
27 |
28 |
|
28 |
|
#define PROCESS 4
|
|
29 |
#define PARALLELRATE 1024
|
29 |
30 |
|
30 |
31 |
#ifdef LONG
|
31 |
32 |
#define LENGTH long long
|
... | ... | |
122 |
123 |
return(total);
|
123 |
124 |
}
|
124 |
125 |
|
125 |
|
LENGTH splitter(LENGTH iterations,unsigned int process,
|
126 |
|
unsigned int seed_w,unsigned int seed_z)
|
|
126 |
LENGTH splitter(LENGTH iterations,unsigned int seed_w,unsigned int seed_z,unsigned int ParallelRate)
|
127 |
127 |
{
|
128 |
|
// Trashy static allocation...
|
129 |
|
LENGTH inside[4096],insides=0;
|
|
128 |
LENGTH *inside,insides=0;
|
|
129 |
struct timeval tv1,tv2;
|
|
130 |
LENGTH IterationsEach=((iterations%ParallelRate)==0)?iterations/ParallelRate:iterations/ParallelRate+1;
|
130 |
131 |
|
|
132 |
inside=(LENGTH*)malloc(sizeof(LENGTH)*ParallelRate);
|
|
133 |
|
|
134 |
gettimeofday(&tv1, NULL);
|
|
135 |
|
131 |
136 |
#pragma omp parallel for
|
132 |
|
for (int i=0 ; i<process; i++) {
|
133 |
|
inside[i]=MainLoopGlobal(iterations/process,seed_w+process,seed_z+process);
|
134 |
|
printf("\tFound %lld for process %i\n",(long long)inside[i],i);
|
|
137 |
for (int i=0 ; i<ParallelRate; i++) {
|
|
138 |
inside[i]=MainLoopGlobal(IterationsEach,seed_w+i,seed_z+i);
|
135 |
139 |
}
|
136 |
|
printf("\n");
|
137 |
|
|
138 |
|
for (int i=0 ; i<process; i++) {
|
|
140 |
|
|
141 |
for (int i=0 ; i<ParallelRate; i++) {
|
139 |
142 |
insides+=inside[i];
|
140 |
143 |
}
|
|
144 |
|
|
145 |
gettimeofday(&tv2, NULL);
|
|
146 |
|
|
147 |
for (int i=0 ; i<ParallelRate; i++) {
|
|
148 |
printf("\tFound %lld for ParallelRate %i\n",(long long)inside[i],i);
|
|
149 |
}
|
|
150 |
printf("\n");
|
141 |
151 |
|
|
152 |
double elapsed=(double)((tv2.tv_sec-tv1.tv_sec) * 1000000L +
|
|
153 |
(tv2.tv_usec-tv1.tv_usec))/1000000;
|
|
154 |
|
|
155 |
double itops=(double)(ParallelRate*IterationsEach)/elapsed;
|
|
156 |
|
|
157 |
printf("ParallelRate %i\nElapsed Time %.2f\nItops %.0f\n",ParallelRate,elapsed,itops);
|
|
158 |
|
|
159 |
free(inside);
|
|
160 |
|
142 |
161 |
return(insides);
|
143 |
162 |
}
|
144 |
163 |
|
145 |
164 |
|
146 |
165 |
int main(int argc, char *argv[]) {
|
147 |
166 |
|
148 |
|
unsigned int seed_w=10,seed_z=10,process=PROCESS;
|
|
167 |
unsigned int seed_w=110271,seed_z=101008,ParallelRate=PARALLELRATE;
|
149 |
168 |
LENGTH iterations=ITERATIONS,insides=0;
|
150 |
169 |
|
151 |
170 |
if (argc > 1) {
|
152 |
171 |
iterations=(LENGTH)atoll(argv[1]);
|
153 |
|
process=atoi(argv[2]);
|
|
172 |
ParallelRate=atoi(argv[2]);
|
154 |
173 |
}
|
155 |
174 |
else {
|
156 |
175 |
printf("\n\tPi : Estimate Pi with Monte Carlo exploration\n\n");
|
157 |
176 |
printf("\t\t#1 : number of iterations (default 1 billion)\n");
|
158 |
|
printf("\t\t#2 : number of process (default 4)\n\n");
|
|
177 |
printf("\t\t#2 : number of ParallelRate (default 1024)\n\n");
|
159 |
178 |
}
|
160 |
179 |
|
161 |
180 |
printf ("\n\tInformation about architecture:\n\n");
|
... | ... | |
168 |
187 |
printf ("\tMax long = %ld\n", LONG_MAX);
|
169 |
188 |
printf ("\tMax long long = %lld\n\n", LLONG_MAX);
|
170 |
189 |
|
171 |
|
insides=splitter(iterations,process,seed_w,seed_z);
|
172 |
|
|
173 |
|
float pi=4.*(float)insides/(float)iterations;
|
|
190 |
insides=splitter(iterations,seed_w,seed_z,ParallelRate);
|
174 |
191 |
|
175 |
|
printf("\tPi=%f with error %f and %lld iterations\n\n",pi,
|
176 |
|
fabs(pi-4*atan(1))/pi,(long long)iterations);
|
|
192 |
LENGTH total=((iterations%ParallelRate)==0)?iterations:(iterations/ParallelRate+1)*ParallelRate;
|
|
193 |
|
|
194 |
printf("Inside/Total %ld %ld\nPi estimation %f\n\n",(long int)insides,(long int)total,(4.*(float)insides/total));
|
177 |
195 |
|
178 |
196 |
}
|