root / Pi / C / Hybrid / Pi_Hybrid.c @ 183
History  View  Annotate  Download (8.6 kB)
1 
//


2 
// Estimation of Pi using Monte Carlo exploration process

3 
// gcc std=c99 O3 o Pi Pi.c lm

4 
// Emmanuel Quemener <emmanuel.quemener@enslyon.fr>

5 
// Cecill v2

6  
7 
// Needed for gethostname

8 
#define _BSD_SOURCE

9 
#include <sys/unistd.h> 
10  
11 
#include <math.h> 
12 
#include <stdio.h> 
13 
#include <stdlib.h> 
14 
#include <limits.h> 
15 
#include <mpi.h> 
16 
#include <stddef.h> 
17  
18 
#ifdef TIME

19 
#include <sys/time.h> 
20 
#endif

21  
22 
// Marsaglia RNG very simple implementation

23 
#define znew ((z=36969*(z&65535)+(z>>16))<<16) 
24 
#define wnew ((w=18000*(w&65535)+(w>>16))&65535) 
25 
#define MWC (znew+wnew)

26 
#define SHR3 (jsr=(jsr=(jsr=jsr^(jsr<<17))^(jsr>>13))^(jsr<<5)) 
27 
#define CONG (jcong=69069*jcong+1234567) 
28 
#define KISS ((MWC^CONG)+SHR3)

29  
30 
#define ITERATIONS 1000000000 
31  
32 
#define MWCfp MWC * 2.328306435454494e10f 
33 
#define KISSfp KISS * 2.328306435454494e10f 
34 
#define SHR3fp SHR3 * 2.328306435454494e10f 
35 
#define CONGfp CONG * 2.328306435454494e10f 
36  
37 
#define PROCESS 1 
38  
39 
#ifdef LONG

40 
#define LENGTH long long 
41 
#else

42 
#define LENGTH int 
43 
#endif

44  
45 
typedef struct compute_node { 
46 
LENGTH iterations; 
47 
int process;

48 
} node; 
49  
50 
typedef struct compute_result { 
51 
LENGTH inside; 
52 
long int useconds; 
53 
} result; 
54  
55 
unsigned int rotl(unsigned int value, int shift) { 
56 
return (value << shift)  (value >> (sizeof(value) * CHAR_BIT  shift)); 
57 
} 
58  
59 
unsigned int rotr(unsigned int value, int shift) { 
60 
return (value >> shift)  (value << (sizeof(value) * CHAR_BIT  shift)); 
61 
} 
62  
63 
LENGTH MainLoopGlobal(LENGTH iterations,unsigned int seed_w,unsigned int seed_z) 
64 
{ 
65 

66 
#if defined TCONG

67 
unsigned int jcong=seed_z; 
68 
#elif defined TSHR3

69 
unsigned int jsr=seed_w; 
70 
#elif defined TMWC

71 
unsigned int z=seed_z; 
72 
unsigned int w=seed_w; 
73 
#elif defined TKISS

74 
unsigned int jcong=seed_z; 
75 
unsigned int jsr=seed_w; 
76 
unsigned int z=seed_z; 
77 
unsigned int w=seed_w; 
78 
#endif

79 

80 
LENGTH total=0;

81 

82 
for (LENGTH i=0;i<iterations;i++) { 
83 

84 
#if defined TINT32

85 
#define THEONE 1073741824 
86 
#if defined TCONG

87 
unsigned int x=CONG>>17 ; 
88 
unsigned int y=CONG>>17 ; 
89 
#elif defined TSHR3

90 
unsigned int x=SHR3>>17 ; 
91 
unsigned int y=SHR3>>17 ; 
92 
#elif defined TMWC

93 
unsigned int x=MWC>>17 ; 
94 
unsigned int y=MWC>>17 ; 
95 
#elif defined TKISS

96 
unsigned int x=KISS>>17 ; 
97 
unsigned int y=KISS>>17 ; 
98 
#endif

99 
#elif defined TINT64

100 
#define THEONE 4611686018427387904 
101 
#if defined TCONG

102 
unsigned long x=(unsigned long)(CONG>>1) ; 
103 
unsigned long y=(unsigned long)(CONG>>1) ; 
104 
#elif defined TSHR3

105 
unsigned long x=(unsigned long)(SHR3>>1) ; 
106 
unsigned long y=(unsigned long)(SHR3>>1) ; 
107 
#elif defined TMWC

108 
unsigned long x=(unsigned long)(MWC>>1) ; 
109 
unsigned long y=(unsigned long)(MWC>>1) ; 
110 
#elif defined TKISS

111 
unsigned long x=(unsigned long)(KISS>>1) ; 
112 
unsigned long y=(unsigned long)(KISS>>1) ; 
113 
#endif

114 
#elif defined TFP32

115 
#define THEONE 1.0f 
116 
#if defined TCONG

117 
float x=CONGfp ;

118 
float y=CONGfp ;

119 
#elif defined TSHR3

120 
float x=SHR3fp ;

121 
float y=SHR3fp ;

122 
#elif defined TMWC

123 
float x=MWCfp ;

124 
float y=MWCfp ;

125 
#elif defined TKISS

126 
float x=KISSfp ;

127 
float y=KISSfp ;

128 
#endif

129 
#elif defined TFP64

130 
#define THEONE 1.0f 
131 
#if defined TCONG

132 
double x=(double)CONGfp ; 
133 
double y=(double)CONGfp ; 
134 
#elif defined TSHR3

135 
double x=(double)SHR3fp ; 
136 
double y=(double)SHR3fp ; 
137 
#elif defined TMWC

138 
double x=(double)MWCfp ; 
139 
double y=(double)MWCfp ; 
140 
#elif defined TKISS

141 
double x=(double)KISSfp ; 
142 
double y=(double)KISSfp ; 
143 
#endif

144 
#endif

145  
146 
// Matching test

147 
unsigned long inside=((x*x+y*y) < THEONE) ? 1:0; 
148 
total+=inside; 
149 
} 
150 

151 
return(total);

152 

153 
} 
154  
155 
int main(int argc, char *argv[]) { 
156 

157 
unsigned int seed_z=362436069,seed_w=52128862,process=PROCESS; 
158 
// Number of NP or OpenMP processes <1024

159 
LENGTH iterations=ITERATIONS,insideMPI[8192],insideOpenMP[1024], 
160 
part_inside=0,part_iterations,insides=0; 
161 
int numtasks,rank,rc,tag=1,i; 
162 
float pi;

163 

164 
// Hostname supposed to be <128 characters

165 
char hostname[128]; 
166 

167 
gethostname(hostname, sizeof hostname);

168 

169 
struct timeval start,end;

170 
long int useconds; 
171 

172 
MPI_Status Stat; 
173 

174 
rc = MPI_Init(&argc,&argv); 
175 
if (rc != MPI_SUCCESS) {

176 
printf ("Error starting MPI program. Terminating.\n");

177 
MPI_Abort(MPI_COMM_WORLD, rc); 
178 
} 
179  
180 
MPI_Comm_size(MPI_COMM_WORLD,&numtasks); 
181 
MPI_Comm_rank(MPI_COMM_WORLD,&rank); 
182 

183 
const int nitems=2; 
184 
int blocklengths[2] = {1,1}; 
185 

186 
#ifdef LONG

187 
MPI_Datatype types_node[2] = {MPI_LONG, MPI_INT};

188 
MPI_Datatype types_result[2] = {MPI_LONG, MPI_LONG};

189 
#else

190 
MPI_Datatype types_node[2] = {MPI_INT, MPI_INT};

191 
MPI_Datatype types_result[2] = {MPI_INT, MPI_LONG};

192 
#endif

193 

194 
MPI_Datatype mpi_node_type,mpi_result_type; 
195 
MPI_Aint offsets[2],offsetsr[2]; 
196 

197 
offsets[0] = offsetof(node, iterations);

198 
offsets[1] = offsetof(node, process);

199 

200 
MPI_Type_create_struct(nitems, blocklengths, offsets, types_node, &mpi_node_type); 
201 
MPI_Type_commit(&mpi_node_type); 
202 

203 
offsetsr[0] = offsetof(result, inside);

204 
offsetsr[1] = offsetof(result, useconds);

205 

206 
MPI_Type_create_struct(nitems, blocklengths, offsetsr, types_result, &mpi_result_type); 
207 
MPI_Type_commit(&mpi_result_type); 
208 

209 
if (rank==0) { 
210 

211 
if (argc > 1) { 
212 
iterations=(LENGTH)atoll(argv[1]);

213 
process=atoi(argv[2]);

214 
} 
215 
else {

216 
printf("\n\tPi : Estimate Pi with Monte Carlo exploration\n\n");

217 
printf("\t\t#1 : number of iterations (default 1 billion)\n");

218 
printf("\t\t#2 : number of OpenMP processes (default 1)\n\n");

219 
} 
220 

221 
printf ("\n\tInformation about architecture:\n\n");

222 

223 
printf ("\tSizeof int = %lld bytes.\n", (long long)sizeof(int)); 
224 
printf ("\tSizeof long = %lld bytes.\n", (long long)sizeof(long)); 
225 
printf ("\tSizeof long long = %lld bytes.\n", (long long)sizeof(long long)); 
226 

227 
printf ("\tMax int = %u\n", INT_MAX);

228 
printf ("\tMax long = %ld\n", LONG_MAX);

229 
printf ("\tMax long long = %lld\n\n", LLONG_MAX);

230 

231 
part_iterations=(((iterations%numtasks)%process) == 0) ? iterations/numtasks/process:iterations/numtasks/process+1 ; 
232 

233 
node send; 
234 
send.iterations=part_iterations; 
235 
send.process=process; 
236  
237 
// Split part of code

238 
for (i=1;i<numtasks;i++) { 
239 
rc = MPI_Send(&send, 1, mpi_node_type, i, tag, MPI_COMM_WORLD);

240 
} 
241 

242 
gettimeofday(&start,(struct timezone *)0); 
243 

244 
#pragma omp parallel for 
245 
for (int i=0 ; i<process; i++) { 
246 
insideOpenMP[i]=MainLoopGlobal(part_iterations, 
247 
rotr(seed_w,i), 
248 
rotl(seed_z,i)); 
249 
/*

250 
printf("\t(%s,%i) found %lld for process %i\n",hostname,0,

251 
(long long)insideOpenMP[i],i); */

252 
} 
253 
/*

254 
printf("\n");

255 
*/

256 

257 
insides=0;

258 
for (int i=0 ; i<process; i++) { 
259 
insides+=insideOpenMP[i]; 
260 
} 
261 

262 
gettimeofday(&end,(struct timezone *)0); 
263 
useconds=(end.tv_secstart.tv_sec)*1000000+end.tv_usecstart.tv_usec;

264 

265 
printf("\tOn %s with rank #%i find %lld inside in %lu useconds.\n",

266 
hostname,rank,(long long)insides,useconds); 
267 

268 
// Join part of code

269 
for (i=1;i<numtasks;i++) { 
270 

271 
result recv; 
272 

273 
rc = MPI_Recv(&recv, 1, mpi_result_type, i, tag, MPI_COMM_WORLD,&Stat);

274 

275 
insideMPI[i]=recv.inside; 
276 
useconds=recv.useconds; 
277 

278 
printf("\tReceive from rank #%i, find %lld inside in %lu useconds\n",i,(long long)insideMPI[i],useconds); 
279 

280 
insides+=insideMPI[i]; 
281 
} 
282 

283 
pi=4.*(float)insides/(float)(part_iterations*numtasks*process); 
284 

285 
printf("\n\tPi=%.40f\n\twith error %.40f\n\twith %lld iterations\n\n",pi,

286 
fabs(pi4*atan(1.))/pi,(long long)(part_iterations*numtasks*process)); 
287 

288 
} 
289 
else

290 
{ 
291 
// Receive information from master

292 

293 
node recv; 
294 

295 
rc = MPI_Recv(&recv, 1, mpi_node_type, 0, tag, MPI_COMM_WORLD,&Stat); 
296 
/*

297 
printf("\t(%s,%i) receive from master %lld with %i process\n",

298 
hostname,rank,(long long)recv.iterations,recv.process);

299 
*/

300 

301 
gettimeofday(&start,(struct timezone *)0); 
302 

303 
#pragma omp parallel for 
304 
for (int i=0 ; i<recv.process; i++) { 
305 
insideOpenMP[i]=MainLoopGlobal(recv.iterations,rotr(seed_w,rank+i),rotl(seed_z,ranki)); 
306 
/*

307 
printf("\t(%s,%i) found %lld for process %i\n",hostname,rank,

308 
(long long)insideOpenMP[i],i);

309 
*/

310 
} 
311 

312 
/* printf("\n"); */

313 

314 
for (int i=0 ; i<recv.process; i++) { 
315 
part_inside+=insideOpenMP[i]; 
316 
} 
317 

318 
gettimeofday(&end,(struct timezone *)0); 
319 
useconds=(end.tv_secstart.tv_sec)*1000000+end.tv_usecstart.tv_usec;

320 
/*

321 
printf("\tOn %s rank %i find %lld inside in %lu useconds.\n",

322 
hostname,rank,(long long)part_inside,useconds);

323 
*/

324 
result send; 
325 
send.inside=part_inside; 
326 
send.useconds=useconds; 
327 

328 
rc = MPI_Send(&send, 1, mpi_result_type, 0, tag, MPI_COMM_WORLD); 
329 

330 
} 
331 

332 
MPI_Type_free(&mpi_node_type); 
333 
MPI_Type_free(&mpi_result_type); 
334 

335 
MPI_Finalize(); 
336 

337 
} 