root / Pi / C / Hybrid / Pi_Hybrid.c @ 182
Historique  Voir  Annoter  Télécharger (8,72 ko)
1 
//


2 
// Estimation of Pi using Monte Carlo exploration process

3 
// gcc std=c99 O3 o Pi Pi.c lm

4 
// Emmanuel Quemener <emmanuel.quemener@enslyon.fr>

5 
// Cecill v2

6  
7 
// Needed for gethostname

8 
#define _BSD_SOURCE

9 
#include <sys/unistd.h> 
10  
11 
#include <math.h> 
12 
#include <stdio.h> 
13 
#include <stdlib.h> 
14 
#include <limits.h> 
15 
#include <mpi.h> 
16 
#include <stddef.h> 
17  
18 
#ifdef TIME

19 
#include <sys/time.h> 
20 
#endif

21  
22 
// Marsaglia RNG very simple implementation

23 
#define znew ((z=36969*(z&65535)+(z>>16))<<16) 
24 
#define wnew ((w=18000*(w&65535)+(w>>16))&65535) 
25 
#define MWC (znew+wnew)

26 
#define SHR3 (jsr=(jsr=(jsr=jsr^(jsr<<17))^(jsr>>13))^(jsr<<5)) 
27 
#define CONG (jcong=69069*jcong+1234567) 
28 
#define KISS ((MWC^CONG)+SHR3)

29  
30 
#define ITERATIONS 1000000000 
31  
32 
#define MWCfp MWC * 2.328306435454494e10f 
33 
#define KISSfp KISS * 2.328306435454494e10f 
34 
#define SHR3fp SHR3 * 2.328306435454494e10f 
35 
#define CONGfp CONG * 2.328306435454494e10f 
36  
37 
#define PROCESS 1 
38  
39 
#ifdef LONG

40 
#define LENGTH long long 
41 
#else

42 
#define LENGTH int 
43 
#endif

44  
45 
typedef struct compute_node { 
46 
LENGTH iterations; 
47 
int process;

48 
} node; 
49  
50 
typedef struct compute_node { 
51 
LENGTH inside; 
52 
long int useconds; 
53 
} result; 
54  
55 
unsigned int rotl(unsigned int value, int shift) { 
56 
return (value << shift)  (value >> (sizeof(value) * CHAR_BIT  shift)); 
57 
} 
58 

59 
unsigned int rotr(unsigned int value, int shift) { 
60 
return (value >> shift)  (value << (sizeof(value) * CHAR_BIT  shift)); 
61 
} 
62  
63 
LENGTH MainLoopGlobal(LENGTH iterations,unsigned int seed_w,unsigned int seed_z) 
64 
{ 
65  
66 
#if defined TCONG

67 
unsigned int jcong=seed_z; 
68 
#elif defined TSHR3

69 
unsigned int jsr=seed_w; 
70 
#elif defined TMWC

71 
unsigned int z=seed_z; 
72 
unsigned int w=seed_w; 
73 
#elif defined TKISS

74 
unsigned int jcong=seed_z; 
75 
unsigned int jsr=seed_w; 
76 
unsigned int z=seed_z; 
77 
unsigned int w=seed_w; 
78 
#endif

79 

80 
LENGTH total=0;

81  
82 
for (LENGTH i=0;i<iterations;i++) { 
83  
84 
#if defined TINT32

85 
#define THEONE 1073741824 
86 
#if defined TCONG

87 
unsigned int x=CONG>>17 ; 
88 
unsigned int y=CONG>>17 ; 
89 
#elif defined TSHR3

90 
unsigned int x=SHR3>>17 ; 
91 
unsigned int y=SHR3>>17 ; 
92 
#elif defined TMWC

93 
unsigned int x=MWC>>17 ; 
94 
unsigned int y=MWC>>17 ; 
95 
#elif defined TKISS

96 
unsigned int x=KISS>>17 ; 
97 
unsigned int y=KISS>>17 ; 
98 
#endif

99 
#elif defined TINT64

100 
#define THEONE 4611686018427387904 
101 
#if defined TCONG

102 
unsigned long x=(unsigned long)(CONG>>1) ; 
103 
unsigned long y=(unsigned long)(CONG>>1) ; 
104 
#elif defined TSHR3

105 
unsigned long x=(unsigned long)(SHR3>>1) ; 
106 
unsigned long y=(unsigned long)(SHR3>>1) ; 
107 
#elif defined TMWC

108 
unsigned long x=(unsigned long)(MWC>>1) ; 
109 
unsigned long y=(unsigned long)(MWC>>1) ; 
110 
#elif defined TKISS

111 
unsigned long x=(unsigned long)(KISS>>1) ; 
112 
unsigned long y=(unsigned long)(KISS>>1) ; 
113 
#endif

114 
#elif defined TFP32

115 
#define THEONE 1.0f 
116 
#if defined TCONG

117 
float x=CONGfp ;

118 
float y=CONGfp ;

119 
#elif defined TSHR3

120 
float x=SHR3fp ;

121 
float y=SHR3fp ;

122 
#elif defined TMWC

123 
float x=MWCfp ;

124 
float y=MWCfp ;

125 
#elif defined TKISS

126 
float x=KISSfp ;

127 
float y=KISSfp ;

128 
#endif

129 
#elif defined TFP64

130 
#define THEONE 1.0f 
131 
#if defined TCONG

132 
double x=(double)CONGfp ; 
133 
double y=(double)CONGfp ; 
134 
#elif defined TSHR3

135 
double x=(double)SHR3fp ; 
136 
double y=(double)SHR3fp ; 
137 
#elif defined TMWC

138 
double x=(double)MWCfp ; 
139 
double y=(double)MWCfp ; 
140 
#elif defined TKISS

141 
double x=(double)KISSfp ; 
142 
double y=(double)KISSfp ; 
143 
#endif

144 
#endif

145  
146 
// Matching test

147 
unsigned long inside=((x*x+y*y) < THEONE) ? 1:0; 
148 
total+=inside; 
149 
} 
150  
151 
return(total);

152  
153 
} 
154  
155 
int main(int argc, char *argv[]) { 
156  
157 
unsigned int seed_z=362436069,seed_w=52128862,process=PROCESS; 
158 
// Number of NP or OpenMP processes <1024

159 
LENGTH iterations=ITERATIONS,insideMPI[8192],insideOpenMP[1024], 
160 
part_inside,part_iterations,insides; 
161 
int numtasks,rank,rc,tag=1,i; 
162 
float pi;

163 

164 
// Hostname supposed to be <128 characters

165 
char hostname[128]; 
166  
167 
gethostname(hostname, sizeof hostname);

168  
169 
struct timeval start,end;

170 
long int useconds; 
171  
172 
MPI_Status Stat; 
173 

174 
rc = MPI_Init(&argc,&argv); 
175 
if (rc != MPI_SUCCESS) {

176 
printf ("Error starting MPI program. Terminating.\n");

177 
MPI_Abort(MPI_COMM_WORLD, rc); 
178 
} 
179  
180 
MPI_Comm_size(MPI_COMM_WORLD,&numtasks); 
181 
MPI_Comm_rank(MPI_COMM_WORLD,&rank); 
182  
183 
const int nitems=2; 
184 
int blocklengths[2] = {1,1}; 
185  
186 
#ifdef LONG

187 
MPI_Datatype types_node[2] = {MPI_LONG, MPI_INT};

188 
MPI_Datatype types_result[2] = {MPI_LONG, MPI_LONG};

189 
#else

190 
MPI_Datatype types_node[2] = {MPI_INT, MPI_INT};

191 
MPI_Datatype types_result[2] = {MPI_INT, MPI_LONG};

192 
#endif

193  
194 
MPI_Datatype mpi_node_type,mpi_result_type; 
195 
MPI_Aint offsets[2],offsetsr[2]; 
196  
197 
offsets[0] = offsetof(node, iterations);

198 
offsets[1] = offsetof(node, process);

199  
200 
MPI_Type_create_struct(nitems, blocklengths, offsets, types, &mpi_node_type); 
201 
MPI_Type_commit(&mpi_node_type); 
202  
203 
offsetsr[0] = offsetof(result, inside);

204 
offsetsr[1] = offsetof(result, useconds);

205  
206 
MPI_Type_create_struct(nitems, blocklengths, offsetsr, types, &mpi_result_type); 
207 
MPI_Type_commit(&mpi_result_type); 
208  
209 
if (rank==0) { 
210 

211 
if (argc > 1) { 
212 
iterations=(LENGTH)atoll(argv[1]);

213 
process=atoi(argv[2]);

214 
} 
215 
else {

216 
printf("\n\tPi : Estimate Pi with Monte Carlo exploration\n\n");

217 
printf("\t\t#1 : number of iterations (default 1 billion)\n\n");

218 
printf("\t\t#1 : number of OpenMP processes (default 1)\n\n");

219 
} 
220 

221 
printf ("\n\tInformation about architecture:\n\n");

222  
223 
printf ("Sizeof int = %lld bytes.\n", (long long)sizeof(int)); 
224 
printf ("Sizeof long = %lld bytes.\n", (long long)sizeof(long)); 
225 
printf ("Sizeof long long = %lld bytes.\n", (long long)sizeof(long long)); 
226 

227 
printf ("Max int = %u\n", INT_MAX);

228 
printf ("Max long = %ld\n", LONG_MAX);

229 
printf ("Max long long = %lld\n\n", LLONG_MAX);

230 

231 
part_iterations=((iterations%numtasks) == 0) ? iterations/numtasks:iterations/numtasks+1 ; 
232 

233 
node send; 
234 
send.iterations=part_iterations; 
235 
send.process=process; 
236  
237 
// Split part of code

238 
for (i=1;i<numtasks;i++) { 
239 
rc = MPI_Send(&send, 1, mpi_node_type, i, tag, MPI_COMM_WORLD);

240 
} 
241 

242 
gettimeofday(&start,(struct timezone *)0); 
243 

244 
#pragma omp parallel for 
245 
for (int i=0 ; i<process; i++) { 
246 
insideOpenMP[i]=MainLoopGlobal(part_iterations, 
247 
rotr(seed_w,process), 
248 
rotl(seed_z,process)); 
249 
printf("\t(%s,%i) found %lld for process %i\n",hostname,0, 
250 
(long long)insideOpenMP[i],i); 
251 
} 
252 
printf("\n");

253  
254 
insides=0;

255 
for (int i=0 ; i<process; i++) { 
256 
insides+=insideOpenMP[i]; 
257 
} 
258  
259 
gettimeofday(&end,(struct timezone *)0); 
260 
useconds=(end.tv_secstart.tv_sec)*1000000+end.tv_usecstart.tv_usec;

261 

262 
printf("\tOn %s with %i find %lld inside in %lu useconds.\n",

263 
hostname,rank,(long long)insides,useconds); 
264 

265 
// Join part of code

266 
for (i=1;i<numtasks;i++) { 
267  
268 
result recv; 
269 

270 
rc = MPI_Recv(&recv, 1, mpi_result_type, i, tag, MPI_COMM_WORLD,&Stat);

271  
272 
insideMPI[i]=recv.inside; 
273 
useconds=recv.useconds; 
274  
275 
printf("\tReceive from %i, find %lld inside in %lu useconds\n",i,(long long)insideMPI[i],useconds); 
276 

277 
insides+=insideMPI[i]; 
278 
} 
279 

280 
pi=4.*(float)insides/(float)(part_iterations*numtasks); 
281 

282 
printf("\n\tPi=%.40f\n\twith error %.40f\n\twith %lld iterations\n\n",pi,

283 
fabs(pi4*atan(1.))/pi,(long long)(part_iterations*numtasks)); 
284  
285 
} 
286 
else

287 
{ 
288 
// Receive information from master

289 

290 
node recv; 
291  
292 
rc = MPI_Recv(&recv, 1, mpi_node_type, 0, tag, MPI_COMM_WORLD,&Stat); 
293 
/*

294 
printf("\t(%s,%i) receive from master %lld with %i process\n",

295 
hostname,rank,(long long)recv.iterations,recv.process);

296 
*/

297  
298 
gettimeofday(&start,(struct timezone *)0); 
299  
300 
#pragma omp parallel for 
301 
for (int i=0 ; i<recv.process; i++) { 
302 
insideOpenMP[i]=MainLoopGlobal(recv.iterations,rotr(seed_w,rank+process),rotl(seed_z,rank+process)); 
303 
/*

304 
printf("\t(%s,%i) found %lld for process %i\n",hostname,rank,

305 
(long long)insideOpenMP[i],i);

306 
*/

307 
} 
308 

309 
/* printf("\n"); */

310  
311 
part_inside=0;

312 
for (int i=0 ; i<recv.process; i++) { 
313 
part_inside+=insideOpenMP[i]; 
314 
} 
315 

316 
gettimeofday(&end,(struct timezone *)0); 
317 
useconds=(end.tv_secstart.tv_sec)*1000000+end.tv_usecstart.tv_usec;

318 
/*

319 
printf("\tOn %s rank %i find %lld inside in %lu useconds.\n",

320 
hostname,rank,(long long)part_inside,useconds);

321 
*/

322 
result send; 
323 
send.inside=part_inside; 
324 
send.useconds=useconds; 
325 

326 
rc = MPI_Send(&send, 1, mpi_result_type, 0, tag, MPI_COMM_WORLD); 
327  
328 
} 
329 

330 
MPI_Type_free(&mpi_node_type); 
331 
MPI_Type_free(&mpi_result_type); 
332  
333 
MPI_Finalize(); 
334 

335 
} 