Revision 183 Pi/C/Hybrid/Pi_Hybrid.c
Pi_Hybrid.c (revision 183)  

43  43 
#endif 
44  44  
45  45 
typedef struct compute_node { 
46 
LENGTH iterations;


47 
int process;


46 
LENGTH iterations; 

47 
int process; 

48  48 
} node; 
49  49  
50 
typedef struct compute_node {


51 
LENGTH inside;


52 
long int useconds;


50 
typedef struct compute_result {


51 
LENGTH inside; 

52 
long int useconds; 

53  53 
} result; 
54  54  
55  55 
unsigned int rotl(unsigned int value, int shift) { 
56 
return (value << shift)  (value >> (sizeof(value) * CHAR_BIT  shift));


56 
return (value << shift)  (value >> (sizeof(value) * CHAR_BIT  shift)); 

57  57 
} 
58 


58  
59  59 
unsigned int rotr(unsigned int value, int shift) { 
60 
return (value >> shift)  (value << (sizeof(value) * CHAR_BIT  shift));


60 
return (value >> shift)  (value << (sizeof(value) * CHAR_BIT  shift)); 

61  61 
} 
62  62  
63  63 
LENGTH MainLoopGlobal(LENGTH iterations,unsigned int seed_w,unsigned int seed_z) 
64  64 
{ 
65  
65 


66  66 
#if defined TCONG 
67 
unsigned int jcong=seed_z;


67 
unsigned int jcong=seed_z; 

68  68 
#elif defined TSHR3 
69 
unsigned int jsr=seed_w;


69 
unsigned int jsr=seed_w; 

70  70 
#elif defined TMWC 
71 
unsigned int z=seed_z;


72 
unsigned int w=seed_w;


71 
unsigned int z=seed_z; 

72 
unsigned int w=seed_w; 

73  73 
#elif defined TKISS 
74 
unsigned int jcong=seed_z;


75 
unsigned int jsr=seed_w;


76 
unsigned int z=seed_z;


77 
unsigned int w=seed_w;


74 
unsigned int jcong=seed_z; 

75 
unsigned int jsr=seed_w; 

76 
unsigned int z=seed_z; 

77 
unsigned int w=seed_w; 

78  78 
#endif 
79  79 

80 
LENGTH total=0;


81  
82 
for (LENGTH i=0;i<iterations;i++) {


83  
80 
LENGTH total=0; 

81 


82 
for (LENGTH i=0;i<iterations;i++) { 

83 


84  84 
#if defined TINT32 
85 
#define THEONE 1073741824


86 
#if defined TCONG


87 
unsigned int x=CONG>>17 ;


88 
unsigned int y=CONG>>17 ;


89 
#elif defined TSHR3


90 
unsigned int x=SHR3>>17 ;


91 
unsigned int y=SHR3>>17 ;


92 
#elif defined TMWC


93 
unsigned int x=MWC>>17 ;


94 
unsigned int y=MWC>>17 ;


95 
#elif defined TKISS


96 
unsigned int x=KISS>>17 ;


97 
unsigned int y=KISS>>17 ;


98 
#endif


85 
#define THEONE 1073741824 

86 
#if defined TCONG 

87 
unsigned int x=CONG>>17 ; 

88 
unsigned int y=CONG>>17 ; 

89 
#elif defined TSHR3 

90 
unsigned int x=SHR3>>17 ; 

91 
unsigned int y=SHR3>>17 ; 

92 
#elif defined TMWC 

93 
unsigned int x=MWC>>17 ; 

94 
unsigned int y=MWC>>17 ; 

95 
#elif defined TKISS 

96 
unsigned int x=KISS>>17 ; 

97 
unsigned int y=KISS>>17 ; 

98 
#endif 

99  99 
#elif defined TINT64 
100 
#define THEONE 4611686018427387904


101 
#if defined TCONG


102 
unsigned long x=(unsigned long)(CONG>>1) ;


103 
unsigned long y=(unsigned long)(CONG>>1) ;


104 
#elif defined TSHR3


105 
unsigned long x=(unsigned long)(SHR3>>1) ;


106 
unsigned long y=(unsigned long)(SHR3>>1) ;


107 
#elif defined TMWC


108 
unsigned long x=(unsigned long)(MWC>>1) ;


109 
unsigned long y=(unsigned long)(MWC>>1) ;


110 
#elif defined TKISS


111 
unsigned long x=(unsigned long)(KISS>>1) ;


112 
unsigned long y=(unsigned long)(KISS>>1) ;


113 
#endif


100 
#define THEONE 4611686018427387904 

101 
#if defined TCONG 

102 
unsigned long x=(unsigned long)(CONG>>1) ; 

103 
unsigned long y=(unsigned long)(CONG>>1) ; 

104 
#elif defined TSHR3 

105 
unsigned long x=(unsigned long)(SHR3>>1) ; 

106 
unsigned long y=(unsigned long)(SHR3>>1) ; 

107 
#elif defined TMWC 

108 
unsigned long x=(unsigned long)(MWC>>1) ; 

109 
unsigned long y=(unsigned long)(MWC>>1) ; 

110 
#elif defined TKISS 

111 
unsigned long x=(unsigned long)(KISS>>1) ; 

112 
unsigned long y=(unsigned long)(KISS>>1) ; 

113 
#endif 

114  114 
#elif defined TFP32 
115 
#define THEONE 1.0f


116 
#if defined TCONG


117 
float x=CONGfp ;


118 
float y=CONGfp ;


119 
#elif defined TSHR3


120 
float x=SHR3fp ;


121 
float y=SHR3fp ;


122 
#elif defined TMWC


123 
float x=MWCfp ;


124 
float y=MWCfp ;


125 
#elif defined TKISS


126 
float x=KISSfp ;


127 
float y=KISSfp ;


128 
#endif


115 
#define THEONE 1.0f 

116 
#if defined TCONG 

117 
float x=CONGfp ; 

118 
float y=CONGfp ; 

119 
#elif defined TSHR3 

120 
float x=SHR3fp ; 

121 
float y=SHR3fp ; 

122 
#elif defined TMWC 

123 
float x=MWCfp ; 

124 
float y=MWCfp ; 

125 
#elif defined TKISS 

126 
float x=KISSfp ; 

127 
float y=KISSfp ; 

128 
#endif 

129  129 
#elif defined TFP64 
130 
#define THEONE 1.0f 

131 
#if defined TCONG 

132 
double x=(double)CONGfp ; 

133 
double y=(double)CONGfp ; 

134 
#elif defined TSHR3 

135 
double x=(double)SHR3fp ; 

136 
double y=(double)SHR3fp ; 

137 
#elif defined TMWC 

138 
double x=(double)MWCfp ; 

139 
double y=(double)MWCfp ; 

140 
#elif defined TKISS 

141 
double x=(double)KISSfp ; 

142 
double y=(double)KISSfp ; 

143 
#endif 

130 
#define THEONE 1.0f 

131 
#if defined TCONG 

132 
double x=(double)CONGfp ; 

133 
double y=(double)CONGfp ; 

134 
#elif defined TSHR3 

135 
double x=(double)SHR3fp ; 

136 
double y=(double)SHR3fp ; 

137 
#elif defined TMWC 

138 
double x=(double)MWCfp ; 

139 
double y=(double)MWCfp ; 

140 
#elif defined TKISS 

141 
double x=(double)KISSfp ; 

142 
double y=(double)KISSfp ; 

144  143 
#endif 
144 
#endif 

145  145  
146 
// Matching test


147 
unsigned long inside=((x*x+y*y) < THEONE) ? 1:0;


148 
total+=inside;


149 
}


150  
151 
return(total);


152  
146 
// Matching test 

147 
unsigned long inside=((x*x+y*y) < THEONE) ? 1:0; 

148 
total+=inside; 

149 
} 

150 


151 
return(total); 

152 


153  153 
} 
154  154  
155  155 
int main(int argc, char *argv[]) { 
156  
156 


157  157 
unsigned int seed_z=362436069,seed_w=52128862,process=PROCESS; 
158  158 
// Number of NP or OpenMP processes <1024 
159  159 
LENGTH iterations=ITERATIONS,insideMPI[8192],insideOpenMP[1024], 
160 
part_inside,part_iterations,insides;


160 
part_inside=0,part_iterations,insides=0;


161  161 
int numtasks,rank,rc,tag=1,i; 
162  162 
float pi; 
163  163 

164  164 
// Hostname supposed to be <128 characters 
165  165 
char hostname[128]; 
166  
166 


167  167 
gethostname(hostname, sizeof hostname); 
168  
168 


169  169 
struct timeval start,end; 
170  170 
long int useconds; 
171  
171 


172  172 
MPI_Status Stat; 
173  173 

174  174 
rc = MPI_Init(&argc,&argv); 
...  ...  
179  179  
180  180 
MPI_Comm_size(MPI_COMM_WORLD,&numtasks); 
181  181 
MPI_Comm_rank(MPI_COMM_WORLD,&rank); 
182  
182 


183  183 
const int nitems=2; 
184  184 
int blocklengths[2] = {1,1}; 
185  
185 


186  186 
#ifdef LONG 
187  187 
MPI_Datatype types_node[2] = {MPI_LONG, MPI_INT}; 
188  188 
MPI_Datatype types_result[2] = {MPI_LONG, MPI_LONG}; 
...  ...  
190  190 
MPI_Datatype types_node[2] = {MPI_INT, MPI_INT}; 
191  191 
MPI_Datatype types_result[2] = {MPI_INT, MPI_LONG}; 
192  192 
#endif 
193  
193 


194  194 
MPI_Datatype mpi_node_type,mpi_result_type; 
195  195 
MPI_Aint offsets[2],offsetsr[2]; 
196  
196 


197  197 
offsets[0] = offsetof(node, iterations); 
198  198 
offsets[1] = offsetof(node, process); 
199  
200 
MPI_Type_create_struct(nitems, blocklengths, offsets, types, &mpi_node_type); 

199 


200 
MPI_Type_create_struct(nitems, blocklengths, offsets, types_node, &mpi_node_type);


201  201 
MPI_Type_commit(&mpi_node_type); 
202  
202 


203  203 
offsetsr[0] = offsetof(result, inside); 
204  204 
offsetsr[1] = offsetof(result, useconds); 
205  
206 
MPI_Type_create_struct(nitems, blocklengths, offsetsr, types, &mpi_result_type); 

205 


206 
MPI_Type_create_struct(nitems, blocklengths, offsetsr, types_result, &mpi_result_type);


207  207 
MPI_Type_commit(&mpi_result_type); 
208  
208 


209  209 
if (rank==0) { 
210  210 

211  211 
if (argc > 1) { 
...  ...  
214  214 
} 
215  215 
else { 
216  216 
printf("\n\tPi : Estimate Pi with Monte Carlo exploration\n\n"); 
217 
printf("\t\t#1 : number of iterations (default 1 billion)\n\n");


218 
printf("\t\t#1 : number of OpenMP processes (default 1)\n\n");


217 
printf("\t\t#1 : number of iterations (default 1 billion)\n"); 

218 
printf("\t\t#2 : number of OpenMP processes (default 1)\n\n");


219  219 
} 
220  220 

221  221 
printf ("\n\tInformation about architecture:\n\n"); 
222  
223 
printf ("Sizeof int = %lld bytes.\n", (long long)sizeof(int)); 

224 
printf ("Sizeof long = %lld bytes.\n", (long long)sizeof(long)); 

225 
printf ("Sizeof long long = %lld bytes.\n", (long long)sizeof(long long)); 

226  222 

227 
printf ("Max int = %u\n", INT_MAX);


228 
printf ("Max long = %ld\n", LONG_MAX);


229 
printf ("Max long long = %lld\n\n", LLONG_MAX);


223 
printf ("\tSizeof int = %lld bytes.\n", (long long)sizeof(int));


224 
printf ("\tSizeof long = %lld bytes.\n", (long long)sizeof(long));


225 
printf ("\tSizeof long long = %lld bytes.\n", (long long)sizeof(long long));


230  226 

231 
part_iterations=((iterations%numtasks) == 0) ? iterations/numtasks:iterations/numtasks+1 ; 

227 
printf ("\tMax int = %u\n", INT_MAX); 

228 
printf ("\tMax long = %ld\n", LONG_MAX); 

229 
printf ("\tMax long long = %lld\n\n", LLONG_MAX); 

232  230 

231 
part_iterations=(((iterations%numtasks)%process) == 0) ? iterations/numtasks/process:iterations/numtasks/process+1 ; 

232 


233  233 
node send; 
234  234 
send.iterations=part_iterations; 
235  235 
send.process=process; 
...  ...  
242  242 
gettimeofday(&start,(struct timezone *)0); 
243  243 

244  244 
#pragma omp parallel for 
245 
for (int i=0 ; i<process; i++) { 

246 
insideOpenMP[i]=MainLoopGlobal(part_iterations, 

247 
rotr(seed_w,process), 

248 
rotl(seed_z,process)); 

249 
printf("\t(%s,%i) found %lld for process %i\n",hostname,0, 

250 
(long long)insideOpenMP[i],i); 

251 
} 

252 
printf("\n"); 

253  
254 
insides=0; 

255 
for (int i=0 ; i<process; i++) { 

256 
insides+=insideOpenMP[i]; 

257 
} 

258  
259 
gettimeofday(&end,(struct timezone *)0); 

260 
useconds=(end.tv_secstart.tv_sec)*1000000+end.tv_usecstart.tv_usec; 

261 


262 
printf("\tOn %s with %i find %lld inside in %lu useconds.\n", 

263 
hostname,rank,(long long)insides,useconds); 

264 


265 
// Join part of code 

266 
for (i=1;i<numtasks;i++) { 

267  
268 
result recv; 

245 
for (int i=0 ; i<process; i++) { 

246 
insideOpenMP[i]=MainLoopGlobal(part_iterations, 

247 
rotr(seed_w,i), 

248 
rotl(seed_z,i)); 

249 
/* 

250 
printf("\t(%s,%i) found %lld for process %i\n",hostname,0, 

251 
(long long)insideOpenMP[i],i); */ 

252 
} 

253 
/* 

254 
printf("\n"); 

255 
*/ 

269  256 

270 
rc = MPI_Recv(&recv, 1, mpi_result_type, i, tag, MPI_COMM_WORLD,&Stat); 

271  
272 
insideMPI[i]=recv.inside; 

273 
useconds=recv.useconds; 

274  
275 
printf("\tReceive from %i, find %lld inside in %lu useconds\n",i,(long long)insideMPI[i],useconds); 

257 
insides=0; 

258 
for (int i=0 ; i<process; i++) { 

259 
insides+=insideOpenMP[i]; 

260 
} 

276  261 

277 
insides+=insideMPI[i]; 

278 
} 

262 
gettimeofday(&end,(struct timezone *)0); 

263 
useconds=(end.tv_secstart.tv_sec)*1000000+end.tv_usecstart.tv_usec; 

264 


265 
printf("\tOn %s with rank #%i find %lld inside in %lu useconds.\n", 

266 
hostname,rank,(long long)insides,useconds); 

267 


268 
// Join part of code 

269 
for (i=1;i<numtasks;i++) { 

279  270 

280 
pi=4.*(float)insides/(float)(part_iterations*numtasks);


271 
result recv;


281  272 

282 
printf("\n\tPi=%.40f\n\twith error %.40f\n\twith %lld iterations\n\n",pi, 

283 
fabs(pi4*atan(1.))/pi,(long long)(part_iterations*numtasks)); 

284  
273 
rc = MPI_Recv(&recv, 1, mpi_result_type, i, tag, MPI_COMM_WORLD,&Stat); 

274 


275 
insideMPI[i]=recv.inside; 

276 
useconds=recv.useconds; 

277 


278 
printf("\tReceive from rank #%i, find %lld inside in %lu useconds\n",i,(long long)insideMPI[i],useconds); 

279 


280 
insides+=insideMPI[i]; 

281 
} 

282 


283 
pi=4.*(float)insides/(float)(part_iterations*numtasks*process); 

284 


285 
printf("\n\tPi=%.40f\n\twith error %.40f\n\twith %lld iterations\n\n",pi, 

286 
fabs(pi4*atan(1.))/pi,(long long)(part_iterations*numtasks*process)); 

287 


285  288 
} 
286  289 
else 
287  290 
{ 
288  291 
// Receive information from master 
289 


292 


290  293 
node recv; 
291  
294 


292  295 
rc = MPI_Recv(&recv, 1, mpi_node_type, 0, tag, MPI_COMM_WORLD,&Stat); 
293 
/*


296 
/* 

294  297 
printf("\t(%s,%i) receive from master %lld with %i process\n", 
295 
hostname,rank,(long long)recv.iterations,recv.process);


298 
hostname,rank,(long long)recv.iterations,recv.process); 

296  299 
*/ 
297  
300 


298  301 
gettimeofday(&start,(struct timezone *)0); 
299  
302 


300  303 
#pragma omp parallel for 
301  304 
for (int i=0 ; i<recv.process; i++) { 
302 
insideOpenMP[i]=MainLoopGlobal(recv.iterations,rotr(seed_w,rank+process),rotl(seed_z,rank+process));


305 
insideOpenMP[i]=MainLoopGlobal(recv.iterations,rotr(seed_w,rank+i),rotl(seed_z,ranki));


303  306 
/* 
304 
printf("\t(%s,%i) found %lld for process %i\n",hostname,rank, 

305 
(long long)insideOpenMP[i],i);


307 
printf("\t(%s,%i) found %lld for process %i\n",hostname,rank,


308 
(long long)insideOpenMP[i],i); 

306  309 
*/ 
307  310 
} 
308  311 

309  312 
/* printf("\n"); */ 
310  
311 
part_inside=0; 

313 


312  314 
for (int i=0 ; i<recv.process; i++) { 
313  315 
part_inside+=insideOpenMP[i]; 
314  316 
} 
...  ...  
316  318 
gettimeofday(&end,(struct timezone *)0); 
317  319 
useconds=(end.tv_secstart.tv_sec)*1000000+end.tv_usecstart.tv_usec; 
318  320 
/* 
319 
printf("\tOn %s rank %i find %lld inside in %lu useconds.\n",


320 
hostname,rank,(long long)part_inside,useconds);


321 
printf("\tOn %s rank %i find %lld inside in %lu useconds.\n",


322 
hostname,rank,(long long)part_inside,useconds); 

321  323 
*/ 
322  324 
result send; 
323  325 
send.inside=part_inside; 
324  326 
send.useconds=useconds; 
325  327 

326  328 
rc = MPI_Send(&send, 1, mpi_result_type, 0, tag, MPI_COMM_WORLD); 
327  
329 


328  330 
} 
329  331 

330  332 
MPI_Type_free(&mpi_node_type); 
331  333 
MPI_Type_free(&mpi_result_type); 
332  
334 


333  335 
MPI_Finalize(); 
334  336 

335  337 
} 
Also available in: Unified diff