Revision 182
Pi/C/Hybrid/Pi_Hybrid.c (revision 182)  

43  43 
#endif 
44  44  
45  45 
typedef struct compute_node { 
46 
LENGTH iterations; 

47 
int process; 

46 
LENGTH iterations;


47 
int process;


48  48 
} node; 
49  49  
50 
typedef struct compute_result {


51 
LENGTH inside; 

52 
long int useconds; 

50 
typedef struct compute_node {


51 
LENGTH inside;


52 
long int useconds;


53  53 
} result; 
54  54  
55  55 
/*
 * Rotate `value` left by `shift` bit positions.
 *
 * The count is converted to unsigned and reduced modulo the bit width of
 * unsigned int, so a count of 0, a count equal to the width, or a negative
 * count never reaches the shift operators with an out-of-range operand
 * (which would be undefined behavior).
 *
 * Returns the rotated value; `value` is returned unchanged when the
 * reduced count is 0.
 */
unsigned int rotl(unsigned int value, int shift) {
  const unsigned int width = (unsigned int)(sizeof(value) * CHAR_BIT);
  const unsigned int count = (unsigned int)shift % width;

  if (count == 0) {
    /* value >> width would be undefined, and nothing needs to move. */
    return value;
  }
  return (value << count) | (value >> (width - count));
}
58  
58 


59  59 
/*
 * Rotate `value` right by `shift` bit positions.
 *
 * The count is converted to unsigned and reduced modulo the bit width of
 * unsigned int, so a count of 0, a count equal to the width, or a negative
 * count never reaches the shift operators with an out-of-range operand
 * (which would be undefined behavior).
 *
 * Returns the rotated value; `value` is returned unchanged when the
 * reduced count is 0.
 */
unsigned int rotr(unsigned int value, int shift) {
  const unsigned int width = (unsigned int)(sizeof(value) * CHAR_BIT);
  const unsigned int count = (unsigned int)shift % width;

  if (count == 0) {
    /* value << width would be undefined, and nothing needs to move. */
    return value;
  }
  return (value >> count) | (value << (width - count));
}
62  62  
63  63 
/*
 * Draw `iterations` (x, y) pairs and count how many satisfy
 * x*x + y*y < THEONE.
 *
 * iterations : number of pairs to draw.
 * seed_w     : initial value for the `jsr` and/or `w` generator state.
 * seed_z     : initial value for the `jcong` and/or `z` generator state.
 *
 * Returns the number of pairs that passed the test.
 *
 * Two compile-time selections shape the body:
 *   - TCONG / TSHR3 / TMWC / TKISS choose which state variables exist and
 *     which generator macro (CONG, SHR3, MWC, KISS or the *fp variants) is
 *     used. Those macros are defined outside this function; presumably they
 *     read and update the local state variables by name -- confirm against
 *     their definitions.
 *   - TINT32 / TINT64 / TFP32 / TFP64 choose the type of x and y and the
 *     matching threshold THEONE.
 */
LENGTH MainLoopGlobal(LENGTH iterations,unsigned int seed_w,unsigned int seed_z)
{

#if defined TCONG
  unsigned int jcong=seed_z;
#elif defined TSHR3
  unsigned int jsr=seed_w;
#elif defined TMWC
  unsigned int z=seed_z;
  unsigned int w=seed_w;
#elif defined TKISS
  unsigned int jcong=seed_z;
  unsigned int jsr=seed_w;
  unsigned int z=seed_z;
  unsigned int w=seed_w;
#endif

  LENGTH total=0;

  for (LENGTH i=0;i<iterations;i++) {

#if defined TINT32
    /* THEONE is 2^30, i.e. (2^15)^2; assuming 32-bit generator output,
       the >>17 keeps x and y below 2^15. */
#define THEONE 1073741824
#if defined TCONG
    unsigned int x=CONG>>17 ;
    unsigned int y=CONG>>17 ;
#elif defined TSHR3
    unsigned int x=SHR3>>17 ;
    unsigned int y=SHR3>>17 ;
#elif defined TMWC
    unsigned int x=MWC>>17 ;
    unsigned int y=MWC>>17 ;
#elif defined TKISS
    unsigned int x=KISS>>17 ;
    unsigned int y=KISS>>17 ;
#endif
#elif defined TINT64
    /* THEONE is 2^62, i.e. (2^31)^2; assuming 32-bit generator output,
       the >>1 keeps x and y below 2^31.
       NOTE(review): this relies on unsigned long being 64 bits wide. */
#define THEONE 4611686018427387904
#if defined TCONG
    unsigned long x=(unsigned long)(CONG>>1) ;
    unsigned long y=(unsigned long)(CONG>>1) ;
#elif defined TSHR3
    unsigned long x=(unsigned long)(SHR3>>1) ;
    unsigned long y=(unsigned long)(SHR3>>1) ;
#elif defined TMWC
    unsigned long x=(unsigned long)(MWC>>1) ;
    unsigned long y=(unsigned long)(MWC>>1) ;
#elif defined TKISS
    unsigned long x=(unsigned long)(KISS>>1) ;
    unsigned long y=(unsigned long)(KISS>>1) ;
#endif
#elif defined TFP32
#define THEONE 1.0f
#if defined TCONG
    float x=CONGfp ;
    float y=CONGfp ;
#elif defined TSHR3
    float x=SHR3fp ;
    float y=SHR3fp ;
#elif defined TMWC
    float x=MWCfp ;
    float y=MWCfp ;
#elif defined TKISS
    float x=KISSfp ;
    float y=KISSfp ;
#endif
#elif defined TFP64
#define THEONE 1.0f
#if defined TCONG
    double x=(double)CONGfp ;
    double y=(double)CONGfp ;
#elif defined TSHR3
    double x=(double)SHR3fp ;
    double y=(double)SHR3fp ;
#elif defined TMWC
    double x=(double)MWCfp ;
    double y=(double)MWCfp ;
#elif defined TKISS
    double x=(double)KISSfp ;
    double y=(double)KISSfp ;
#endif
#endif

    // Matching test
    unsigned long inside=((x*x+y*y) < THEONE) ? 1:0;
    total+=inside;
  }

  return(total);

}
154  154  
155  155 
int main(int argc, char *argv[]) { 
156 


156  
157  157 
unsigned int seed_z=362436069,seed_w=52128862,process=PROCESS; 
158  158 
// Number of NP or OpenMP processes <1024 
159  159 
LENGTH iterations=ITERATIONS,insideMPI[8192],insideOpenMP[1024], 
160 
part_inside=0,part_iterations,insides=0;


160 
part_inside,part_iterations,insides;


161  161 
int numtasks,rank,rc,tag=1,i; 
162  162 
float pi; 
163  163 

164  164 
// Hostname supposed to be <128 characters 
165  165 
char hostname[128]; 
166 


166  
167  167 
gethostname(hostname, sizeof hostname); 
168 


168  
169  169 
struct timeval start,end; 
170  170 
long int useconds; 
171 


171  
172  172 
MPI_Status Stat; 
173  173 

174  174 
rc = MPI_Init(&argc,&argv); 
...  ...  
179  179  
180  180 
MPI_Comm_size(MPI_COMM_WORLD,&numtasks); 
181  181 
MPI_Comm_rank(MPI_COMM_WORLD,&rank); 
182 


182  
183  183 
const int nitems=2; 
184  184 
int blocklengths[2] = {1,1}; 
185 


185  
186  186 
#ifdef LONG 
187  187 
MPI_Datatype types_node[2] = {MPI_LONG, MPI_INT}; 
188  188 
MPI_Datatype types_result[2] = {MPI_LONG, MPI_LONG}; 
...  ...  
190  190 
MPI_Datatype types_node[2] = {MPI_INT, MPI_INT}; 
191  191 
MPI_Datatype types_result[2] = {MPI_INT, MPI_LONG}; 
192  192 
#endif 
193 


193  
194  194 
MPI_Datatype mpi_node_type,mpi_result_type; 
195  195 
MPI_Aint offsets[2],offsetsr[2]; 
196 


196  
197  197 
offsets[0] = offsetof(node, iterations); 
198  198 
offsets[1] = offsetof(node, process); 
199 


200 
MPI_Type_create_struct(nitems, blocklengths, offsets, types_node, &mpi_node_type);


199  
200 
MPI_Type_create_struct(nitems, blocklengths, offsets, types, &mpi_node_type); 

201  201 
MPI_Type_commit(&mpi_node_type); 
202 


202  
203  203 
offsetsr[0] = offsetof(result, inside); 
204  204 
offsetsr[1] = offsetof(result, useconds); 
205 


206 
MPI_Type_create_struct(nitems, blocklengths, offsetsr, types_result, &mpi_result_type);


205  
206 
MPI_Type_create_struct(nitems, blocklengths, offsetsr, types, &mpi_result_type); 

207  207 
MPI_Type_commit(&mpi_result_type); 
208 


208  
209  209 
if (rank==0) { 
210  210 

211  211 
if (argc > 1) { 
...  ...  
219  219 
} 
220  220 

221  221 
printf ("\n\tInformation about architecture:\n\n"); 
222  
223 
printf ("Sizeof int = %lld bytes.\n", (long long)sizeof(int)); 

224 
printf ("Sizeof long = %lld bytes.\n", (long long)sizeof(long)); 

225 
printf ("Sizeof long long = %lld bytes.\n", (long long)sizeof(long long)); 

222  226 

223 
printf ("\tSizeof int = %lld bytes.\n", (long long)sizeof(int));


224 
printf ("\tSizeof long = %lld bytes.\n", (long long)sizeof(long));


225 
printf ("\tSizeof long long = %lld bytes.\n", (long long)sizeof(long long));


227 
printf ("Max int = %u\n", INT_MAX);


228 
printf ("Max long = %ld\n", LONG_MAX);


229 
printf ("Max long long = %lld\n\n", LLONG_MAX);


226  230 

227 
printf ("\tMax int = %u\n", INT_MAX); 

228 
printf ("\tMax long = %ld\n", LONG_MAX); 

229 
printf ("\tMax long long = %lld\n\n", LLONG_MAX); 

231 
part_iterations=((iterations%numtasks) == 0) ? iterations/numtasks:iterations/numtasks+1 ; 

230  232 

231 
part_iterations=(((iterations%numtasks)%process) == 0) ? iterations/numtasks/process:iterations/numtasks/process+1 ; 

232 


233  233 
node send; 
234  234 
send.iterations=part_iterations; 
235  235 
send.process=process; 
...  ...  
242  242 
gettimeofday(&start,(struct timezone *)0); 
243  243 

244  244 
#pragma omp parallel for 
245 
for (int i=0 ; i<process; i++) { 

246 
insideOpenMP[i]=MainLoopGlobal(part_iterations, 

247 
rotr(seed_w,i), 

248 
rotl(seed_z,i)); 

249 
/* 

250 
printf("\t(%s,%i) found %lld for process %i\n",hostname,0, 

251 
(long long)insideOpenMP[i],i); */ 

252 
} 

253 
/* 

254 
printf("\n"); 

255 
*/ 

245 
for (int i=0 ; i<process; i++) { 

246 
insideOpenMP[i]=MainLoopGlobal(part_iterations, 

247 
rotr(seed_w,process), 

248 
rotl(seed_z,process)); 

249 
printf("\t(%s,%i) found %lld for process %i\n",hostname,0, 

250 
(long long)insideOpenMP[i],i); 

251 
} 

252 
printf("\n"); 

253  
254 
insides=0; 

255 
for (int i=0 ; i<process; i++) { 

256 
insides+=insideOpenMP[i]; 

257 
} 

258  
259 
gettimeofday(&end,(struct timezone *)0); 

260 
useconds=(end.tv_sec-start.tv_sec)*1000000+end.tv_usec-start.tv_usec; 

261 


262 
printf("\tOn %s with %i find %lld inside in %lu useconds.\n", 

263 
hostname,rank,(long long)insides,useconds); 

264 


265 
// Join part of code 

266 
for (i=1;i<numtasks;i++) { 

267  
268 
result recv; 

256  269 

257 
insides=0; 

258 
for (int i=0 ; i<process; i++) { 

259 
insides+=insideOpenMP[i]; 

260 
} 

270 
rc = MPI_Recv(&recv, 1, mpi_result_type, i, tag, MPI_COMM_WORLD,&Stat); 

271  
272 
insideMPI[i]=recv.inside; 

273 
useconds=recv.useconds; 

274  
275 
printf("\tReceive from %i, find %lld inside in %lu useconds\n",i,(long long)insideMPI[i],useconds); 

261  276 

262 
gettimeofday(&end,(struct timezone *)0); 

263 
useconds=(end.tv_sec-start.tv_sec)*1000000+end.tv_usec-start.tv_usec; 

264 


265 
printf("\tOn %s with rank #%i find %lld inside in %lu useconds.\n", 

266 
hostname,rank,(long long)insides,useconds); 

267 


268 
// Join part of code 

269 
for (i=1;i<numtasks;i++) { 

277 
insides+=insideMPI[i]; 

278 
} 

270  279 

271 
result recv;


280 
pi=4.*(float)insides/(float)(part_iterations*numtasks);


272  281 

273 
rc = MPI_Recv(&recv, 1, mpi_result_type, i, tag, MPI_COMM_WORLD,&Stat); 

274 


275 
insideMPI[i]=recv.inside; 

276 
useconds=recv.useconds; 

277 


278 
printf("\tReceive from rank #%i, find %lld inside in %lu useconds\n",i,(long long)insideMPI[i],useconds); 

279 


280 
insides+=insideMPI[i]; 

281 
} 

282 


283 
pi=4.*(float)insides/(float)(part_iterations*numtasks*process); 

284 


285 
printf("\n\tPi=%.40f\n\twith error %.40f\n\twith %lld iterations\n\n",pi, 

286 
fabs(pi-4*atan(1.))/pi,(long long)(part_iterations*numtasks*process)); 

287 


282 
printf("\n\tPi=%.40f\n\twith error %.40f\n\twith %lld iterations\n\n",pi, 

283 
fabs(pi-4*atan(1.))/pi,(long long)(part_iterations*numtasks)); 

284  
288  285 
} 
289  286 
else 
290  287 
{ 
291  288 
// Receive information from master 
292 


289 


293  290 
node recv; 
294 


291  
295  292 
rc = MPI_Recv(&recv, 1, mpi_node_type, 0, tag, MPI_COMM_WORLD,&Stat); 
296 
/* 

293 
/*


297  294 
printf("\t(%s,%i) receive from master %lld with %i process\n", 
298 
hostname,rank,(long long)recv.iterations,recv.process); 

295 
hostname,rank,(long long)recv.iterations,recv.process);


299  296 
*/ 
300 


297  
301  298 
gettimeofday(&start,(struct timezone *)0); 
302 


299  
303  300 
#pragma omp parallel for 
304  301 
for (int i=0 ; i<recv.process; i++) { 
305 
insideOpenMP[i]=MainLoopGlobal(recv.iterations,rotr(seed_w,rank+i),rotl(seed_z,rank-i));


302 
insideOpenMP[i]=MainLoopGlobal(recv.iterations,rotr(seed_w,rank+process),rotl(seed_z,rank+process));


306  303 
/* 
307 
printf("\t(%s,%i) found %lld for process %i\n",hostname,rank,


308 
(long long)insideOpenMP[i],i); 

304 
printf("\t(%s,%i) found %lld for process %i\n",hostname,rank, 

305 
(long long)insideOpenMP[i],i);


309  306 
*/ 
310  307 
} 
311  308 

312  309 
/* printf("\n"); */ 
313 


310  
311 
part_inside=0; 

314  312 
for (int i=0 ; i<recv.process; i++) { 
315  313 
part_inside+=insideOpenMP[i]; 
316  314 
} 
...  ...  
318  316 
gettimeofday(&end,(struct timezone *)0); 
319  317 
useconds=(end.tv_sec-start.tv_sec)*1000000+end.tv_usec-start.tv_usec; 
320  318 
/* 
321 
printf("\tOn %s rank %i find %lld inside in %lu useconds.\n",


322 
hostname,rank,(long long)part_inside,useconds); 

319 
printf("\tOn %s rank %i find %lld inside in %lu useconds.\n",


320 
hostname,rank,(long long)part_inside,useconds);


323  321 
*/ 
324  322 
result send; 
325  323 
send.inside=part_inside; 
326  324 
send.useconds=useconds; 
327  325 

328  326 
rc = MPI_Send(&send, 1, mpi_result_type, 0, tag, MPI_COMM_WORLD); 
329 


327  
330  328 
} 
331  329 

332  330 
MPI_Type_free(&mpi_node_type); 
333  331 
MPI_Type_free(&mpi_result_type); 
334 


332  
335  333 
MPI_Finalize(); 
336  334 

337  335 
} 
Also available in: Unified diff