Statistics
| Revision:

root / Pi / C / Hybrid / Pi_Hybrid.c @ 183

History | View | Annotate | Download (8.6 kB)

1
//
2
// Estimation of Pi using Monte Carlo exploration process
3
// gcc -std=c99 -O3 -o Pi Pi.c -lm 
4
// Emmanuel Quemener <emmanuel.quemener@ens-lyon.fr>
5
// Cecill v2
6

    
7
// Needed for gethostname
8
#define _BSD_SOURCE
9
#include <sys/unistd.h>
10

    
11
#include <math.h>
12
#include <stdio.h>
13
#include <stdlib.h>
14
#include <limits.h>
15
#include <mpi.h>
16
#include <stddef.h>
17

    
18
#ifdef TIME
19
#include <sys/time.h>
20
#endif
21

    
22
// Marsaglia RNG very simple implementation
23
#define znew  ((z=36969*(z&65535)+(z>>16))<<16)
24
#define wnew  ((w=18000*(w&65535)+(w>>16))&65535)
25
#define MWC   (znew+wnew)
26
#define SHR3  (jsr=(jsr=(jsr=jsr^(jsr<<17))^(jsr>>13))^(jsr<<5))
27
#define CONG  (jcong=69069*jcong+1234567)
28
#define KISS  ((MWC^CONG)+SHR3)
29

    
30
#define ITERATIONS 1000000000
31

    
32
#define MWCfp MWC * 2.328306435454494e-10f
33
#define KISSfp KISS * 2.328306435454494e-10f
34
#define SHR3fp SHR3 * 2.328306435454494e-10f
35
#define CONGfp CONG * 2.328306435454494e-10f
36

    
37
#define PROCESS 1
38

    
39
#ifdef LONG
40
#define LENGTH long long
41
#else
42
#define LENGTH int
43
#endif
44

    
45
typedef struct compute_node {
46
  LENGTH iterations;
47
  int process;
48
} node;
49

    
50
typedef struct compute_result {
51
  LENGTH inside;
52
  long int useconds;
53
} result;
54

    
55
unsigned int rotl(unsigned int value, int shift) {
56
  return (value << shift) | (value >> (sizeof(value) * CHAR_BIT - shift));
57
}
58

    
59
unsigned int rotr(unsigned int value, int shift) {
60
  return (value >> shift) | (value << (sizeof(value) * CHAR_BIT - shift));
61
}
62

    
63
LENGTH MainLoopGlobal(LENGTH iterations,unsigned int seed_w,unsigned int seed_z)
64
{
65
  
66
#if defined TCONG
67
  unsigned int jcong=seed_z;
68
#elif defined TSHR3
69
  unsigned int jsr=seed_w;
70
#elif defined TMWC
71
  unsigned int z=seed_z;
72
  unsigned int w=seed_w;
73
#elif defined TKISS
74
  unsigned int jcong=seed_z;
75
  unsigned int jsr=seed_w;
76
  unsigned int z=seed_z;
77
  unsigned int w=seed_w;
78
#endif
79
   
80
  LENGTH total=0;
81
  
82
  for (LENGTH i=0;i<iterations;i++) {
83
    
84
#if defined TINT32
85
#define THEONE 1073741824
86
#if defined TCONG
87
    unsigned int x=CONG>>17 ;
88
    unsigned int y=CONG>>17 ;
89
#elif defined TSHR3
90
    unsigned int x=SHR3>>17 ;
91
    unsigned int y=SHR3>>17 ;
92
#elif defined TMWC
93
    unsigned int x=MWC>>17 ;
94
    unsigned int y=MWC>>17 ;
95
#elif defined TKISS
96
    unsigned int x=KISS>>17 ;
97
    unsigned int y=KISS>>17 ;
98
#endif
99
#elif defined TINT64
100
#define THEONE 4611686018427387904
101
#if defined TCONG
102
    unsigned long x=(unsigned long)(CONG>>1) ;
103
    unsigned long y=(unsigned long)(CONG>>1) ;
104
#elif defined TSHR3
105
    unsigned long x=(unsigned long)(SHR3>>1) ;
106
    unsigned long y=(unsigned long)(SHR3>>1) ;
107
#elif defined TMWC
108
    unsigned long x=(unsigned long)(MWC>>1) ;
109
    unsigned long y=(unsigned long)(MWC>>1) ;
110
#elif defined TKISS
111
    unsigned long x=(unsigned long)(KISS>>1) ;
112
    unsigned long y=(unsigned long)(KISS>>1) ;
113
#endif
114
#elif defined TFP32
115
#define THEONE 1.0f
116
#if defined TCONG
117
    float x=CONGfp ;
118
    float y=CONGfp ;
119
#elif defined TSHR3
120
    float x=SHR3fp ;
121
    float y=SHR3fp ;
122
#elif defined TMWC
123
    float x=MWCfp ;
124
    float y=MWCfp ;
125
#elif defined TKISS
126
    float x=KISSfp ;
127
    float y=KISSfp ;
128
#endif
129
#elif defined TFP64
130
#define THEONE 1.0f
131
#if defined TCONG
132
    double x=(double)CONGfp ;
133
    double y=(double)CONGfp ;
134
#elif defined TSHR3
135
    double x=(double)SHR3fp ;
136
    double y=(double)SHR3fp ;
137
#elif defined TMWC
138
    double x=(double)MWCfp ;
139
    double y=(double)MWCfp ;
140
#elif defined TKISS
141
    double x=(double)KISSfp ;
142
    double y=(double)KISSfp ;
143
#endif
144
#endif
145

    
146
    // Matching test
147
    unsigned long inside=((x*x+y*y) < THEONE) ? 1:0;
148
    total+=inside;
149
  }
150
  
151
  return(total);
152
  
153
}
154

    
155
int main(int argc, char *argv[]) {
156
  
157
  unsigned int seed_z=362436069,seed_w=52128862,process=PROCESS;
158
  // Number of NP or OpenMP processes <1024
159
  LENGTH iterations=ITERATIONS,insideMPI[8192],insideOpenMP[1024],
160
    part_inside=0,part_iterations,insides=0;
161
  int numtasks,rank,rc,tag=1,i;
162
  float pi;
163
  
164
  // Hostname supposed to be <128 characters
165
  char hostname[128];
166
  
167
  gethostname(hostname, sizeof hostname);
168
  
169
  struct timeval start,end;
170
  long int useconds;
171
  
172
  MPI_Status Stat;
173
  
174
  rc = MPI_Init(&argc,&argv);
175
  if (rc != MPI_SUCCESS) {
176
    printf ("Error starting MPI program. Terminating.\n");
177
    MPI_Abort(MPI_COMM_WORLD, rc);
178
  }
179

    
180
  MPI_Comm_size(MPI_COMM_WORLD,&numtasks);
181
  MPI_Comm_rank(MPI_COMM_WORLD,&rank);
182
  
183
  const int nitems=2;
184
  int blocklengths[2] = {1,1};
185
  
186
#ifdef LONG
187
  MPI_Datatype types_node[2] = {MPI_LONG, MPI_INT};
188
  MPI_Datatype types_result[2] = {MPI_LONG, MPI_LONG};
189
#else
190
  MPI_Datatype types_node[2] = {MPI_INT, MPI_INT};
191
  MPI_Datatype types_result[2] = {MPI_INT, MPI_LONG};
192
#endif
193
  
194
  MPI_Datatype mpi_node_type,mpi_result_type;
195
  MPI_Aint     offsets[2],offsetsr[2];
196
  
197
  offsets[0] = offsetof(node, iterations);
198
  offsets[1] = offsetof(node, process);
199
  
200
  MPI_Type_create_struct(nitems, blocklengths, offsets, types_node, &mpi_node_type);
201
  MPI_Type_commit(&mpi_node_type);
202
  
203
  offsetsr[0] = offsetof(result, inside);
204
  offsetsr[1] = offsetof(result, useconds);
205
  
206
  MPI_Type_create_struct(nitems, blocklengths, offsetsr, types_result, &mpi_result_type);
207
  MPI_Type_commit(&mpi_result_type);
208
  
209
  if (rank==0) {
210
    
211
    if (argc > 1) {
212
      iterations=(LENGTH)atoll(argv[1]);
213
      process=atoi(argv[2]);
214
    }
215
    else {
216
      printf("\n\tPi : Estimate Pi with Monte Carlo exploration\n\n");
217
      printf("\t\t#1 : number of iterations (default 1 billion)\n");
218
      printf("\t\t#2 : number of OpenMP processes (default 1)\n\n");
219
    }
220
    
221
    printf ("\n\tInformation about architecture:\n\n");
222
    
223
    printf ("\tSizeof int = %lld bytes.\n", (long long)sizeof(int));
224
    printf ("\tSizeof long = %lld bytes.\n", (long long)sizeof(long));
225
    printf ("\tSizeof long long = %lld bytes.\n", (long long)sizeof(long long));
226
    
227
    printf ("\tMax int = %u\n", INT_MAX);
228
    printf ("\tMax long = %ld\n", LONG_MAX);
229
    printf ("\tMax long long = %lld\n\n", LLONG_MAX);
230
    
231
    part_iterations=(((iterations%numtasks)%process) == 0) ? iterations/numtasks/process:iterations/numtasks/process+1 ;
232
    
233
    node send;
234
    send.iterations=part_iterations;
235
    send.process=process;
236

    
237
    // Split part of code
238
    for (i=1;i<numtasks;i++) {      
239
      rc = MPI_Send(&send, 1, mpi_node_type, i, tag, MPI_COMM_WORLD);
240
    }
241
    
242
    gettimeofday(&start,(struct timezone *)0);
243
    
244
#pragma omp parallel for
245
    for (int i=0 ; i<process; i++) {
246
      insideOpenMP[i]=MainLoopGlobal(part_iterations,
247
                                     rotr(seed_w,i),
248
                                     rotl(seed_z,i));
249
      /*
250
        printf("\t(%s,%i) found %lld for process %i\n",hostname,0,
251
        (long long)insideOpenMP[i],i); */
252
    }
253
    /*
254
      printf("\n");
255
    */
256
    
257
    insides=0;
258
    for (int i=0 ; i<process; i++) {
259
      insides+=insideOpenMP[i];
260
    }
261
    
262
    gettimeofday(&end,(struct timezone *)0);
263
    useconds=(end.tv_sec-start.tv_sec)*1000000+end.tv_usec-start.tv_usec;
264
    
265
    printf("\tOn %s with rank #%i find %lld inside in %lu useconds.\n",
266
           hostname,rank,(long long)insides,useconds);
267
    
268
    // Join part of code
269
    for (i=1;i<numtasks;i++) {
270
      
271
      result recv;
272
      
273
      rc = MPI_Recv(&recv, 1, mpi_result_type, i, tag, MPI_COMM_WORLD,&Stat);
274
      
275
      insideMPI[i]=recv.inside;
276
      useconds=recv.useconds;
277
      
278
      printf("\tReceive from rank #%i, find %lld inside in %lu useconds\n",i,(long long)insideMPI[i],useconds);
279
    
280
      insides+=insideMPI[i];
281
    }
282
    
283
    pi=4.*(float)insides/(float)(part_iterations*numtasks*process);
284
    
285
    printf("\n\tPi=%.40f\n\twith error %.40f\n\twith %lld iterations\n\n",pi,
286
           fabs(pi-4*atan(1.))/pi,(long long)(part_iterations*numtasks*process));
287
    
288
  }
289
  else
290
    {
291
      // Receive information from master
292
      
293
      node recv;
294
      
295
      rc = MPI_Recv(&recv, 1, mpi_node_type, 0, tag, MPI_COMM_WORLD,&Stat);
296
      /*   
297
      printf("\t(%s,%i) receive from master %lld with %i process\n",
298
      hostname,rank,(long long)recv.iterations,recv.process);
299
      */
300
      
301
      gettimeofday(&start,(struct timezone *)0);
302
      
303
#pragma omp parallel for
304
      for (int i=0 ; i<recv.process; i++) {
305
        insideOpenMP[i]=MainLoopGlobal(recv.iterations,rotr(seed_w,rank+i),rotl(seed_z,rank-i));
306
        /*
307
          printf("\t(%s,%i) found %lld for process %i\n",hostname,rank,
308
          (long long)insideOpenMP[i],i);
309
        */
310
      }
311
      
312
      /* printf("\n"); */
313
      
314
      for (int i=0 ; i<recv.process; i++) {
315
        part_inside+=insideOpenMP[i];
316
      }
317
      
318
      gettimeofday(&end,(struct timezone *)0);
319
      useconds=(end.tv_sec-start.tv_sec)*1000000+end.tv_usec-start.tv_usec;
320
      /*
321
        printf("\tOn %s rank %i find %lld inside in %lu useconds.\n",
322
        hostname,rank,(long long)part_inside,useconds);
323
      */
324
      result send;
325
      send.inside=part_inside;
326
      send.useconds=useconds;
327
      
328
      rc = MPI_Send(&send, 1, mpi_result_type, 0, tag, MPI_COMM_WORLD);
329
      
330
    }
331
  
332
  MPI_Type_free(&mpi_node_type);
333
  MPI_Type_free(&mpi_result_type);
334
  
335
  MPI_Finalize();
336
  
337
}