Statistiques
| Révision :

root / Pi / C / Hybrid / Pi_Hybrid.c @ 283

Historique | Voir | Annoter | Télécharger (9,05 ko)

1
//
2
// Estimation of Pi using Monte Carlo exploration process using OpenMP & MPI
3
// Cecill v2 Emmanuel QUEMENER <emmanuel.quemener@gmail.com>
4
// gcc -std=c99 -O3 -o Pi_Hybrid Pi_Hybrid.c -lm 
5

    
6
// Needed for gethostname
7
#define _DEFAULT_SOURCE
8
#include <sys/unistd.h>
9

    
10
#include <math.h>
11
#include <stdio.h>
12
#include <stdlib.h>
13
#include <limits.h>
14
#include <mpi.h>
15
#include <stddef.h>
16

    
17
#ifdef TIME
18
#include <sys/time.h>
19
#endif
20

    
21
// Marsaglia RNG very simple implementation
22
#define znew  ((z=36969*(z&65535)+(z>>16))<<16)
23
#define wnew  ((w=18000*(w&65535)+(w>>16))&65535)
24
#define MWC   (znew+wnew)
25
#define SHR3  (jsr=(jsr=(jsr=jsr^(jsr<<17))^(jsr>>13))^(jsr<<5))
26
#define CONG  (jcong=69069*jcong+1234567)
27
#define KISS  ((MWC^CONG)+SHR3)
28

    
29
#define ITERATIONS 1000000000
30

    
31
#define MWCfp MWC * 2.328306435454494e-10f
32
#define KISSfp KISS * 2.328306435454494e-10f
33
#define SHR3fp SHR3 * 2.328306435454494e-10f
34
#define CONGfp CONG * 2.328306435454494e-10f
35

    
36
#define PROCESS 1
37

    
38
#ifdef LONG
39
#define LENGTH long long
40
#else
41
#define LENGTH int
42
#endif
43

    
44
typedef struct compute_node {
45
  LENGTH iterations;
46
  int process;
47
} node;
48

    
49
typedef struct compute_result {
50
  LENGTH inside;
51
  long int useconds;
52
} result;
53

    
54
unsigned int rotl(unsigned int value, int shift) {
55
  return (value << shift) | (value >> (sizeof(value) * CHAR_BIT - shift));
56
}
57

    
58
unsigned int rotr(unsigned int value, int shift) {
59
  return (value >> shift) | (value << (sizeof(value) * CHAR_BIT - shift));
60
}
61

    
62
LENGTH MainLoopGlobal(LENGTH iterations,unsigned int seed_w,unsigned int seed_z)
63
{
64
  
65
#if defined TCONG
66
  unsigned int jcong=seed_z;
67
#elif defined TSHR3
68
  unsigned int jsr=seed_w;
69
#elif defined TMWC
70
  unsigned int z=seed_z;
71
  unsigned int w=seed_w;
72
#elif defined TKISS
73
  unsigned int jcong=seed_z;
74
  unsigned int jsr=seed_w;
75
  unsigned int z=seed_z;
76
  unsigned int w=seed_w;
77
#endif
78
   
79
  LENGTH total=0;
80
  
81
  for (LENGTH i=0;i<iterations;i++) {
82
    
83
#if defined TINT32
84
#define THEONE 1073741824
85
#if defined TCONG
86
    unsigned int x=CONG>>17 ;
87
    unsigned int y=CONG>>17 ;
88
#elif defined TSHR3
89
    unsigned int x=SHR3>>17 ;
90
    unsigned int y=SHR3>>17 ;
91
#elif defined TMWC
92
    unsigned int x=MWC>>17 ;
93
    unsigned int y=MWC>>17 ;
94
#elif defined TKISS
95
    unsigned int x=KISS>>17 ;
96
    unsigned int y=KISS>>17 ;
97
#endif
98
#elif defined TINT64
99
#define THEONE 4611686018427387904
100
#if defined TCONG
101
    unsigned long x=(unsigned long)(CONG>>1) ;
102
    unsigned long y=(unsigned long)(CONG>>1) ;
103
#elif defined TSHR3
104
    unsigned long x=(unsigned long)(SHR3>>1) ;
105
    unsigned long y=(unsigned long)(SHR3>>1) ;
106
#elif defined TMWC
107
    unsigned long x=(unsigned long)(MWC>>1) ;
108
    unsigned long y=(unsigned long)(MWC>>1) ;
109
#elif defined TKISS
110
    unsigned long x=(unsigned long)(KISS>>1) ;
111
    unsigned long y=(unsigned long)(KISS>>1) ;
112
#endif
113
#elif defined TFP32
114
#define THEONE 1.0f
115
#if defined TCONG
116
    float x=CONGfp ;
117
    float y=CONGfp ;
118
#elif defined TSHR3
119
    float x=SHR3fp ;
120
    float y=SHR3fp ;
121
#elif defined TMWC
122
    float x=MWCfp ;
123
    float y=MWCfp ;
124
#elif defined TKISS
125
    float x=KISSfp ;
126
    float y=KISSfp ;
127
#endif
128
#elif defined TFP64
129
#define THEONE 1.0f
130
#if defined TCONG
131
    double x=(double)CONGfp ;
132
    double y=(double)CONGfp ;
133
#elif defined TSHR3
134
    double x=(double)SHR3fp ;
135
    double y=(double)SHR3fp ;
136
#elif defined TMWC
137
    double x=(double)MWCfp ;
138
    double y=(double)MWCfp ;
139
#elif defined TKISS
140
    double x=(double)KISSfp ;
141
    double y=(double)KISSfp ;
142
#endif
143
#endif
144

    
145
    // Matching test
146
    unsigned long inside=((x*x+y*y) < THEONE) ? 1:0;
147
    total+=inside;
148
  }
149
  
150
  return(total);
151
  
152
}
153

    
154
int main(int argc, char *argv[]) {
155
  
156
  unsigned int seed_z=362436069,seed_w=52128862,process=PROCESS;
157
  // Number of NP or OpenMP processes <1024
158
  LENGTH iterations=ITERATIONS,insideMPI[8192],insideOpenMP[1024],
159
    part_inside=0,part_iterations,insides=0;
160
  int numtasks,rank,rc,tag=1,i;
161
  
162
  // Hostname supposed to be <128 characters
163
  char hostname[128];
164
  
165
  gethostname(hostname, sizeof hostname);
166
  
167
  struct timeval start,end;
168
  long int useconds;
169
  
170
  MPI_Status Stat;
171
  
172
  rc = MPI_Init(&argc,&argv);
173
  if (rc != MPI_SUCCESS) {
174
    printf ("Error starting MPI program. Terminating.\n");
175
    MPI_Abort(MPI_COMM_WORLD, rc);
176
  }
177

    
178
  MPI_Comm_size(MPI_COMM_WORLD,&numtasks);
179
  MPI_Comm_rank(MPI_COMM_WORLD,&rank);
180
  
181
  const int nitems=2;
182
  int blocklengths[2] = {1,1};
183
  
184
#ifdef LONG
185
  MPI_Datatype types_node[2] = {MPI_LONG, MPI_INT};
186
  MPI_Datatype types_result[2] = {MPI_LONG, MPI_LONG};
187
#else
188
  MPI_Datatype types_node[2] = {MPI_INT, MPI_INT};
189
  MPI_Datatype types_result[2] = {MPI_INT, MPI_LONG};
190
#endif
191
  
192
  MPI_Datatype mpi_node_type,mpi_result_type;
193
  MPI_Aint     offsets[2],offsetsr[2];
194
  
195
  offsets[0] = offsetof(node, iterations);
196
  offsets[1] = offsetof(node, process);
197
  
198
  MPI_Type_create_struct(nitems, blocklengths, offsets, types_node, &mpi_node_type);
199
  MPI_Type_commit(&mpi_node_type);
200
  
201
  offsetsr[0] = offsetof(result, inside);
202
  offsetsr[1] = offsetof(result, useconds);
203
  
204
  MPI_Type_create_struct(nitems, blocklengths, offsetsr, types_result, &mpi_result_type);
205
  MPI_Type_commit(&mpi_result_type);
206
  
207
  if (rank==0) {
208

    
209
    struct timeval tv1,tv2;
210
    
211
    if (argc > 1) {
212
      iterations=(LENGTH)atoll(argv[1]);
213
      process=atoi(argv[2]);
214
    }
215
    else {
216
      printf("\n\tPi : Estimate Pi with Monte Carlo exploration\n\n");
217
      printf("\t\t#1 : number of iterations (default 1 billion)\n");
218
      printf("\t\t#2 : number of OpenMP processes (default 1)\n\n");
219
    }
220
    
221
    printf ("\n\tInformation about architecture:\n\n");
222
    
223
    printf ("\tSizeof int = %lld bytes.\n", (long long)sizeof(int));
224
    printf ("\tSizeof long = %lld bytes.\n", (long long)sizeof(long));
225
    printf ("\tSizeof long long = %lld bytes.\n", (long long)sizeof(long long));
226
    
227
    printf ("\tMax int = %u\n", INT_MAX);
228
    printf ("\tMax long = %ld\n", LONG_MAX);
229
    printf ("\tMax long long = %lld\n\n", LLONG_MAX);
230
    
231
    part_iterations=(((iterations%numtasks)%process) == 0) ? iterations/numtasks/process:iterations/numtasks/process+1 ;
232
    
233
    node send;
234
    send.iterations=part_iterations;
235
    send.process=process;
236

    
237
    gettimeofday(&tv1, NULL);
238
        
239
    // Split part of code
240
    for (i=1;i<numtasks;i++) {      
241
      rc = MPI_Send(&send, 1, mpi_node_type, i, tag, MPI_COMM_WORLD);
242
    }
243
    
244
    gettimeofday(&start,(struct timezone *)0);
245
    
246
#pragma omp parallel for
247
    for (int i=0 ; i<process; i++) {
248
      insideOpenMP[i]=MainLoopGlobal(part_iterations,
249
                                     rotr(seed_w,i),
250
                                     rotl(seed_z,i));
251
      /*
252
        printf("\t(%s,%i) found %lld for process %i\n",hostname,0,
253
        (long long)insideOpenMP[i],i); */
254
    }
255
    /*
256
      printf("\n");
257
    */
258
    
259
    insides=0;
260
    for (int i=0 ; i<process; i++) {
261
      insides+=insideOpenMP[i];
262
    }
263
    
264
    gettimeofday(&end,(struct timezone *)0);
265
    useconds=(end.tv_sec-start.tv_sec)*1000000+end.tv_usec-start.tv_usec;
266
    
267
    printf("\tOn %s with rank #%i find %lld inside in %lu useconds.\n",
268
           hostname,rank,(long long)insides,useconds);
269
    
270
    // Join part of code
271
    for (i=1;i<numtasks;i++) {
272
      
273
      result recv;
274
      
275
      rc = MPI_Recv(&recv, 1, mpi_result_type, i, tag, MPI_COMM_WORLD,&Stat);
276
      
277
      insideMPI[i]=recv.inside;
278
      useconds=recv.useconds;
279
      
280
      printf("\tReceive from rank #%i, find %lld inside in %lu useconds\n",i,(long long)insideMPI[i],useconds);
281
    
282
      insides+=insideMPI[i];
283
    }
284
    
285
    gettimeofday(&tv2, NULL);
286

    
287
    double elapsed=(double)((tv2.tv_sec-tv1.tv_sec) * 1000000L +
288
                              (tv2.tv_usec-tv1.tv_usec))/1000000;
289
    
290
    double itops=(double)(part_iterations*numtasks)/elapsed;
291
    
292
    printf("\nParallelRate %i\nElapsed Time %.2f\nItops %.0f\nLogItops %.2f\n",numtasks*process,elapsed,itops,log10(itops));
293
    
294
    LENGTH total=((iterations%numtasks)==0)?iterations:(iterations/numtasks+1)*numtasks;
295

    
296
    printf("Inside/Total %ld %ld\nPi estimation %f\n\n",(long int)insides,(long int)total,(4.*(float)insides/total));
297
    
298
  }
299
  else
300
    {
301
      // Receive information from master
302
      
303
      node recv;
304
      
305
      rc = MPI_Recv(&recv, 1, mpi_node_type, 0, tag, MPI_COMM_WORLD,&Stat);
306
      /*   
307
      printf("\t(%s,%i) receive from master %lld with %i process\n",
308
      hostname,rank,(long long)recv.iterations,recv.process);
309
      */
310
      
311
      gettimeofday(&start,(struct timezone *)0);
312
      
313
#pragma omp parallel for
314
      for (int i=0 ; i<recv.process; i++) {
315
        insideOpenMP[i]=MainLoopGlobal(recv.iterations,rotr(seed_w,rank+i),rotl(seed_z,rank-i));
316
        /*
317
          printf("\t(%s,%i) found %lld for process %i\n",hostname,rank,
318
          (long long)insideOpenMP[i],i);
319
        */
320
      }
321
      
322
      /* printf("\n"); */
323
      
324
      for (int i=0 ; i<recv.process; i++) {
325
        part_inside+=insideOpenMP[i];
326
      }
327
      
328
      gettimeofday(&end,(struct timezone *)0);
329
      useconds=(end.tv_sec-start.tv_sec)*1000000+end.tv_usec-start.tv_usec;
330
      /*
331
        printf("\tOn %s rank %i find %lld inside in %lu useconds.\n",
332
        hostname,rank,(long long)part_inside,useconds);
333
      */
334
      result send;
335
      send.inside=part_inside;
336
      send.useconds=useconds;
337
      
338
      rc = MPI_Send(&send, 1, mpi_result_type, 0, tag, MPI_COMM_WORLD);
339
      
340
    }
341
  
342
  MPI_Type_free(&mpi_node_type);
343
  MPI_Type_free(&mpi_result_type);
344
  
345
  MPI_Finalize();
346
  
347
}