Statistiques
| Révision :

root / Pi / C / Hybrid / Pi_Hybrid.c @ 247

Historique | Voir | Annoter | Télécharger (9,05 ko)

1 64 equemene
//
2 247 equemene
// Estimation of Pi using Monte Carlo exploration process using OpenMP & MPI
3 247 equemene
// Cecill v2 Emmanuel QUEMENER <emmanuel.quemener@gmail.com>
4 247 equemene
// gcc -std=c99 -O3 -o Pi_Hybrid Pi_Hybrid.c -lm
5 64 equemene
6 64 equemene
// Needed for gethostname
7 247 equemene
#define _DEFAULT_SOURCE
8 64 equemene
#include <sys/unistd.h>
9 64 equemene
10 64 equemene
#include <math.h>
11 64 equemene
#include <stdio.h>
12 64 equemene
#include <stdlib.h>
13 64 equemene
#include <limits.h>
14 64 equemene
#include <mpi.h>
15 64 equemene
#include <stddef.h>
16 64 equemene
17 64 equemene
#ifdef TIME
18 64 equemene
#include <sys/time.h>
19 64 equemene
#endif
20 64 equemene
21 64 equemene
// Marsaglia RNG very simple implementation
22 64 equemene
#define znew  ((z=36969*(z&65535)+(z>>16))<<16)
23 64 equemene
#define wnew  ((w=18000*(w&65535)+(w>>16))&65535)
24 64 equemene
#define MWC   (znew+wnew)
25 64 equemene
#define SHR3  (jsr=(jsr=(jsr=jsr^(jsr<<17))^(jsr>>13))^(jsr<<5))
26 64 equemene
#define CONG  (jcong=69069*jcong+1234567)
27 64 equemene
#define KISS  ((MWC^CONG)+SHR3)
28 64 equemene
29 79 equemene
#define ITERATIONS 1000000000
30 79 equemene
31 64 equemene
#define MWCfp MWC * 2.328306435454494e-10f
32 64 equemene
#define KISSfp KISS * 2.328306435454494e-10f
33 79 equemene
#define SHR3fp SHR3 * 2.328306435454494e-10f
34 79 equemene
#define CONGfp CONG * 2.328306435454494e-10f
35 64 equemene
36 64 equemene
#define PROCESS 1
37 64 equemene
38 64 equemene
#ifdef LONG
39 64 equemene
#define LENGTH long long
40 64 equemene
#else
41 64 equemene
#define LENGTH int
42 64 equemene
#endif
43 64 equemene
44 64 equemene
typedef struct compute_node {
45 183 equemene
  LENGTH iterations;
46 183 equemene
  int process;
47 64 equemene
} node;
48 64 equemene
49 183 equemene
typedef struct compute_result {
50 183 equemene
  LENGTH inside;
51 183 equemene
  long int useconds;
52 125 equemene
} result;
53 125 equemene
54 64 equemene
unsigned int rotl(unsigned int value, int shift) {
55 183 equemene
  return (value << shift) | (value >> (sizeof(value) * CHAR_BIT - shift));
56 64 equemene
}
57 183 equemene
58 64 equemene
unsigned int rotr(unsigned int value, int shift) {
59 183 equemene
  return (value >> shift) | (value << (sizeof(value) * CHAR_BIT - shift));
60 64 equemene
}
61 64 equemene
62 64 equemene
LENGTH MainLoopGlobal(LENGTH iterations,unsigned int seed_w,unsigned int seed_z)
63 64 equemene
{
64 183 equemene
65 79 equemene
#if defined TCONG
66 183 equemene
  unsigned int jcong=seed_z;
67 79 equemene
#elif defined TSHR3
68 183 equemene
  unsigned int jsr=seed_w;
69 79 equemene
#elif defined TMWC
70 183 equemene
  unsigned int z=seed_z;
71 183 equemene
  unsigned int w=seed_w;
72 79 equemene
#elif defined TKISS
73 183 equemene
  unsigned int jcong=seed_z;
74 183 equemene
  unsigned int jsr=seed_w;
75 183 equemene
  unsigned int z=seed_z;
76 183 equemene
  unsigned int w=seed_w;
77 79 equemene
#endif
78 79 equemene
79 183 equemene
  LENGTH total=0;
80 183 equemene
81 183 equemene
  for (LENGTH i=0;i<iterations;i++) {
82 183 equemene
83 79 equemene
#if defined TINT32
84 183 equemene
#define THEONE 1073741824
85 183 equemene
#if defined TCONG
86 183 equemene
    unsigned int x=CONG>>17 ;
87 183 equemene
    unsigned int y=CONG>>17 ;
88 183 equemene
#elif defined TSHR3
89 183 equemene
    unsigned int x=SHR3>>17 ;
90 183 equemene
    unsigned int y=SHR3>>17 ;
91 183 equemene
#elif defined TMWC
92 183 equemene
    unsigned int x=MWC>>17 ;
93 183 equemene
    unsigned int y=MWC>>17 ;
94 183 equemene
#elif defined TKISS
95 183 equemene
    unsigned int x=KISS>>17 ;
96 183 equemene
    unsigned int y=KISS>>17 ;
97 183 equemene
#endif
98 79 equemene
#elif defined TINT64
99 183 equemene
#define THEONE 4611686018427387904
100 183 equemene
#if defined TCONG
101 183 equemene
    unsigned long x=(unsigned long)(CONG>>1) ;
102 183 equemene
    unsigned long y=(unsigned long)(CONG>>1) ;
103 183 equemene
#elif defined TSHR3
104 183 equemene
    unsigned long x=(unsigned long)(SHR3>>1) ;
105 183 equemene
    unsigned long y=(unsigned long)(SHR3>>1) ;
106 183 equemene
#elif defined TMWC
107 183 equemene
    unsigned long x=(unsigned long)(MWC>>1) ;
108 183 equemene
    unsigned long y=(unsigned long)(MWC>>1) ;
109 183 equemene
#elif defined TKISS
110 183 equemene
    unsigned long x=(unsigned long)(KISS>>1) ;
111 183 equemene
    unsigned long y=(unsigned long)(KISS>>1) ;
112 183 equemene
#endif
113 79 equemene
#elif defined TFP32
114 183 equemene
#define THEONE 1.0f
115 183 equemene
#if defined TCONG
116 183 equemene
    float x=CONGfp ;
117 183 equemene
    float y=CONGfp ;
118 183 equemene
#elif defined TSHR3
119 183 equemene
    float x=SHR3fp ;
120 183 equemene
    float y=SHR3fp ;
121 183 equemene
#elif defined TMWC
122 183 equemene
    float x=MWCfp ;
123 183 equemene
    float y=MWCfp ;
124 183 equemene
#elif defined TKISS
125 183 equemene
    float x=KISSfp ;
126 183 equemene
    float y=KISSfp ;
127 183 equemene
#endif
128 79 equemene
#elif defined TFP64
129 183 equemene
#define THEONE 1.0f
130 183 equemene
#if defined TCONG
131 183 equemene
    double x=(double)CONGfp ;
132 183 equemene
    double y=(double)CONGfp ;
133 183 equemene
#elif defined TSHR3
134 183 equemene
    double x=(double)SHR3fp ;
135 183 equemene
    double y=(double)SHR3fp ;
136 183 equemene
#elif defined TMWC
137 183 equemene
    double x=(double)MWCfp ;
138 183 equemene
    double y=(double)MWCfp ;
139 183 equemene
#elif defined TKISS
140 183 equemene
    double x=(double)KISSfp ;
141 183 equemene
    double y=(double)KISSfp ;
142 79 equemene
#endif
143 183 equemene
#endif
144 64 equemene
145 183 equemene
    // Matching test
146 183 equemene
    unsigned long inside=((x*x+y*y) < THEONE) ? 1:0;
147 183 equemene
    total+=inside;
148 183 equemene
  }
149 183 equemene
150 183 equemene
  return(total);
151 183 equemene
152 64 equemene
}
153 64 equemene
154 64 equemene
int main(int argc, char *argv[]) {
155 183 equemene
156 64 equemene
  unsigned int seed_z=362436069,seed_w=52128862,process=PROCESS;
157 64 equemene
  // Number of NP or OpenMP processes <1024
158 125 equemene
  LENGTH iterations=ITERATIONS,insideMPI[8192],insideOpenMP[1024],
159 183 equemene
    part_inside=0,part_iterations,insides=0;
160 64 equemene
  int numtasks,rank,rc,tag=1,i;
161 64 equemene
162 64 equemene
  // Hostname supposed to be <128 characters
163 64 equemene
  char hostname[128];
164 183 equemene
165 64 equemene
  gethostname(hostname, sizeof hostname);
166 183 equemene
167 64 equemene
  struct timeval start,end;
168 64 equemene
  long int useconds;
169 183 equemene
170 64 equemene
  MPI_Status Stat;
171 76 equemene
172 64 equemene
  rc = MPI_Init(&argc,&argv);
173 64 equemene
  if (rc != MPI_SUCCESS) {
174 64 equemene
    printf ("Error starting MPI program. Terminating.\n");
175 64 equemene
    MPI_Abort(MPI_COMM_WORLD, rc);
176 64 equemene
  }
177 64 equemene
178 64 equemene
  MPI_Comm_size(MPI_COMM_WORLD,&numtasks);
179 125 equemene
  MPI_Comm_rank(MPI_COMM_WORLD,&rank);
180 183 equemene
181 125 equemene
  const int nitems=2;
182 125 equemene
  int blocklengths[2] = {1,1};
183 183 equemene
184 64 equemene
#ifdef LONG
185 125 equemene
  MPI_Datatype types_node[2] = {MPI_LONG, MPI_INT};
186 125 equemene
  MPI_Datatype types_result[2] = {MPI_LONG, MPI_LONG};
187 64 equemene
#else
188 125 equemene
  MPI_Datatype types_node[2] = {MPI_INT, MPI_INT};
189 125 equemene
  MPI_Datatype types_result[2] = {MPI_INT, MPI_LONG};
190 64 equemene
#endif
191 183 equemene
192 125 equemene
  MPI_Datatype mpi_node_type,mpi_result_type;
193 125 equemene
  MPI_Aint     offsets[2],offsetsr[2];
194 183 equemene
195 125 equemene
  offsets[0] = offsetof(node, iterations);
196 125 equemene
  offsets[1] = offsetof(node, process);
197 183 equemene
198 183 equemene
  MPI_Type_create_struct(nitems, blocklengths, offsets, types_node, &mpi_node_type);
199 125 equemene
  MPI_Type_commit(&mpi_node_type);
200 183 equemene
201 125 equemene
  offsetsr[0] = offsetof(result, inside);
202 125 equemene
  offsetsr[1] = offsetof(result, useconds);
203 183 equemene
204 183 equemene
  MPI_Type_create_struct(nitems, blocklengths, offsetsr, types_result, &mpi_result_type);
205 125 equemene
  MPI_Type_commit(&mpi_result_type);
206 183 equemene
207 64 equemene
  if (rank==0) {
208 247 equemene
209 247 equemene
    struct timeval tv1,tv2;
210 64 equemene
211 64 equemene
    if (argc > 1) {
212 64 equemene
      iterations=(LENGTH)atoll(argv[1]);
213 64 equemene
      process=atoi(argv[2]);
214 64 equemene
    }
215 64 equemene
    else {
216 64 equemene
      printf("\n\tPi : Estimate Pi with Monte Carlo exploration\n\n");
217 183 equemene
      printf("\t\t#1 : number of iterations (default 1 billion)\n");
218 183 equemene
      printf("\t\t#2 : number of OpenMP processes (default 1)\n\n");
219 64 equemene
    }
220 64 equemene
221 64 equemene
    printf ("\n\tInformation about architecture:\n\n");
222 64 equemene
223 183 equemene
    printf ("\tSizeof int = %lld bytes.\n", (long long)sizeof(int));
224 183 equemene
    printf ("\tSizeof long = %lld bytes.\n", (long long)sizeof(long));
225 183 equemene
    printf ("\tSizeof long long = %lld bytes.\n", (long long)sizeof(long long));
226 64 equemene
227 183 equemene
    printf ("\tMax int = %u\n", INT_MAX);
228 183 equemene
    printf ("\tMax long = %ld\n", LONG_MAX);
229 183 equemene
    printf ("\tMax long long = %lld\n\n", LLONG_MAX);
230 64 equemene
231 183 equemene
    part_iterations=(((iterations%numtasks)%process) == 0) ? iterations/numtasks/process:iterations/numtasks/process+1 ;
232 183 equemene
233 64 equemene
    node send;
234 64 equemene
    send.iterations=part_iterations;
235 64 equemene
    send.process=process;
236 64 equemene
237 247 equemene
    gettimeofday(&tv1, NULL);
238 247 equemene
239 64 equemene
    // Split part of code
240 76 equemene
    for (i=1;i<numtasks;i++) {
241 124 equemene
      rc = MPI_Send(&send, 1, mpi_node_type, i, tag, MPI_COMM_WORLD);
242 64 equemene
    }
243 64 equemene
244 64 equemene
    gettimeofday(&start,(struct timezone *)0);
245 64 equemene
246 64 equemene
#pragma omp parallel for
247 183 equemene
    for (int i=0 ; i<process; i++) {
248 183 equemene
      insideOpenMP[i]=MainLoopGlobal(part_iterations,
249 183 equemene
                                     rotr(seed_w,i),
250 183 equemene
                                     rotl(seed_z,i));
251 183 equemene
      /*
252 183 equemene
        printf("\t(%s,%i) found %lld for process %i\n",hostname,0,
253 183 equemene
        (long long)insideOpenMP[i],i); */
254 183 equemene
    }
255 183 equemene
    /*
256 183 equemene
      printf("\n");
257 183 equemene
    */
258 125 equemene
259 183 equemene
    insides=0;
260 183 equemene
    for (int i=0 ; i<process; i++) {
261 183 equemene
      insides+=insideOpenMP[i];
262 183 equemene
    }
263 125 equemene
264 183 equemene
    gettimeofday(&end,(struct timezone *)0);
265 183 equemene
    useconds=(end.tv_sec-start.tv_sec)*1000000+end.tv_usec-start.tv_usec;
266 183 equemene
267 183 equemene
    printf("\tOn %s with rank #%i find %lld inside in %lu useconds.\n",
268 183 equemene
           hostname,rank,(long long)insides,useconds);
269 183 equemene
270 183 equemene
    // Join part of code
271 183 equemene
    for (i=1;i<numtasks;i++) {
272 64 equemene
273 183 equemene
      result recv;
274 64 equemene
275 183 equemene
      rc = MPI_Recv(&recv, 1, mpi_result_type, i, tag, MPI_COMM_WORLD,&Stat);
276 183 equemene
277 183 equemene
      insideMPI[i]=recv.inside;
278 183 equemene
      useconds=recv.useconds;
279 183 equemene
280 183 equemene
      printf("\tReceive from rank #%i, find %lld inside in %lu useconds\n",i,(long long)insideMPI[i],useconds);
281 183 equemene
282 183 equemene
      insides+=insideMPI[i];
283 183 equemene
    }
284 183 equemene
285 247 equemene
    gettimeofday(&tv2, NULL);
286 247 equemene
287 247 equemene
    double elapsed=(double)((tv2.tv_sec-tv1.tv_sec) * 1000000L +
288 247 equemene
                              (tv2.tv_usec-tv1.tv_usec))/1000000;
289 183 equemene
290 247 equemene
    double itops=(double)(part_iterations*numtasks)/elapsed;
291 183 equemene
292 247 equemene
    printf("\nParallelRate %i\nElapsed Time %.2f\nItops %.0f\nLogItops %.2f\n",numtasks*process,elapsed,itops,log10(itops));
293 247 equemene
294 247 equemene
    LENGTH total=((iterations%numtasks)==0)?iterations:(iterations/numtasks+1)*numtasks;
295 247 equemene
296 247 equemene
    printf("Inside/Total %ld %ld\nPi estimation %f\n\n",(long int)insides,(long int)total,(4.*(float)insides/total));
297 247 equemene
298 64 equemene
  }
299 64 equemene
  else
300 64 equemene
    {
301 64 equemene
      // Receive information from master
302 183 equemene
303 64 equemene
      node recv;
304 183 equemene
305 124 equemene
      rc = MPI_Recv(&recv, 1, mpi_node_type, 0, tag, MPI_COMM_WORLD,&Stat);
306 183 equemene
      /*
307 64 equemene
      printf("\t(%s,%i) receive from master %lld with %i process\n",
308 183 equemene
      hostname,rank,(long long)recv.iterations,recv.process);
309 125 equemene
      */
310 183 equemene
311 64 equemene
      gettimeofday(&start,(struct timezone *)0);
312 183 equemene
313 64 equemene
#pragma omp parallel for
314 125 equemene
      for (int i=0 ; i<recv.process; i++) {
315 183 equemene
        insideOpenMP[i]=MainLoopGlobal(recv.iterations,rotr(seed_w,rank+i),rotl(seed_z,rank-i));
316 125 equemene
        /*
317 183 equemene
          printf("\t(%s,%i) found %lld for process %i\n",hostname,rank,
318 183 equemene
          (long long)insideOpenMP[i],i);
319 125 equemene
        */
320 125 equemene
      }
321 64 equemene
322 125 equemene
      /* printf("\n"); */
323 183 equemene
324 125 equemene
      for (int i=0 ; i<recv.process; i++) {
325 125 equemene
        part_inside+=insideOpenMP[i];
326 125 equemene
      }
327 125 equemene
328 64 equemene
      gettimeofday(&end,(struct timezone *)0);
329 64 equemene
      useconds=(end.tv_sec-start.tv_sec)*1000000+end.tv_usec-start.tv_usec;
330 125 equemene
      /*
331 183 equemene
        printf("\tOn %s rank %i find %lld inside in %lu useconds.\n",
332 183 equemene
        hostname,rank,(long long)part_inside,useconds);
333 125 equemene
      */
334 125 equemene
      result send;
335 125 equemene
      send.inside=part_inside;
336 125 equemene
      send.useconds=useconds;
337 64 equemene
338 125 equemene
      rc = MPI_Send(&send, 1, mpi_result_type, 0, tag, MPI_COMM_WORLD);
339 183 equemene
340 64 equemene
    }
341 64 equemene
342 64 equemene
  MPI_Type_free(&mpi_node_type);
343 125 equemene
  MPI_Type_free(&mpi_result_type);
344 183 equemene
345 64 equemene
  MPI_Finalize();
346 64 equemene
347 64 equemene
}