Statistiques
| Révision :

root / Pi / C / Hybrid / Pi_Hybrid.c @ 79

Historique | Voir | Annoter | Télécharger (8,8 ko)

1 64 equemene
//
2 64 equemene
// Estimation of Pi using Monte Carlo exploration process
3 64 equemene
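// mpicc -std=c99 -O3 -fopenmp -DTMWC -DTFP32 -o Pi_Hybrid Pi_Hybrid.c -lm
// (RNG: -DTCONG | -DTSHR3 | -DTMWC | -DTKISS ; precision: -DTINT32 | -DTINT64 | -DTFP32 | -DTFP64 ; optional: -DLONG, -DTIME)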
4 64 equemene
// Emmanuel Quemener <emmanuel.quemener@ens-lyon.fr>
5 64 equemene
// Cecill v2
6 64 equemene
7 64 equemene
// Needed for gethostname
8 64 equemene
#define _BSD_SOURCE
9 64 equemene
#include <sys/unistd.h>
10 64 equemene
11 64 equemene
#include <math.h>
12 64 equemene
#include <stdio.h>
13 64 equemene
#include <stdlib.h>
14 64 equemene
#include <limits.h>
15 64 equemene
#include <mpi.h>
16 64 equemene
#include <stddef.h>
17 64 equemene
18 64 equemene
#ifdef TIME
19 64 equemene
#include <sys/time.h>
20 64 equemene
#endif
21 64 equemene
22 64 equemene
// Marsaglia RNG very simple implementation
//
// Each macro updates, as a side effect, `unsigned int` state variables that
// must be declared in the scope where the macro is expanded:
//   MWC  uses z and w, SHR3 uses jsr, CONG uses jcong, KISS uses all four.
// Every expansion yields the freshly generated value.
#define znew  ((z=36969*(z&65535)+(z>>16))<<16)
#define wnew  ((w=18000*(w&65535)+(w>>16))&65535)
#define MWC   (znew+wnew)
// The three xorshift steps are chained with comma operators so that every
// update of jsr is sequenced before the next read of jsr. Nesting the
// assignments inside one another reads and writes jsr without sequencing,
// which is undefined behaviour in C.
#define SHR3  (jsr^=(jsr<<17), jsr^=(jsr>>13), jsr^=(jsr<<5))
#define CONG  (jcong=69069*jcong+1234567)
#define KISS  ((MWC^CONG)+SHR3)

// Default number of iterations when none is given on the command line
#define ITERATIONS 1000000000

// Floating point variants: one draw scaled by approximately 2^-32.
// NOTE: these expansions are not parenthesised. A cast written in front of
// the macro, e.g. `(double)MWCfp`, therefore applies to the draw alone and the
// product is evaluated in double precision. Adding parentheses here would
// change the result of such uses.
#define MWCfp MWC * 2.328306435454494e-10f
#define KISSfp KISS * 2.328306435454494e-10f
#define SHR3fp SHR3 * 2.328306435454494e-10f
#define CONGfp CONG * 2.328306435454494e-10f

// Default number of OpenMP processes per MPI rank
#define PROCESS 1

// Integer type used for iteration counts and hit counts
#ifdef LONG
#define LENGTH long long
#else
#define LENGTH int
#endif
44 64 equemene
45 64 equemene
typedef struct compute_node {
46 64 equemene
        LENGTH iterations;
47 64 equemene
        int process;
48 64 equemene
} node;
49 64 equemene
50 64 equemene
// Rotate `value` left by `shift` bit positions.
//
// The count is reduced modulo the bit width of unsigned int, so 0, counts
// equal to or larger than the width, and negative counts are all well defined
// (a plain `value >> (width - shift)` is undefined for shift == 0 or
// shift >= width). With the usual power-of-two width, a negative count rotates
// the other way.
unsigned int rotl(unsigned int value, int shift) {
    const unsigned int width = (unsigned int)(sizeof(value) * CHAR_BIT);
    const unsigned int count = (unsigned int)shift % width;

    if (count == 0) {
        return value;
    }
    return (value << count) | (value >> (width - count));
}
53 64 equemene
54 64 equemene
// Rotate `value` right by `shift` bit positions.
//
// The count is reduced modulo the bit width of unsigned int, so 0, counts
// equal to or larger than the width, and negative counts are all well defined
// (a plain `value << (width - shift)` is undefined for shift == 0 or
// shift >= width). With the usual power-of-two width, a negative count rotates
// the other way.
unsigned int rotr(unsigned int value, int shift) {
    const unsigned int width = (unsigned int)(sizeof(value) * CHAR_BIT);
    const unsigned int count = (unsigned int)shift % width;

    if (count == 0) {
        return value;
    }
    return (value >> count) | (value << (width - count));
}
57 64 equemene
58 64 equemene
// Monte Carlo kernel: draws `iterations` points (x,y), two consecutive random
// draws per point, and returns how many of them satisfy x*x + y*y < THEONE.
//
//   iterations : number of points to draw
//   seed_w     : seed for the w (MWC) and jsr (SHR3) state variables
//   seed_z     : seed for the z (MWC) and jcong (CONG) state variables
//
// Compile-time selection (first match wins, same priority as before):
//   generator : TCONG, TSHR3, TMWC, TKISS     (MWC when none is defined)
//   precision : TINT32, TINT64, TFP32, TFP64  (FP32 when none is defined)
LENGTH MainLoopGlobal(LENGTH iterations,unsigned int seed_w,unsigned int seed_z)
{
   // Generator state. PI_DRAW expands to one draw of the selected generator;
   // the generator macros read and update the state variables declared here.
#if defined TCONG
   unsigned int jcong=seed_z;
   (void)seed_w;
   #define PI_DRAW CONG
#elif defined TSHR3
   unsigned int jsr=seed_w;
   (void)seed_z;
   #define PI_DRAW SHR3
#elif defined TKISS && !defined TMWC
   unsigned int jcong=seed_z;
   unsigned int jsr=seed_w;
   unsigned int z=seed_z;
   unsigned int w=seed_w;
   #define PI_DRAW KISS
#else
   unsigned int z=seed_z;
   unsigned int w=seed_w;
   #define PI_DRAW MWC
#endif

   // Coordinate type, conversion of one draw into a coordinate (PI_COORD has
   // the side effect of advancing the generator), and squared radius THEONE.
#if defined TINT32
   // 15-bit coordinates: x*x + y*y stays below 2^31, THEONE is 2^30
   #define THEONE 1073741824
   typedef unsigned int Coord;
   #define PI_COORD (PI_DRAW>>17)
#elif defined TINT64
   // 31-bit coordinates: x*x + y*y stays below 2^63, THEONE is 2^62.
   // unsigned long long guarantees 64 bits even where long is 32 bits wide.
   #define THEONE 4611686018427387904
   typedef unsigned long long Coord;
   #define PI_COORD ((unsigned long long)(PI_DRAW>>1))
#elif defined TFP64 && !defined TFP32
   // The draw is converted to double before being scaled
   #define THEONE 1.0f
   typedef double Coord;
   #define PI_COORD ((double)PI_DRAW * 2.328306435454494e-10f)
#else
   #define THEONE 1.0f
   typedef float Coord;
   #define PI_COORD (PI_DRAW * 2.328306435454494e-10f)
#endif

   LENGTH total=0;

   for (LENGTH i=0;i<iterations;i++) {

      // x is drawn first, then y: the order determines the random stream
      const Coord x=PI_COORD;
      const Coord y=PI_COORD;

      // Matching test
      if ((x*x+y*y) < THEONE) {
         total++;
      }
   }

   return total;

}
#undef PI_COORD
#undef PI_DRAW
149 64 equemene
150 64 equemene
int main(int argc, char *argv[]) {
151 64 equemene
152 64 equemene
  unsigned int seed_z=362436069,seed_w=52128862,process=PROCESS;
153 64 equemene
  // Number of NP or OpenMP processes <1024
154 64 equemene
  LENGTH iterations=ITERATIONS,insideMPI[1024],insideOpenMP[1024],
155 64 equemene
    part_inside,part_iterations,insides;
156 64 equemene
  int numtasks,rank,rc,tag=1,i;
157 64 equemene
  float pi;
158 64 equemene
159 64 equemene
  // Hostname supposed to be <128 characters
160 64 equemene
  char hostname[128];
161 64 equemene
162 64 equemene
  gethostname(hostname, sizeof hostname);
163 64 equemene
164 64 equemene
#ifdef TIME
165 64 equemene
  struct timeval start,end;
166 64 equemene
  long int useconds;
167 64 equemene
#endif
168 64 equemene
169 64 equemene
  MPI_Status Stat;
170 76 equemene
  MPI_Request RequestSend,RequestRecv,RequestSend2,RequestRecv2;
171 76 equemene
172 64 equemene
  rc = MPI_Init(&argc,&argv);
173 64 equemene
  if (rc != MPI_SUCCESS) {
174 64 equemene
    printf ("Error starting MPI program. Terminating.\n");
175 64 equemene
    MPI_Abort(MPI_COMM_WORLD, rc);
176 64 equemene
  }
177 64 equemene
178 64 equemene
  MPI_Comm_size(MPI_COMM_WORLD,&numtasks);
179 64 equemene
180 64 equemene
    const int nitems=2;
181 64 equemene
    int blocklengths[2] = {1,1};
182 64 equemene
183 64 equemene
#ifdef LONG
184 64 equemene
    MPI_Datatype types[2] = {MPI_LONG, MPI_INT};
185 64 equemene
#else
186 64 equemene
    MPI_Datatype types[2] = {MPI_INT, MPI_INT};
187 64 equemene
#endif
188 64 equemene
189 64 equemene
    MPI_Datatype mpi_node_type;
190 64 equemene
    MPI_Aint     offsets[2];
191 64 equemene
192 64 equemene
    offsets[0] = offsetof(node, iterations);
193 64 equemene
    offsets[1] = offsetof(node, process);
194 64 equemene
195 64 equemene
    MPI_Type_create_struct(nitems, blocklengths, offsets, types, &mpi_node_type);
196 64 equemene
    MPI_Type_commit(&mpi_node_type);
197 64 equemene
198 64 equemene
    MPI_Comm_rank(MPI_COMM_WORLD,&rank);
199 64 equemene
200 64 equemene
  if (rank==0) {
201 64 equemene
202 64 equemene
    if (argc > 1) {
203 64 equemene
      iterations=(LENGTH)atoll(argv[1]);
204 64 equemene
      process=atoi(argv[2]);
205 64 equemene
    }
206 64 equemene
    else {
207 64 equemene
      printf("\n\tPi : Estimate Pi with Monte Carlo exploration\n\n");
208 64 equemene
      printf("\t\t#1 : number of iterations (default 1 billion)\n\n");
209 64 equemene
      printf("\t\t#1 : number of OpenMP processes (default 1)\n\n");
210 64 equemene
    }
211 64 equemene
212 64 equemene
    printf ("\n\tInformation about architecture:\n\n");
213 64 equemene
214 64 equemene
    printf ("Sizeof int = %lld bytes.\n", (long long)sizeof(int));
215 64 equemene
    printf ("Sizeof long = %lld bytes.\n", (long long)sizeof(long));
216 64 equemene
    printf ("Sizeof long long = %lld bytes.\n", (long long)sizeof(long long));
217 64 equemene
218 64 equemene
    printf ("Max int = %u\n", INT_MAX);
219 64 equemene
    printf ("Max long = %ld\n", LONG_MAX);
220 64 equemene
    printf ("Max long long = %lld\n\n", LLONG_MAX);
221 64 equemene
222 64 equemene
    part_iterations=iterations/(numtasks*process)+1;
223 64 equemene
224 64 equemene
    node send;
225 64 equemene
    send.iterations=part_iterations;
226 64 equemene
    send.process=process;
227 64 equemene
228 64 equemene
    // Split part of code
229 76 equemene
    for (i=1;i<numtasks;i++) {
230 76 equemene
      rc = MPI_Isend(&send, 1, mpi_node_type, i, tag, MPI_COMM_WORLD, &RequestSend);
231 64 equemene
    }
232 76 equemene
    MPI_Wait(&RequestSend,&Stat);
233 64 equemene
234 64 equemene
#ifdef TIME
235 64 equemene
    gettimeofday(&start,(struct timezone *)0);
236 64 equemene
#endif
237 64 equemene
238 64 equemene
#pragma omp parallel for
239 64 equemene
  for (int i=0 ; i<process; i++) {
240 64 equemene
    insideOpenMP[i]=MainLoopGlobal(part_iterations,
241 64 equemene
                                   rotr(seed_w,process),
242 64 equemene
                                   rotl(seed_z,process));
243 64 equemene
    printf("\t(%s,%i) found %lld for process %i\n",hostname,0,
244 64 equemene
           (long long)insideOpenMP[i],i);
245 64 equemene
  }
246 64 equemene
  printf("\n");
247 64 equemene
248 64 equemene
  insides=0;
249 64 equemene
  for (int i=0 ; i<process; i++) {
250 64 equemene
    insides+=insideOpenMP[i];
251 64 equemene
  }
252 64 equemene
253 64 equemene
#ifdef TIME
254 64 equemene
    gettimeofday(&end,(struct timezone *)0);
255 64 equemene
    useconds=(end.tv_sec-start.tv_sec)*1000000+end.tv_usec-start.tv_usec;
256 64 equemene
257 64 equemene
      printf("\tOn %s with %i find %lld inside in %lu useconds.\n",
258 64 equemene
             hostname,rank,(long long)insides,useconds);
259 64 equemene
#else
260 64 equemene
      printf("\tOn %s with %i find %lld inside\n",hostname,rank,
261 64 equemene
             (long long)insides);
262 64 equemene
263 64 equemene
#endif
264 64 equemene
265 64 equemene
    // Join part of code
266 64 equemene
      for (i=1;i<numtasks;i++) {
267 64 equemene
#ifdef LONG
268 76 equemene
        rc = MPI_Irecv(&insideMPI[i], 1, MPI_LONG_LONG, i, tag,
269 76 equemene
                       MPI_COMM_WORLD, &RequestRecv2);
270 64 equemene
#else
271 76 equemene
        rc = MPI_Irecv(&insideMPI[i], 1, MPI_INT, i, tag,
272 76 equemene
                       MPI_COMM_WORLD, &RequestRecv2);
273 64 equemene
#endif
274 76 equemene
        MPI_Wait(&RequestRecv2,&Stat);
275 64 equemene
        printf("\tReceive %lu inside from rank %i\n",(unsigned long)insideMPI[i],i);
276 64 equemene
        insides+=insideMPI[i];
277 64 equemene
      }
278 64 equemene
279 64 equemene
      pi=4.*(float)insides/(float)((iterations/numtasks)*numtasks);
280 64 equemene
281 64 equemene
      printf("\n\tPi=%.40f\n\twith error %.40f\n\twith %lld iterations\n\n",pi,
282 64 equemene
             fabs(pi-4*atan(1.))/pi,(long long)iterations);
283 64 equemene
284 64 equemene
  }
285 64 equemene
  else
286 64 equemene
    {
287 64 equemene
      // Receive information from master
288 64 equemene
289 64 equemene
      node recv;
290 64 equemene
291 76 equemene
      rc = MPI_Irecv(&recv, 1, mpi_node_type, 0, tag, MPI_COMM_WORLD, &RequestRecv);
292 76 equemene
      MPI_Wait(&RequestRecv,&Stat);
293 64 equemene
294 64 equemene
      printf("\t(%s,%i) receive from master %lld with %i process\n",
295 64 equemene
             hostname,rank,(long long)recv.iterations,recv.process);
296 64 equemene
297 64 equemene
#ifdef TIME
298 64 equemene
      gettimeofday(&start,(struct timezone *)0);
299 64 equemene
#endif
300 64 equemene
301 64 equemene
#pragma omp parallel for
302 64 equemene
  for (int i=0 ; i<recv.process; i++) {
303 64 equemene
      insideOpenMP[i]=MainLoopGlobal(recv.iterations,rotr(seed_w,rank+process),rotl(seed_z,rank+process));
304 64 equemene
      printf("\t(%s,%i) found %lld for process %i\n",hostname,rank,
305 64 equemene
             (long long)insideOpenMP[i],i);
306 64 equemene
  }
307 64 equemene
  printf("\n");
308 64 equemene
309 64 equemene
  part_inside=0;
310 64 equemene
  for (int i=0 ; i<recv.process; i++) {
311 64 equemene
    part_inside+=insideOpenMP[i];
312 64 equemene
  }
313 64 equemene
314 64 equemene
#ifdef TIME
315 64 equemene
      gettimeofday(&end,(struct timezone *)0);
316 64 equemene
      useconds=(end.tv_sec-start.tv_sec)*1000000+end.tv_usec-start.tv_usec;
317 64 equemene
318 64 equemene
      printf("\tOn %s rank %i find %lld inside in %lu useconds.\n",
319 64 equemene
             hostname,rank,(long long)part_inside,useconds);
320 64 equemene
#else
321 64 equemene
      printf("\tOn %s rank %i find %lld inside\n",hostname,rank,
322 64 equemene
             (long long)part_inside);
323 64 equemene
324 64 equemene
#endif
325 76 equemene
326 64 equemene
#ifdef LONG
327 76 equemene
      rc = MPI_Isend(&part_inside, 1, MPI_LONG_LONG, 0, tag, MPI_COMM_WORLD, &RequestSend2);
328 64 equemene
#else
329 76 equemene
      rc = MPI_Isend(&part_inside, 1, MPI_INT, 0, tag, MPI_COMM_WORLD, &RequestSend2);
330 64 equemene
#endif
331 64 equemene
332 76 equemene
      MPI_Wait(&RequestSend2, &Stat);
333 64 equemene
    }
334 64 equemene
335 64 equemene
  MPI_Type_free(&mpi_node_type);
336 64 equemene
337 64 equemene
  MPI_Finalize();
338 64 equemene
339 64 equemene
}