Revision 183

Pi/C/Hybrid/Pi_Hybrid.c (revision 183)
43 43
#endif
44 44

  
45 45
typedef struct compute_node {
46
        LENGTH iterations;
47
        int process;
46
  LENGTH iterations;
47
  int process;
48 48
} node;
49 49

  
50
typedef struct compute_node {
51
        LENGTH inside;
52
        long int useconds;
50
typedef struct compute_result {
51
  LENGTH inside;
52
  long int useconds;
53 53
} result;
54 54

  
55 55
// Rotate `value` left by `shift` bit positions.
//
// The shift count is reduced modulo the width of unsigned int, so a count of
// 0, a count equal to or larger than the width, and a negative count are all
// well defined (a negative count rotates right by the same amount).
// The plain expression (v << s) | (v >> (width - s)) is undefined for s == 0
// because it shifts by the full width, and callers do pass 0.
unsigned int rotl(unsigned int value, int shift) {
  const unsigned int width = (unsigned int)(sizeof(value) * CHAR_BIT);
  const unsigned int count = (unsigned int)shift % width;

  if (count == 0) {
    return value;
  }
  return (value << count) | (value >> (width - count));
}
58
 
58

  
59 59
// Rotate `value` right by `shift` bit positions.
//
// The shift count is reduced modulo the width of unsigned int, so a count of
// 0, a count equal to or larger than the width, and a negative count are all
// well defined (a negative count rotates left by the same amount).
// The plain expression (v >> s) | (v << (width - s)) is undefined for s == 0
// because it shifts by the full width, and callers do pass 0.
unsigned int rotr(unsigned int value, int shift) {
  const unsigned int width = (unsigned int)(sizeof(value) * CHAR_BIT);
  const unsigned int count = (unsigned int)shift % width;

  if (count == 0) {
    return value;
  }
  return (value >> count) | (value << (width - count));
}
62 62

  
63 63
// Draw `iterations` pseudo-random points (x,y) and count those for which
// x*x+y*y < THEONE.
//
// The generator is chosen at compile time by TCONG, TSHR3, TMWC or TKISS and
// the coordinate type by TINT32, TINT64, TFP32 or TFP64; when several macros
// of a family are defined, the first one in that order wins.
// seed_w and seed_z initialise the generator state (TCONG only reads seed_z,
// TSHR3 only reads seed_w). Returns the number of hits.
LENGTH MainLoopGlobal(LENGTH iterations,unsigned int seed_w,unsigned int seed_z)
{
  // Generator state plus a single binding of the selected generator, so the
  // coordinate declarations below are written once instead of once per
  // generator.
#if defined TCONG
  unsigned int jcong=seed_z;
#define PI_DRAW CONG
#define PI_DRAW_FP CONGfp
#elif defined TSHR3
  unsigned int jsr=seed_w;
#define PI_DRAW SHR3
#define PI_DRAW_FP SHR3fp
#elif defined TMWC
  unsigned int z=seed_z;
  unsigned int w=seed_w;
#define PI_DRAW MWC
#define PI_DRAW_FP MWCfp
#elif defined TKISS
  unsigned int jcong=seed_z;
  unsigned int jsr=seed_w;
  unsigned int z=seed_z;
  unsigned int w=seed_w;
#define PI_DRAW KISS
#define PI_DRAW_FP KISSfp
#endif

  // Coordinate type, how a draw is turned into a coordinate, and the squared
  // radius THEONE expressed in the same scale.
#if defined TINT32
#define THEONE 1073741824
#define PI_COORD(name) unsigned int name=PI_DRAW>>17
#elif defined TINT64
#define THEONE 4611686018427387904
#define PI_COORD(name) unsigned long name=(unsigned long)(PI_DRAW>>1)
#elif defined TFP32
#define THEONE 1.0f
#define PI_COORD(name) float name=PI_DRAW_FP
#elif defined TFP64
#define THEONE 1.0f
#define PI_COORD(name) double name=(double)PI_DRAW_FP
#endif

  LENGTH total=0;

  for (LENGTH i=0;i<iterations;i++) {
    // x is drawn before y: the order fixes the generator sequence.
    PI_COORD(x);
    PI_COORD(y);

    // Matching test
    unsigned long hit=((x*x+y*y) < THEONE) ? 1:0;
    total+=hit;
  }

  // The helper bindings are private to this function; THEONE stays defined.
#undef PI_COORD
#undef PI_DRAW_FP
#undef PI_DRAW

  return(total);
}
154 154

  
155 155
int main(int argc, char *argv[]) {
156

  
156
  
157 157
  unsigned int seed_z=362436069,seed_w=52128862,process=PROCESS;
158 158
  // Number of NP or OpenMP processes <1024
159 159
  LENGTH iterations=ITERATIONS,insideMPI[8192],insideOpenMP[1024],
160
    part_inside,part_iterations,insides;
160
    part_inside=0,part_iterations,insides=0;
161 161
  int numtasks,rank,rc,tag=1,i;
162 162
  float pi;
163 163
  
164 164
  // Hostname supposed to be <128 characters
165 165
  char hostname[128];
166

  
166
  
167 167
  gethostname(hostname, sizeof hostname);
168

  
168
  
169 169
  struct timeval start,end;
170 170
  long int useconds;
171

  
171
  
172 172
  MPI_Status Stat;
173 173
  
174 174
  rc = MPI_Init(&argc,&argv);
......
179 179

  
180 180
  MPI_Comm_size(MPI_COMM_WORLD,&numtasks);
181 181
  MPI_Comm_rank(MPI_COMM_WORLD,&rank);
182

  
182
  
183 183
  const int nitems=2;
184 184
  int blocklengths[2] = {1,1};
185

  
185
  
186 186
#ifdef LONG
187 187
  MPI_Datatype types_node[2] = {MPI_LONG, MPI_INT};
188 188
  MPI_Datatype types_result[2] = {MPI_LONG, MPI_LONG};
......
190 190
  MPI_Datatype types_node[2] = {MPI_INT, MPI_INT};
191 191
  MPI_Datatype types_result[2] = {MPI_INT, MPI_LONG};
192 192
#endif
193

  
193
  
194 194
  MPI_Datatype mpi_node_type,mpi_result_type;
195 195
  MPI_Aint     offsets[2],offsetsr[2];
196

  
196
  
197 197
  offsets[0] = offsetof(node, iterations);
198 198
  offsets[1] = offsetof(node, process);
199

  
200
  MPI_Type_create_struct(nitems, blocklengths, offsets, types, &mpi_node_type);
199
  
200
  MPI_Type_create_struct(nitems, blocklengths, offsets, types_node, &mpi_node_type);
201 201
  MPI_Type_commit(&mpi_node_type);
202

  
202
  
203 203
  offsetsr[0] = offsetof(result, inside);
204 204
  offsetsr[1] = offsetof(result, useconds);
205

  
206
  MPI_Type_create_struct(nitems, blocklengths, offsetsr, types, &mpi_result_type);
205
  
206
  MPI_Type_create_struct(nitems, blocklengths, offsetsr, types_result, &mpi_result_type);
207 207
  MPI_Type_commit(&mpi_result_type);
208

  
208
  
209 209
  if (rank==0) {
210 210
    
211 211
    if (argc > 1) {
......
214 214
    }
215 215
    else {
216 216
      printf("\n\tPi : Estimate Pi with Monte Carlo exploration\n\n");
217
      printf("\t\t#1 : number of iterations (default 1 billion)\n\n");
218
      printf("\t\t#1 : number of OpenMP processes (default 1)\n\n");
217
      printf("\t\t#1 : number of iterations (default 1 billion)\n");
218
      printf("\t\t#2 : number of OpenMP processes (default 1)\n\n");
219 219
    }
220 220
    
221 221
    printf ("\n\tInformation about architecture:\n\n");
222

  
223
    printf ("Sizeof int = %lld bytes.\n", (long long)sizeof(int));
224
    printf ("Sizeof long = %lld bytes.\n", (long long)sizeof(long));
225
    printf ("Sizeof long long = %lld bytes.\n", (long long)sizeof(long long));
226 222
    
227
    printf ("Max int = %u\n", INT_MAX);
228
    printf ("Max long = %ld\n", LONG_MAX);
229
    printf ("Max long long = %lld\n\n", LLONG_MAX);
223
    printf ("\tSizeof int = %lld bytes.\n", (long long)sizeof(int));
224
    printf ("\tSizeof long = %lld bytes.\n", (long long)sizeof(long));
225
    printf ("\tSizeof long long = %lld bytes.\n", (long long)sizeof(long long));
230 226
    
231
    part_iterations=((iterations%numtasks) == 0) ? iterations/numtasks:iterations/numtasks+1 ;
227
    printf ("\tMax int = %u\n", INT_MAX);
228
    printf ("\tMax long = %ld\n", LONG_MAX);
229
    printf ("\tMax long long = %lld\n\n", LLONG_MAX);
232 230
    
231
    part_iterations=(((iterations%numtasks)%process) == 0) ? iterations/numtasks/process:iterations/numtasks/process+1 ;
232
    
233 233
    node send;
234 234
    send.iterations=part_iterations;
235 235
    send.process=process;
......
242 242
    gettimeofday(&start,(struct timezone *)0);
243 243
    
244 244
#pragma omp parallel for
245
  for (int i=0 ; i<process; i++) {
246
    insideOpenMP[i]=MainLoopGlobal(part_iterations,
247
				   rotr(seed_w,process),
248
				   rotl(seed_z,process));
249
    printf("\t(%s,%i) found %lld for process %i\n",hostname,0,
250
	   (long long)insideOpenMP[i],i);
251
  }
252
  printf("\n");
253

  
254
  insides=0;
255
  for (int i=0 ; i<process; i++) {
256
    insides+=insideOpenMP[i];
257
  }
258

  
259
  gettimeofday(&end,(struct timezone *)0);
260
  useconds=(end.tv_sec-start.tv_sec)*1000000+end.tv_usec-start.tv_usec;
261
  
262
  printf("\tOn %s with %i find %lld inside in %lu useconds.\n",
263
	 hostname,rank,(long long)insides,useconds);
264
      
265
  // Join part of code
266
  for (i=1;i<numtasks;i++) {
267

  
268
    result recv;
245
    for (int i=0 ; i<process; i++) {
246
      insideOpenMP[i]=MainLoopGlobal(part_iterations,
247
				     rotr(seed_w,i),
248
				     rotl(seed_z,i));
249
      /*
250
	printf("\t(%s,%i) found %lld for process %i\n",hostname,0,
251
	(long long)insideOpenMP[i],i); */
252
    }
253
    /*
254
      printf("\n");
255
    */
269 256
    
270
    rc = MPI_Recv(&recv, 1, mpi_result_type, i, tag, MPI_COMM_WORLD,&Stat);
271

  
272
    insideMPI[i]=recv.inside;
273
    useconds=recv.useconds;
274

  
275
    printf("\tReceive from %i, find %lld inside in %lu useconds\n",i,(long long)insideMPI[i],useconds);
257
    insides=0;
258
    for (int i=0 ; i<process; i++) {
259
      insides+=insideOpenMP[i];
260
    }
276 261
    
277
    insides+=insideMPI[i];
278
  }
262
    gettimeofday(&end,(struct timezone *)0);
263
    useconds=(end.tv_sec-start.tv_sec)*1000000+end.tv_usec-start.tv_usec;
264
    
265
    printf("\tOn %s with rank #%i find %lld inside in %lu useconds.\n",
266
	   hostname,rank,(long long)insides,useconds);
267
    
268
    // Join part of code
269
    for (i=1;i<numtasks;i++) {
279 270
      
280
  pi=4.*(float)insides/(float)(part_iterations*numtasks);
271
      result recv;
281 272
      
282
  printf("\n\tPi=%.40f\n\twith error %.40f\n\twith %lld iterations\n\n",pi,
283
	 fabs(pi-4*atan(1.))/pi,(long long)(part_iterations*numtasks));
284

  
273
      rc = MPI_Recv(&recv, 1, mpi_result_type, i, tag, MPI_COMM_WORLD,&Stat);
274
      
275
      insideMPI[i]=recv.inside;
276
      useconds=recv.useconds;
277
      
278
      printf("\tReceive from rank #%i, find %lld inside in %lu useconds\n",i,(long long)insideMPI[i],useconds);
279
    
280
      insides+=insideMPI[i];
281
    }
282
    
283
    pi=4.*(float)insides/(float)(part_iterations*numtasks*process);
284
    
285
    printf("\n\tPi=%.40f\n\twith error %.40f\n\twith %lld iterations\n\n",pi,
286
	   fabs(pi-4*atan(1.))/pi,(long long)(part_iterations*numtasks*process));
287
    
285 288
  }
286 289
  else
287 290
    {
288 291
      // Receive information from master
289
     
292
      
290 293
      node recv;
291

  
294
      
292 295
      rc = MPI_Recv(&recv, 1, mpi_node_type, 0, tag, MPI_COMM_WORLD,&Stat);
293
      /*      
296
      /*   
294 297
      printf("\t(%s,%i) receive from master %lld with %i process\n",
295
             hostname,rank,(long long)recv.iterations,recv.process);
298
      hostname,rank,(long long)recv.iterations,recv.process);
296 299
      */
297

  
300
      
298 301
      gettimeofday(&start,(struct timezone *)0);
299

  
302
      
300 303
#pragma omp parallel for
301 304
      for (int i=0 ; i<recv.process; i++) {
302
	insideOpenMP[i]=MainLoopGlobal(recv.iterations,rotr(seed_w,rank+process),rotl(seed_z,rank+process));
305
	insideOpenMP[i]=MainLoopGlobal(recv.iterations,rotr(seed_w,rank+i),rotl(seed_z,rank-i));
303 306
	/*
304
	printf("\t(%s,%i) found %lld for process %i\n",hostname,rank,
305
	       (long long)insideOpenMP[i],i);
307
	  printf("\t(%s,%i) found %lld for process %i\n",hostname,rank,
308
	  (long long)insideOpenMP[i],i);
306 309
	*/
307 310
      }
308 311
      
309 312
      /* printf("\n"); */
310

  
311
      part_inside=0;
313
      
312 314
      for (int i=0 ; i<recv.process; i++) {
313 315
	part_inside+=insideOpenMP[i];
314 316
      }
......
316 318
      gettimeofday(&end,(struct timezone *)0);
317 319
      useconds=(end.tv_sec-start.tv_sec)*1000000+end.tv_usec-start.tv_usec;
318 320
      /*
319
      printf("\tOn %s rank %i find %lld inside in %lu useconds.\n",
320
	     hostname,rank,(long long)part_inside,useconds);
321
	printf("\tOn %s rank %i find %lld inside in %lu useconds.\n",
322
	hostname,rank,(long long)part_inside,useconds);
321 323
      */
322 324
      result send;
323 325
      send.inside=part_inside;
324 326
      send.useconds=useconds;
325 327
      
326 328
      rc = MPI_Send(&send, 1, mpi_result_type, 0, tag, MPI_COMM_WORLD);
327

  
329
      
328 330
    }
329 331
  
330 332
  MPI_Type_free(&mpi_node_type);
331 333
  MPI_Type_free(&mpi_result_type);
332

  
334
  
333 335
  MPI_Finalize();
334 336
  
335 337
}
Pi/C/OpenACC/Pi_OpenACC.c (revision 183)
1
//
2
// Estimation of Pi using Monte Carlo exploration process
3
// Cecill v2 Emmanuel QUEMENER <emmanuel.quemener@gmail.com>
4
// Exploit OpenACC on Nvidia GPU
5
// module load 
6
// icpc -std=c99 -O3 -o Pi_OpenACC Pi_OpenACC.c -lm 
7
//
8

  
9
#include <math.h>
10
#include <stdio.h>
11
#include <stdlib.h>
12
#include <limits.h>
13
#include <openacc.h>
14
#include <sys/time.h>
15

  
16
// Marsaglia RNG very simple implementation.
// Every macro reads and updates generator state variables (z, w, jsr, jcong)
// that must be declared as unsigned int in the scope where it is expanded.
#define znew  ((z=36969*(z&65535)+(z>>16))<<16)
#define wnew  ((w=18000*(w&65535)+(w>>16))&65535)
#define MWC   (znew+wnew)
// The three xorshift steps are sequenced with the comma operator. Nesting
// them as jsr=(jsr=(jsr=...)^(jsr>>13))^(jsr<<5) both modifies and reads jsr
// without an intervening sequence point, which is undefined behavior.
#define SHR3  (jsr^=(jsr<<17),jsr^=(jsr>>13),jsr^=(jsr<<5))
#define CONG  (jcong=69069*jcong+1234567)
#define KISS  ((MWC^CONG)+SHR3)

// Floating-point draws: the integer draw scaled by a constant close to 2^-32.
// These expansions are intentionally left unparenthesized: MainLoopGlobal
// writes (double)CONGfp, where the cast applies to the integer draw before
// the multiplication. Wrapping the expansion in parentheses would change that.
#define MWCfp MWC * 2.328306435454494e-10f
#define KISSfp KISS * 2.328306435454494e-10f
#define SHR3fp SHR3 * 2.328306435454494e-10f
#define CONGfp CONG * 2.328306435454494e-10f
28

  
29
#define ITERATIONS 1000000000
30

  
31
#define PARALLELRATE 1024
32

  
33
#ifdef LONG
34
#define LENGTH long long
35
#else
36
#define LENGTH int
37
#endif
38

  
39
// LENGTH splitter(int,int,int,LENGTH);
40

  
41
#pragma acc routine
42
LENGTH MainLoopGlobal(LENGTH iterations,unsigned int seed_w,unsigned int seed_z)
43
{
44
#if defined TCONG
45
   unsigned int jcong=seed_z;
46
#elif defined TSHR3
47
   unsigned int jsr=seed_w;
48
#elif defined TMWC
49
   unsigned int z=seed_z;
50
   unsigned int w=seed_w;
51
#elif defined TKISS
52
   unsigned int jcong=seed_z;
53
   unsigned int jsr=seed_w;
54
   unsigned int z=seed_z;
55
   unsigned int w=seed_w;
56
#endif
57
  
58
   LENGTH total=0;
59

  
60
   for (LENGTH i=0;i<iterations;i++) {
61

  
62
#if defined TINT32
63
    #define THEONE 1073741824
64
    #if defined TCONG
65
        unsigned int x=CONG>>17 ;
66
        unsigned int y=CONG>>17 ;
67
    #elif defined TSHR3
68
        unsigned int x=SHR3>>17 ;
69
        unsigned int y=SHR3>>17 ;
70
    #elif defined TMWC
71
        unsigned int x=MWC>>17 ;
72
        unsigned int y=MWC>>17 ;
73
    #elif defined TKISS
74
        unsigned int x=KISS>>17 ;
75
        unsigned int y=KISS>>17 ;
76
    #endif
77
#elif defined TINT64
78
    #define THEONE 4611686018427387904
79
    #if defined TCONG
80
        unsigned long x=(unsigned long)(CONG>>1) ;
81
        unsigned long y=(unsigned long)(CONG>>1) ;
82
    #elif defined TSHR3
83
        unsigned long x=(unsigned long)(SHR3>>1) ;
84
        unsigned long y=(unsigned long)(SHR3>>1) ;
85
    #elif defined TMWC
86
        unsigned long x=(unsigned long)(MWC>>1) ;
87
        unsigned long y=(unsigned long)(MWC>>1) ;
88
    #elif defined TKISS
89
        unsigned long x=(unsigned long)(KISS>>1) ;
90
        unsigned long y=(unsigned long)(KISS>>1) ;
91
    #endif
92
#elif defined TFP32
93
    #define THEONE 1.0f
94
    #if defined TCONG
95
        float x=CONGfp ;
96
        float y=CONGfp ;
97
    #elif defined TSHR3
98
        float x=SHR3fp ;
99
        float y=SHR3fp ;
100
    #elif defined TMWC
101
        float x=MWCfp ;
102
        float y=MWCfp ;
103
    #elif defined TKISS
104
      float x=KISSfp ;
105
      float y=KISSfp ;
106
    #endif
107
#elif defined TFP64
108
    #define THEONE 1.0f
109
    #if defined TCONG
110
        double x=(double)CONGfp ;
111
        double y=(double)CONGfp ;
112
    #elif defined TSHR3
113
        double x=(double)SHR3fp ;
114
        double y=(double)SHR3fp ;
115
    #elif defined TMWC
116
        double x=(double)MWCfp ;
117
        double y=(double)MWCfp ;
118
    #elif defined TKISS
119
        double x=(double)KISSfp ;
120
        double y=(double)KISSfp ;
121
    #endif
122
#endif
123

  
124
      // Matching test
125
      unsigned long inside=((x*x+y*y) < THEONE) ? 1:0;
126
      total+=inside;
127

  
128
   }
129

  
130
   return(total);
131
}
132

  
133
// Run the Monte Carlo kernel as ParallelRate independent streams and add up
// their hit counts.
//
// Each stream calls MainLoopGlobal with iterations/ParallelRate iterations
// (rounded up) and with seed_w and seed_z offset by the stream index.
// Prints the per-stream counts, the elapsed wall-clock time of the parallel
// loop and the resulting iteration rate, then returns the total hit count.
// Terminates the program when ParallelRate is below 1 or the per-stream
// buffer cannot be allocated.
LENGTH splitter(LENGTH iterations,int seed_w,int seed_z,int ParallelRate) {

  // A rate of 0 would divide by zero below; a negative one makes no sense.
  if (ParallelRate < 1) {
    fprintf(stderr,"splitter: ParallelRate must be >= 1 (got %i)\n",ParallelRate);
    exit(EXIT_FAILURE);
  }

  // Per-stream results live on the heap and are sized by ParallelRate: a
  // fixed automatic array is both a stack-overflow risk and an unchecked
  // upper bound on ParallelRate.
  LENGTH *inside=malloc((size_t)ParallelRate*sizeof *inside);
  if (inside == NULL) {
    perror("splitter: malloc");
    exit(EXIT_FAILURE);
  }

  LENGTH insides=0;
  struct timeval tv1,tv2;
  LENGTH IterationsEach=((iterations%ParallelRate)==0)?iterations/ParallelRate:iterations/ParallelRate+1;

#if _OPENACC
  acc_init(acc_device_nvidia);
#endif

  // The timezone argument of gettimeofday is obsolete; NULL is the portable value.
  gettimeofday(&tv1, NULL);
  // inside is a pointer, so its extent is spelled out for the accelerator copy.
#pragma omp parallel for shared(ParallelRate,inside)
#pragma acc kernels loop copyout(inside[0:ParallelRate])
  for (int i=0 ; i<ParallelRate; i++) {
    inside[i]=MainLoopGlobal(IterationsEach,seed_w+i,seed_z+i);
  }

  gettimeofday(&tv2, NULL);

  for (int i=0 ; i<ParallelRate; i++) {
    printf("\tFound %lld for case %i\n",(long long)inside[i],i);
    insides+=inside[i];
  }
  printf("\n");

  free(inside);

  double elapsed=(double)((tv2.tv_sec-tv1.tv_sec) * 1000000L +
			  (tv2.tv_usec-tv1.tv_usec))/1000000;

  // Multiply in double so the product cannot overflow when LENGTH is int.
  double itops=((double)ParallelRate*(double)IterationsEach)/elapsed;

  printf("ParallelRate %i\nElapsed Time %.2f\nItops %.0f\n",ParallelRate,elapsed,itops);

  return(insides);
}
169
 
170
int main(int argc, char *argv[]) {
171

  
172
  unsigned int seed_w=110271,seed_z=101008,ParallelRate=PARALLELRATE;
173
  LENGTH iterations=ITERATIONS;
174
  LENGTH insides=0;
175

  
176
  if (argc > 1) {
177
    iterations=(LENGTH)atoll(argv[1]);
178
    ParallelRate=atoi(argv[2]);
179
  }
180
  else {
181
    printf("\n\tPi : Estimate Pi with Monte Carlo exploration\n\n");
182
    printf("\t\t#1 : number of iterations (default 1 billion)\n");
183
    printf("\t\t#2 : Parallel Rate (default 1024)\n\n");
184
  }
185

  
186
  printf ("\n\tInformation about architecture:\n\n");
187

  
188
  printf ("\tSizeof int = %lld bytes.\n", (long long)sizeof(int));
189
  printf ("\tSizeof long = %lld bytes.\n", (long long)sizeof(long));
190
  printf ("\tSizeof long long = %lld bytes.\n\n", (long long)sizeof(long long));
191

  
192
  printf ("\tMax int = %u\n", INT_MAX);
193
  printf ("\tMax long = %ld\n", LONG_MAX);
194
  printf ("\tMax long long = %lld\n\n", LLONG_MAX);
195

  
196
  insides=splitter(iterations,seed_w,seed_z,ParallelRate);
197

  
198
  LENGTH total=((iterations%ParallelRate)==0)?iterations:(iterations/ParallelRate+1)*ParallelRate;
199

  
200
  printf("Inside/Total %ld %ld\nPi estimation %f\n\n",insides,total,(4.*(float)insides/total));
201
   
202
}
Pi/C/OpenACC/Makefile (revision 183)
1
# Compile all versions of Pi_OpenACC using Nvidia GPU
# Cecill v2 Emmanuel QUEMENER <emmanuel.quemener@gmail.com>

SOURCE=Pi_OpenACC.c

# One executable is built per (COMPUTING, MARSAGLIA) pair; the pair is passed
# to the source as -DT<type> -DT<generator>.
COMPUTING=INT32 INT64 FP32 FP64
MARSAGLIA=SHR3 CONG MWC KISS

CC=pgcc
CFLAGS=-O3
LIBRARY=
ACCFLAGS=-acc -ta=tesla:cc70 -Minfo=accel

# None of these targets names a file.
.PHONY: all clean check mrproper

# Each compile is followed by "|| exit 1" so that a failing variant stops the
# build instead of being hidden by the commands chained after it.
all: $(SOURCE)
	$(foreach TVAR,$(COMPUTING),$(foreach TRND,$(MARSAGLIA),$(CC) $(CFLAGS) $(ACCFLAGS) -DT$(TVAR) -DT$(TRND) -DLONG -DTIME -o $(<:.c=)_$(TVAR)_$(TRND) $< $(LIBRARY) || exit 1; ) )

# Remove editor backup files.
clean:
	find . -name "*~" -exec rm {} \;

# Remove every generated executable, then everything clean removes.
mrproper: clean
	rm -f $(foreach SRC,$(SOURCE),$(foreach TVAR,$(COMPUTING),$(foreach TRND,$(MARSAGLIA),$(SRC:.c=)_$(TVAR)_$(TRND) ) ) )

check:
	@echo "To be Defined"
Pi/C/OpenMP/bench.sh (revision 183)
1
#!/bin/bash
# Benchmark driver: runs the executable REPEAT times for every thread count
# from 1 to PROCESS and appends the timed output of each run to a log file.
#
# Usage: bench.sh [executable [iterations [max_threads]]]

EXE=Pi_OpenMP_LONG
ITERATIONS=10000000000
TIME=time

REPEAT=10
PROCESS=16

# Optional positional overrides. -n with a quoted expansion is well defined
# for missing arguments and for arguments containing spaces.
[ -n "$1" ] && EXE=$1
[ -n "$2" ] && ITERATIONS=$2
[ -n "$3" ] && PROCESS=$3

LOGFILE="${EXE}_${HOSTNAME}_${ITERATIONS}.log"

# Create or truncate the log file.
: > "$LOGFILE"
p=1
while [ "$p" -le "$PROCESS" ]
do
    export OMP_NUM_THREADS=$p
    echo -e "Process $p" >> "$LOGFILE"
    echo -ne "Start $EXE with $ITERATIONS and $p : "
    i=1
    while [ "$i" -le "$REPEAT" ]
    do
        echo -ne "$i "
        # TIME is expanded from a variable, so it runs the external time
        # command and its report lands in the log through 2>&1.
        $TIME "./$EXE" "$ITERATIONS" "$p" >> "$LOGFILE" 2>&1
        i=$((i+1))
    done
    echo
    p=$((p+1))
done

Also available in: Unified diff