Statistics
| Revision:

root / Pi / C / OpenACC / Pi_OpenACC.c @ 183

History | View | Annotate | Download (5.4 kB)

1 183 equemene
//
2 183 equemene
// Estimation of Pi using Monte Carlo exploration process
3 183 equemene
// Cecill v2 Emmanuel QUEMENER <emmanuel.quemener@gmail.com>
4 183 equemene
// Exploit OpenACC on Nvidia GPU
5 183 equemene
// module load
6 183 equemene
// icpc -std=c99 -O3 -o Pi_OpenACC Pi_OpenACC.c -lm
7 183 equemene
//
8 183 equemene
9 183 equemene
#include <math.h>
10 183 equemene
#include <stdio.h>
11 183 equemene
#include <stdlib.h>
12 183 equemene
#include <limits.h>
13 183 equemene
#include <openacc.h>
14 183 equemene
#include <sys/time.h>
15 183 equemene
16 183 equemene
// Marsaglia RNG very simple implementation
//
// Each macro advances its generator state in place and evaluates to the new
// unsigned int draw. The state variables must be visible at the point of use:
//   MWC  needs z and w
//   SHR3 needs jsr
//   CONG needs jcong
//   KISS needs all four (it combines the three generators above)
#define znew  ((z=36969*(z&65535)+(z>>16))<<16)
#define wnew  ((w=18000*(w&65535)+(w>>16))&65535)
#define MWC   (znew+wnew)
// The three xor-shift steps are chained with the comma operator so that each
// update of jsr is sequenced before the next one. The nested-assignment
// spelling of this macro modifies jsr several times without sequencing, which
// is undefined behaviour in C.
#define SHR3  (jsr^=(jsr<<17),jsr^=(jsr>>13),jsr^=(jsr<<5))
#define CONG  (jcong=69069*jcong+1234567)
#define KISS  ((MWC^CONG)+SHR3)

// Floating point variants: the integer draw scaled by about 2^-32.
// They are intentionally not wrapped in parentheses: a caller writing
// (double)XXXfp gets the cast applied to the integer draw, so the product is
// then evaluated in double precision.
#define MWCfp MWC * 2.328306435454494e-10f
#define KISSfp KISS * 2.328306435454494e-10f
#define SHR3fp SHR3 * 2.328306435454494e-10f
#define CONGfp CONG * 2.328306435454494e-10f
28 183 equemene
29 183 equemene
// Default total number of Monte Carlo draws (used when no argument is given)
#define ITERATIONS 1000000000

// Default number of independent tasks the draws are split into
#define PARALLELRATE 1024

// Integer type used for iteration and hit counters.
// Build with -DLONG to count with long long instead of int.
#ifdef LONG
#define LENGTH long long
#else
#define LENGTH int
#endif
38 183 equemene
39 183 equemene
// LENGTH splitter(int,int,int,LENGTH);
40 183 equemene
41 183 equemene
#pragma acc routine
42 183 equemene
LENGTH MainLoopGlobal(LENGTH iterations,unsigned int seed_w,unsigned int seed_z)
43 183 equemene
{
44 183 equemene
#if defined TCONG
45 183 equemene
   unsigned int jcong=seed_z;
46 183 equemene
#elif defined TSHR3
47 183 equemene
   unsigned int jsr=seed_w;
48 183 equemene
#elif defined TMWC
49 183 equemene
   unsigned int z=seed_z;
50 183 equemene
   unsigned int w=seed_w;
51 183 equemene
#elif defined TKISS
52 183 equemene
   unsigned int jcong=seed_z;
53 183 equemene
   unsigned int jsr=seed_w;
54 183 equemene
   unsigned int z=seed_z;
55 183 equemene
   unsigned int w=seed_w;
56 183 equemene
#endif
57 183 equemene
58 183 equemene
   LENGTH total=0;
59 183 equemene
60 183 equemene
   for (LENGTH i=0;i<iterations;i++) {
61 183 equemene
62 183 equemene
#if defined TINT32
63 183 equemene
    #define THEONE 1073741824
64 183 equemene
    #if defined TCONG
65 183 equemene
        unsigned int x=CONG>>17 ;
66 183 equemene
        unsigned int y=CONG>>17 ;
67 183 equemene
    #elif defined TSHR3
68 183 equemene
        unsigned int x=SHR3>>17 ;
69 183 equemene
        unsigned int y=SHR3>>17 ;
70 183 equemene
    #elif defined TMWC
71 183 equemene
        unsigned int x=MWC>>17 ;
72 183 equemene
        unsigned int y=MWC>>17 ;
73 183 equemene
    #elif defined TKISS
74 183 equemene
        unsigned int x=KISS>>17 ;
75 183 equemene
        unsigned int y=KISS>>17 ;
76 183 equemene
    #endif
77 183 equemene
#elif defined TINT64
78 183 equemene
    #define THEONE 4611686018427387904
79 183 equemene
    #if defined TCONG
80 183 equemene
        unsigned long x=(unsigned long)(CONG>>1) ;
81 183 equemene
        unsigned long y=(unsigned long)(CONG>>1) ;
82 183 equemene
    #elif defined TSHR3
83 183 equemene
        unsigned long x=(unsigned long)(SHR3>>1) ;
84 183 equemene
        unsigned long y=(unsigned long)(SHR3>>1) ;
85 183 equemene
    #elif defined TMWC
86 183 equemene
        unsigned long x=(unsigned long)(MWC>>1) ;
87 183 equemene
        unsigned long y=(unsigned long)(MWC>>1) ;
88 183 equemene
    #elif defined TKISS
89 183 equemene
        unsigned long x=(unsigned long)(KISS>>1) ;
90 183 equemene
        unsigned long y=(unsigned long)(KISS>>1) ;
91 183 equemene
    #endif
92 183 equemene
#elif defined TFP32
93 183 equemene
    #define THEONE 1.0f
94 183 equemene
    #if defined TCONG
95 183 equemene
        float x=CONGfp ;
96 183 equemene
        float y=CONGfp ;
97 183 equemene
    #elif defined TSHR3
98 183 equemene
        float x=SHR3fp ;
99 183 equemene
        float y=SHR3fp ;
100 183 equemene
    #elif defined TMWC
101 183 equemene
        float x=MWCfp ;
102 183 equemene
        float y=MWCfp ;
103 183 equemene
    #elif defined TKISS
104 183 equemene
      float x=KISSfp ;
105 183 equemene
      float y=KISSfp ;
106 183 equemene
    #endif
107 183 equemene
#elif defined TFP64
108 183 equemene
    #define THEONE 1.0f
109 183 equemene
    #if defined TCONG
110 183 equemene
        double x=(double)CONGfp ;
111 183 equemene
        double y=(double)CONGfp ;
112 183 equemene
    #elif defined TSHR3
113 183 equemene
        double x=(double)SHR3fp ;
114 183 equemene
        double y=(double)SHR3fp ;
115 183 equemene
    #elif defined TMWC
116 183 equemene
        double x=(double)MWCfp ;
117 183 equemene
        double y=(double)MWCfp ;
118 183 equemene
    #elif defined TKISS
119 183 equemene
        double x=(double)KISSfp ;
120 183 equemene
        double y=(double)KISSfp ;
121 183 equemene
    #endif
122 183 equemene
#endif
123 183 equemene
124 183 equemene
      // Matching test
125 183 equemene
      unsigned long inside=((x*x+y*y) < THEONE) ? 1:0;
126 183 equemene
      total+=inside;
127 183 equemene
128 183 equemene
   }
129 183 equemene
130 183 equemene
   return(total);
131 183 equemene
}
132 183 equemene
133 183 equemene
// Split the work into ParallelRate tasks, run MainLoopGlobal once per task
// and return the sum of the per-task hit counts.
//
//   iterations   : total number of draws requested; each task performs
//                  iterations/ParallelRate draws, rounded up
//   seed_w       : base seed; task i is started with seed_w+i
//   seed_z       : base seed; task i is started with seed_z+i
//   ParallelRate : number of tasks, from 1 to 1048576
//
// Prints the hit count of every task, then the parallel rate, the elapsed
// wall-clock time of the task loop and the resulting draws per second.
// Returns 0 after reporting on stderr when ParallelRate is out of range.
// Not reentrant: the per-task results live in a static table.
LENGTH splitter(LENGTH iterations,int seed_w,int seed_z,int ParallelRate) {

  // Capacity of the per-task result table
  enum { MAX_TASKS = 1048576 };

  // Static storage: several megabytes of automatic storage could exceed the
  // stack limit
  static LENGTH inside[MAX_TASKS];
  LENGTH insides=0;
  struct timeval tv1,tv2;

  // Guards both the table bounds and the divisions below
  if (ParallelRate < 1 || ParallelRate > MAX_TASKS) {
    fprintf(stderr,"splitter: ParallelRate must be between 1 and %d (got %d)\n",
            (int)MAX_TASKS,ParallelRate);
    return(0);
  }

  // Round up so that at least `iterations` draws are performed overall
  LENGTH IterationsEach=((iterations%ParallelRate)==0)?iterations/ParallelRate:iterations/ParallelRate+1;

#if _OPENACC
  acc_init(acc_device_nvidia);
#endif

  // The timezone argument of gettimeofday is obsolete: pass NULL
  gettimeofday(&tv1, NULL);
#pragma omp parallel for shared(ParallelRate,inside)
#pragma acc kernels loop
  for (int i=0 ; i<ParallelRate; i++) {
    inside[i]=MainLoopGlobal(IterationsEach,seed_w+i,seed_z+i);
  }

  gettimeofday(&tv2, NULL);

  for (int i=0 ; i<ParallelRate; i++) {
    printf("\tFound %lld for case %i\n",(long long)inside[i],i);
    insides+=inside[i];
  }
  printf("\n");

  double elapsed=(double)((tv2.tv_sec-tv1.tv_sec) * 1000000L +
                          (tv2.tv_usec-tv1.tv_usec))/1000000;

  // Multiply in double: the integer product may not fit in LENGTH
  double itops=((double)ParallelRate*(double)IterationsEach)/elapsed;

  printf("ParallelRate %i\nElapsed Time %.2f\nItops %.0f\n",ParallelRate,elapsed,itops);

  return(insides);
}
169 183 equemene
170 183 equemene
int main(int argc, char *argv[]) {
171 183 equemene
172 183 equemene
  unsigned int seed_w=110271,seed_z=101008,ParallelRate=PARALLELRATE;
173 183 equemene
  LENGTH iterations=ITERATIONS;
174 183 equemene
  LENGTH insides=0;
175 183 equemene
176 183 equemene
  if (argc > 1) {
177 183 equemene
    iterations=(LENGTH)atoll(argv[1]);
178 183 equemene
    ParallelRate=atoi(argv[2]);
179 183 equemene
  }
180 183 equemene
  else {
181 183 equemene
    printf("\n\tPi : Estimate Pi with Monte Carlo exploration\n\n");
182 183 equemene
    printf("\t\t#1 : number of iterations (default 1 billion)\n");
183 183 equemene
    printf("\t\t#2 : Parallel Rate (default 1024)\n\n");
184 183 equemene
  }
185 183 equemene
186 183 equemene
  printf ("\n\tInformation about architecture:\n\n");
187 183 equemene
188 183 equemene
  printf ("\tSizeof int = %lld bytes.\n", (long long)sizeof(int));
189 183 equemene
  printf ("\tSizeof long = %lld bytes.\n", (long long)sizeof(long));
190 183 equemene
  printf ("\tSizeof long long = %lld bytes.\n\n", (long long)sizeof(long long));
191 183 equemene
192 183 equemene
  printf ("\tMax int = %u\n", INT_MAX);
193 183 equemene
  printf ("\tMax long = %ld\n", LONG_MAX);
194 183 equemene
  printf ("\tMax long long = %lld\n\n", LLONG_MAX);
195 183 equemene
196 183 equemene
  insides=splitter(iterations,seed_w,seed_z,ParallelRate);
197 183 equemene
198 183 equemene
  LENGTH total=((iterations%ParallelRate)==0)?iterations:(iterations/ParallelRate+1)*ParallelRate;
199 183 equemene
200 183 equemene
  printf("Inside/Total %ld %ld\nPi estimation %f\n\n",insides,total,(4.*(float)insides/total));
201 183 equemene
202 183 equemene
}