Statistiques
| Révision :

root / Pi / C / OpenACC / Pi_OpenACC_PGI.c @ 284

Historique | Voir | Annoter | Télécharger (5,46 ko)

1 283 equemene
//
2 283 equemene
// Estimation of Pi using Monte Carlo exploration process
3 283 equemene
// Cecill v2 Emmanuel QUEMENER <emmanuel.quemener@gmail.com>
4 283 equemene
// Exploit OpenACC on Nvidia GPU
5 283 equemene
// module load
6 283 equemene
// icpc -std=c99 -O3 -o Pi_OpenACC Pi_OpenACC.c -lm
7 283 equemene
//
8 283 equemene
9 283 equemene
#include <math.h>
10 283 equemene
#include <stdio.h>
11 283 equemene
#include <stdlib.h>
12 283 equemene
#include <limits.h>
13 283 equemene
#include <openacc.h>
14 283 equemene
#include <sys/time.h>
15 283 equemene
16 283 equemene
// Marsaglia RNG very simple implementation.
// Every macro below updates unsigned 32-bit state variables (z, w, jsr,
// jcong) that must be declared in the scope where the macro is expanded,
// and evaluates to the freshly generated value.
#define znew  ((z=36969*(z&65535)+(z>>16))<<16)
#define wnew  ((w=18000*(w&65535)+(w>>16))&65535)
#define MWC   (znew+wnew)
// The three xorshift steps are chained with the comma operator so that each
// read of jsr is sequenced after the previous update. Writing them as one
// nested assignment expression reads and modifies jsr without sequencing,
// which is undefined behavior in C.
#define SHR3  (jsr^=(jsr<<17),jsr^=(jsr>>13),jsr^=(jsr<<5))
#define CONG  (jcong=69069*jcong+1234567)
#define KISS  ((MWC^CONG)+SHR3)

// Floating-point variants: the 32-bit draw scaled by roughly 2^-32.
// NOTE: these expansions are intentionally left unparenthesized. A cast
// written in front of them, e.g. (double)CONGfp, binds to the integer draw
// and the multiplication is then carried out in double precision.
#define MWCfp MWC * 2.328306435454494e-10f
#define KISSfp KISS * 2.328306435454494e-10f
#define SHR3fp SHR3 * 2.328306435454494e-10f
#define CONGfp CONG * 2.328306435454494e-10f
28 283 equemene
29 283 equemene
#define ITERATIONS 1000000000
30 283 equemene
31 283 equemene
#define PARALLELRATE 1024
32 283 equemene
33 283 equemene
#ifdef LONG
34 283 equemene
#define LENGTH long long
35 283 equemene
#else
36 283 equemene
#define LENGTH int
37 283 equemene
#endif
38 283 equemene
39 283 equemene
// LENGTH splitter(int,int,int,LENGTH);
40 283 equemene
41 283 equemene
#pragma acc routine
42 283 equemene
LENGTH MainLoopGlobal(LENGTH iterations,unsigned int seed_w,unsigned int seed_z)
43 283 equemene
{
44 283 equemene
#if defined TCONG
45 283 equemene
   unsigned int jcong=seed_z;
46 283 equemene
#elif defined TSHR3
47 283 equemene
   unsigned int jsr=seed_w;
48 283 equemene
#elif defined TMWC
49 283 equemene
   unsigned int z=seed_z;
50 283 equemene
   unsigned int w=seed_w;
51 283 equemene
#elif defined TKISS
52 283 equemene
   unsigned int jcong=seed_z;
53 283 equemene
   unsigned int jsr=seed_w;
54 283 equemene
   unsigned int z=seed_z;
55 283 equemene
   unsigned int w=seed_w;
56 283 equemene
#endif
57 283 equemene
58 283 equemene
   LENGTH total=0;
59 283 equemene
60 283 equemene
   for (LENGTH i=0;i<iterations;i++) {
61 283 equemene
62 283 equemene
#if defined TINT32
63 283 equemene
    #define THEONE 1073741824
64 283 equemene
    #if defined TCONG
65 283 equemene
        unsigned int x=CONG>>17 ;
66 283 equemene
        unsigned int y=CONG>>17 ;
67 283 equemene
    #elif defined TSHR3
68 283 equemene
        unsigned int x=SHR3>>17 ;
69 283 equemene
        unsigned int y=SHR3>>17 ;
70 283 equemene
    #elif defined TMWC
71 283 equemene
        unsigned int x=MWC>>17 ;
72 283 equemene
        unsigned int y=MWC>>17 ;
73 283 equemene
    #elif defined TKISS
74 283 equemene
        unsigned int x=KISS>>17 ;
75 283 equemene
        unsigned int y=KISS>>17 ;
76 283 equemene
    #endif
77 283 equemene
#elif defined TINT64
78 283 equemene
    #define THEONE 4611686018427387904
79 283 equemene
    #if defined TCONG
80 283 equemene
        unsigned long x=(unsigned long)(CONG>>1) ;
81 283 equemene
        unsigned long y=(unsigned long)(CONG>>1) ;
82 283 equemene
    #elif defined TSHR3
83 283 equemene
        unsigned long x=(unsigned long)(SHR3>>1) ;
84 283 equemene
        unsigned long y=(unsigned long)(SHR3>>1) ;
85 283 equemene
    #elif defined TMWC
86 283 equemene
        unsigned long x=(unsigned long)(MWC>>1) ;
87 283 equemene
        unsigned long y=(unsigned long)(MWC>>1) ;
88 283 equemene
    #elif defined TKISS
89 283 equemene
        unsigned long x=(unsigned long)(KISS>>1) ;
90 283 equemene
        unsigned long y=(unsigned long)(KISS>>1) ;
91 283 equemene
    #endif
92 283 equemene
#elif defined TFP32
93 283 equemene
    #define THEONE 1.0f
94 283 equemene
    #if defined TCONG
95 283 equemene
        float x=CONGfp ;
96 283 equemene
        float y=CONGfp ;
97 283 equemene
    #elif defined TSHR3
98 283 equemene
        float x=SHR3fp ;
99 283 equemene
        float y=SHR3fp ;
100 283 equemene
    #elif defined TMWC
101 283 equemene
        float x=MWCfp ;
102 283 equemene
        float y=MWCfp ;
103 283 equemene
    #elif defined TKISS
104 283 equemene
      float x=KISSfp ;
105 283 equemene
      float y=KISSfp ;
106 283 equemene
    #endif
107 283 equemene
#elif defined TFP64
108 283 equemene
    #define THEONE 1.0f
109 283 equemene
    #if defined TCONG
110 283 equemene
        double x=(double)CONGfp ;
111 283 equemene
        double y=(double)CONGfp ;
112 283 equemene
    #elif defined TSHR3
113 283 equemene
        double x=(double)SHR3fp ;
114 283 equemene
        double y=(double)SHR3fp ;
115 283 equemene
    #elif defined TMWC
116 283 equemene
        double x=(double)MWCfp ;
117 283 equemene
        double y=(double)MWCfp ;
118 283 equemene
    #elif defined TKISS
119 283 equemene
        double x=(double)KISSfp ;
120 283 equemene
        double y=(double)KISSfp ;
121 283 equemene
    #endif
122 283 equemene
#endif
123 283 equemene
124 283 equemene
      // Matching test
125 283 equemene
      unsigned long inside=((x*x+y*y) < THEONE) ? 1:0;
126 283 equemene
      total+=inside;
127 283 equemene
128 283 equemene
   }
129 283 equemene
130 283 equemene
   return(total);
131 283 equemene
}
132 283 equemene
133 283 equemene
// Split the sampling into ParallelRate independent calls to MainLoopGlobal,
// time them, print the per-call counts and throughput, and return the sum.
//
//   iterations   : total number of points requested
//   seed_w/seed_z: base seeds; call number i receives seed_w+i and seed_z+i
//   ParallelRate : number of independent calls (must be greater than 0)
//
// Each call performs ceil(iterations/ParallelRate) iterations, so the number
// of points actually drawn is ParallelRate times that value.
//
// Returns the total count over all calls, or 0 if ParallelRate is 0.
// Terminates the program if the result buffer cannot be allocated.
LENGTH splitter(LENGTH iterations,unsigned int seed_w,unsigned int seed_z,unsigned int ParallelRate) {

  // A zero rate would divide by zero below.
  if (ParallelRate==0) {
    fprintf(stderr,"splitter: ParallelRate must be greater than 0\n");
    return(0);
  }

  LENGTH insides=0;
  struct timeval tv1,tv2;
  struct timezone tz;
  LENGTH IterationsEach=((iterations%ParallelRate)==0)?iterations/ParallelRate:iterations/ParallelRate+1;

  // One result slot per call. The cast is kept so the file also builds
  // with a C++ compiler.
  LENGTH *inside=(LENGTH*)malloc(sizeof(LENGTH)*ParallelRate);
  if (inside==NULL) {
    fprintf(stderr,"splitter: unable to allocate %u result slots\n",ParallelRate);
    exit(EXIT_FAILURE);
  }

#if _OPENACC
  acc_init(acc_device_nvidia);
#endif

  gettimeofday(&tv1, &tz);
#pragma omp parallel for shared(ParallelRate,inside)
#pragma acc kernels loop
  for (unsigned int i=0 ; i<ParallelRate; i++) {
    inside[i]=MainLoopGlobal(IterationsEach,seed_w+i,seed_z+i);
  }

  gettimeofday(&tv2, &tz);

  for (unsigned int i=0 ; i<ParallelRate; i++) {
    printf("\tFound %lld for case %u\n",(long long)inside[i],i);
    insides+=inside[i];
  }
  printf("\n");

  double elapsed=(double)((tv2.tv_sec-tv1.tv_sec) * 1000000L +
                          (tv2.tv_usec-tv1.tv_usec))/1000000;

  // Multiply in double so the product cannot wrap in integer arithmetic.
  double itops=((double)ParallelRate*(double)IterationsEach)/elapsed;

  printf("ParallelRate %u\nElapsed Time %.2f\nItops %.0f\n",ParallelRate,elapsed,itops);

  free(inside);
  return(insides);
}
172 283 equemene
173 283 equemene
int main(int argc, char *argv[]) {
174 283 equemene
175 283 equemene
  unsigned int seed_w=110271,seed_z=101008,ParallelRate=PARALLELRATE;
176 283 equemene
  LENGTH iterations=ITERATIONS;
177 283 equemene
  LENGTH insides=0;
178 283 equemene
179 283 equemene
  if (argc > 1) {
180 283 equemene
    iterations=(LENGTH)atoll(argv[1]);
181 283 equemene
    ParallelRate=atoi(argv[2]);
182 283 equemene
  }
183 283 equemene
  else {
184 283 equemene
    printf("\n\tPi : Estimate Pi with Monte Carlo exploration\n\n");
185 283 equemene
    printf("\t\t#1 : number of iterations (default 1 billion)\n");
186 283 equemene
    printf("\t\t#2 : Parallel Rate (default 1024)\n\n");
187 283 equemene
  }
188 283 equemene
189 283 equemene
  printf ("\n\tInformation about architecture:\n\n");
190 283 equemene
191 283 equemene
  printf ("\tSizeof int = %lld bytes.\n", (long long)sizeof(int));
192 283 equemene
  printf ("\tSizeof long = %lld bytes.\n", (long long)sizeof(long));
193 283 equemene
  printf ("\tSizeof long long = %lld bytes.\n\n", (long long)sizeof(long long));
194 283 equemene
195 283 equemene
  printf ("\tMax int = %u\n", INT_MAX);
196 283 equemene
  printf ("\tMax long = %ld\n", LONG_MAX);
197 283 equemene
  printf ("\tMax long long = %lld\n\n", LLONG_MAX);
198 283 equemene
199 283 equemene
  insides=splitter(iterations,seed_w,seed_z,ParallelRate);
200 283 equemene
201 283 equemene
  LENGTH total=((iterations%ParallelRate)==0)?iterations:(iterations/ParallelRate+1)*ParallelRate;
202 283 equemene
203 283 equemene
  printf("Inside/Total %ld %ld\nPi estimation %f\n\n",insides,total,(4.*(float)insides/total));
204 283 equemene
205 283 equemene
}