Statistiques
| Révision :

root / Pi / C / OpenACC / Pi_OpenACC.c @ 286

Historique | Voir | Annoter | Télécharger (5,47 ko)

1 183 equemene
//
2 183 equemene
// Estimation of Pi using Monte Carlo exploration process
3 183 equemene
// Cecill v2 Emmanuel QUEMENER <emmanuel.quemener@gmail.com>
4 282 equemene
// Tested on GCC-10
5 282 equemene
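// gcc -fopenacc -foffload=nvptx-none -foffload="-O3 -misa=sm_35" Pi_OpenACC.c
// Compile-time selectors tested by this file:
//   generator  : -DTCONG | -DTSHR3 | -DTMWC | -DTKISS
//   arithmetic : -DTINT32 | -DTINT64 | -DTFP32 | -DTFP64
//   counters   : -DLONG (use long long instead of int for iteration counts)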
6 183 equemene
//
7 183 equemene
8 183 equemene
#include <math.h>
9 183 equemene
#include <stdio.h>
10 183 equemene
#include <stdlib.h>
11 183 equemene
#include <limits.h>
12 183 equemene
#include <openacc.h>
13 183 equemene
#include <sys/time.h>
14 183 equemene
15 183 equemene
// Marsaglia's simple 32-bit generators, written as macros.
// Each macro updates state variables that must be in scope where it is used:
//   MWC  -> z and w      SHR3 -> jsr      CONG -> jcong      KISS -> all four
// The state variables are expected to be unsigned int.
#define znew  ((z=36969*(z&65535)+(z>>16))<<16)
#define wnew  ((w=18000*(w&65535)+(w>>16))&65535)
#define MWC   (znew+wnew)
// The three xorshift steps are chained with the comma operator so that every
// update of jsr is sequenced before the next read of jsr. The value of the
// whole expression is the new jsr.
#define SHR3  (jsr^=(jsr<<17),jsr^=(jsr>>13),jsr^=(jsr<<5))
#define CONG  (jcong=69069*jcong+1234567)
#define KISS  ((MWC^CONG)+SHR3)

// Floating-point variants: a 32-bit draw multiplied by a constant close to
// 1/2^32. They are kept without outer parentheses: a cast written in front of
// the macro, as in `(double)MWCfp`, applies to the integer draw, so the
// product is then evaluated in double precision.
#define MWCfp MWC * 2.328306435454494e-10f
#define KISSfp KISS * 2.328306435454494e-10f
#define SHR3fp SHR3 * 2.328306435454494e-10f
#define CONGfp CONG * 2.328306435454494e-10f

// Default number of Monte Carlo iterations
#define ITERATIONS 1000000000

// Default number of parallel work items
#define PARALLELRATE 1024

// Integer type used for iteration and hit counters: long long when the file
// is compiled with -DLONG, int otherwise.
#ifdef LONG
#define LENGTH long long
#else
#define LENGTH int
#endif
37 183 equemene
38 183 equemene
// LENGTH splitter(int,int,int,LENGTH);
39 183 equemene
40 183 equemene
#pragma acc routine
41 183 equemene
LENGTH MainLoopGlobal(LENGTH iterations,unsigned int seed_w,unsigned int seed_z)
42 183 equemene
{
43 183 equemene
#if defined TCONG
44 183 equemene
   unsigned int jcong=seed_z;
45 183 equemene
#elif defined TSHR3
46 183 equemene
   unsigned int jsr=seed_w;
47 183 equemene
#elif defined TMWC
48 183 equemene
   unsigned int z=seed_z;
49 183 equemene
   unsigned int w=seed_w;
50 183 equemene
#elif defined TKISS
51 183 equemene
   unsigned int jcong=seed_z;
52 183 equemene
   unsigned int jsr=seed_w;
53 183 equemene
   unsigned int z=seed_z;
54 183 equemene
   unsigned int w=seed_w;
55 183 equemene
#endif
56 183 equemene
57 183 equemene
   LENGTH total=0;
58 183 equemene
59 183 equemene
   for (LENGTH i=0;i<iterations;i++) {
60 183 equemene
61 183 equemene
#if defined TINT32
62 183 equemene
    #define THEONE 1073741824
63 183 equemene
    #if defined TCONG
64 183 equemene
        unsigned int x=CONG>>17 ;
65 183 equemene
        unsigned int y=CONG>>17 ;
66 183 equemene
    #elif defined TSHR3
67 183 equemene
        unsigned int x=SHR3>>17 ;
68 183 equemene
        unsigned int y=SHR3>>17 ;
69 183 equemene
    #elif defined TMWC
70 183 equemene
        unsigned int x=MWC>>17 ;
71 183 equemene
        unsigned int y=MWC>>17 ;
72 183 equemene
    #elif defined TKISS
73 183 equemene
        unsigned int x=KISS>>17 ;
74 183 equemene
        unsigned int y=KISS>>17 ;
75 183 equemene
    #endif
76 183 equemene
#elif defined TINT64
77 183 equemene
    #define THEONE 4611686018427387904
78 183 equemene
    #if defined TCONG
79 183 equemene
        unsigned long x=(unsigned long)(CONG>>1) ;
80 183 equemene
        unsigned long y=(unsigned long)(CONG>>1) ;
81 183 equemene
    #elif defined TSHR3
82 183 equemene
        unsigned long x=(unsigned long)(SHR3>>1) ;
83 183 equemene
        unsigned long y=(unsigned long)(SHR3>>1) ;
84 183 equemene
    #elif defined TMWC
85 183 equemene
        unsigned long x=(unsigned long)(MWC>>1) ;
86 183 equemene
        unsigned long y=(unsigned long)(MWC>>1) ;
87 183 equemene
    #elif defined TKISS
88 183 equemene
        unsigned long x=(unsigned long)(KISS>>1) ;
89 183 equemene
        unsigned long y=(unsigned long)(KISS>>1) ;
90 183 equemene
    #endif
91 183 equemene
#elif defined TFP32
92 183 equemene
    #define THEONE 1.0f
93 183 equemene
    #if defined TCONG
94 183 equemene
        float x=CONGfp ;
95 183 equemene
        float y=CONGfp ;
96 183 equemene
    #elif defined TSHR3
97 183 equemene
        float x=SHR3fp ;
98 183 equemene
        float y=SHR3fp ;
99 183 equemene
    #elif defined TMWC
100 183 equemene
        float x=MWCfp ;
101 183 equemene
        float y=MWCfp ;
102 183 equemene
    #elif defined TKISS
103 183 equemene
      float x=KISSfp ;
104 183 equemene
      float y=KISSfp ;
105 183 equemene
    #endif
106 183 equemene
#elif defined TFP64
107 183 equemene
    #define THEONE 1.0f
108 183 equemene
    #if defined TCONG
109 183 equemene
        double x=(double)CONGfp ;
110 183 equemene
        double y=(double)CONGfp ;
111 183 equemene
    #elif defined TSHR3
112 183 equemene
        double x=(double)SHR3fp ;
113 183 equemene
        double y=(double)SHR3fp ;
114 183 equemene
    #elif defined TMWC
115 183 equemene
        double x=(double)MWCfp ;
116 183 equemene
        double y=(double)MWCfp ;
117 183 equemene
    #elif defined TKISS
118 183 equemene
        double x=(double)KISSfp ;
119 183 equemene
        double y=(double)KISSfp ;
120 183 equemene
    #endif
121 183 equemene
#endif
122 183 equemene
123 183 equemene
      // Matching test
124 183 equemene
      unsigned long inside=((x*x+y*y) < THEONE) ? 1:0;
125 183 equemene
      total+=inside;
126 183 equemene
127 183 equemene
   }
128 183 equemene
129 183 equemene
   return(total);
130 183 equemene
}
131 183 equemene
132 187 equemene
// Splits the work over ParallelRate work items, runs MainLoopGlobal for each
// of them inside an OpenACC parallel loop, prints counts and timing, and
// returns the total number of hits.
//   iterations   : requested number of iterations; rounded up so that every
//                  work item performs the same number of iterations
//   seed_w,seed_z: base seeds; work item i uses seed_w+i and seed_z+i
//   ParallelRate : number of work items, must be greater than 0
// Returns the sum of the per-item hit counts, or 0 after printing a message
// on stderr when ParallelRate is 0 or the result buffer cannot be allocated.
LENGTH splitter(LENGTH iterations,unsigned int seed_w,unsigned int seed_z,unsigned int ParallelRate) {

  // Protects the modulo and divisions below, and avoids a zero-sized buffer.
  if (ParallelRate==0) {
    fprintf(stderr,"splitter: ParallelRate must be greater than 0\n");
    return(0);
  }

  LENGTH insides=0;
  struct timeval tv1,tv2;
  LENGTH IterationsEach=((iterations%ParallelRate)==0)?iterations/ParallelRate:iterations/ParallelRate+1;

  // One result slot per work item
  LENGTH *inside=malloc(sizeof *inside*ParallelRate);
  if (inside==NULL) {
    fprintf(stderr,"splitter: unable to allocate %u result slots\n",ParallelRate);
    return(0);
  }

  gettimeofday(&tv1, NULL);

#pragma acc data copy(inside[0:ParallelRate])
  {
    #pragma acc parallel loop
    for (unsigned int i=0 ; i<ParallelRate; i++) {
      inside[i]=MainLoopGlobal(IterationsEach,seed_w+i,seed_z+i);
    }
  }
  gettimeofday(&tv2, NULL);

  for (unsigned int i=0 ; i<ParallelRate; i++) {
    insides+=inside[i];
  }
  printf("\n");

  double elapsed=(double)((tv2.tv_sec-tv1.tv_sec) * 1000000L +
                          (tv2.tv_usec-tv1.tv_usec))/1000000;

  // Iterations actually performed, computed in long long so that the product
  // cannot wrap when LENGTH is int.
  long long performed=(long long)ParallelRate*(long long)IterationsEach;
  double itops=(double)performed/elapsed;

  // Casts make the arguments match the conversion specifiers whatever type
  // LENGTH expands to.
  printf("Inside/Total %lld %lld\nParallelRate %u\nElapsed Time %.2f\nItops %.0f\nLogItops %.2f\n",(long long)insides,performed,ParallelRate,elapsed,itops,log10(itops));

  free(inside);
  return(insides);
}
169 183 equemene
170 183 equemene
int main(int argc, char *argv[]) {
171 183 equemene
172 183 equemene
  unsigned int seed_w=110271,seed_z=101008,ParallelRate=PARALLELRATE;
173 183 equemene
  LENGTH iterations=ITERATIONS;
174 183 equemene
  LENGTH insides=0;
175 183 equemene
176 183 equemene
  if (argc > 1) {
177 183 equemene
    iterations=(LENGTH)atoll(argv[1]);
178 183 equemene
    ParallelRate=atoi(argv[2]);
179 183 equemene
  }
180 183 equemene
  else {
181 183 equemene
    printf("\n\tPi : Estimate Pi with Monte Carlo exploration\n\n");
182 183 equemene
    printf("\t\t#1 : number of iterations (default 1 billion)\n");
183 183 equemene
    printf("\t\t#2 : Parallel Rate (default 1024)\n\n");
184 183 equemene
  }
185 183 equemene
186 183 equemene
  printf ("\n\tInformation about architecture:\n\n");
187 183 equemene
188 183 equemene
  printf ("\tSizeof int = %lld bytes.\n", (long long)sizeof(int));
189 183 equemene
  printf ("\tSizeof long = %lld bytes.\n", (long long)sizeof(long));
190 183 equemene
  printf ("\tSizeof long long = %lld bytes.\n\n", (long long)sizeof(long long));
191 183 equemene
192 183 equemene
  printf ("\tMax int = %u\n", INT_MAX);
193 183 equemene
  printf ("\tMax long = %ld\n", LONG_MAX);
194 183 equemene
  printf ("\tMax long long = %lld\n\n", LLONG_MAX);
195 183 equemene
196 183 equemene
  insides=splitter(iterations,seed_w,seed_z,ParallelRate);
197 183 equemene
198 183 equemene
  LENGTH total=((iterations%ParallelRate)==0)?iterations:(iterations/ParallelRate+1)*ParallelRate;
199 183 equemene
200 282 equemene
  printf("Pi estimation %f\n\n",total,(4.*(float)insides/total));
201 282 equemene
202 282 equemene
203 183 equemene
}