Statistiques
| Révision :

root / Pi / C / OpenMP / GPU / Pi_OpenMP.c @ 308

Historique | Voir | Annoter | Télécharger (5,65 ko)

1 308 equemene
//
2 308 equemene
// Estimation of Pi using Monte Carlo exploration process
3 308 equemene
// Cecill v2 Emmanuel QUEMENER <emmanuel.quemener@gmail.com>
4 308 equemene
// For Nvidia Devices
5 308 equemene
// gcc -Wall -O3 -std=c99 -foffload=nvptx-none -foffload="-O3 -misa=sm_35" -fopenmp -g Pi_OpenMP Pi_OpenMP.c -lm -lgomp
6 308 equemene
// For AMD Devices
7 308 equemene
// gcc -Wall -O3 -std=c99 -foffload=amdgcu-none -foffload="-O3 -misa=sm_35" -fopenmp -g Pi_OpenMP Pi_OpenMP.c -lm -lgomp
8 308 equemene
9 308 equemene
#include <math.h>
10 308 equemene
#include <stdio.h>
11 308 equemene
#include <stdlib.h>
12 308 equemene
#include <omp.h>
13 308 equemene
#include <limits.h>
14 308 equemene
#include <sys/time.h>
15 308 equemene
16 308 equemene
// Marsaglia RNG very simple implementation
17 308 equemene
#define znew  ((z=36969*(z&65535)+(z>>16))<<16)
18 308 equemene
#define wnew  ((w=18000*(w&65535)+(w>>16))&65535)
19 308 equemene
#define MWC   (znew+wnew)
20 308 equemene
#define SHR3  (jsr=(jsr=(jsr=jsr^(jsr<<17))^(jsr>>13))^(jsr<<5))
21 308 equemene
#define CONG  (jcong=69069*jcong+1234567)
22 308 equemene
#define KISS  ((MWC^CONG)+SHR3)
23 308 equemene
24 308 equemene
#define MWCfp MWC * 2.328306435454494e-10f
25 308 equemene
#define KISSfp KISS * 2.328306435454494e-10f
26 308 equemene
#define SHR3fp SHR3 * 2.328306435454494e-10f
27 308 equemene
#define CONGfp CONG * 2.328306435454494e-10f
28 308 equemene
29 308 equemene
#define ITERATIONS 1000000000
30 308 equemene
31 308 equemene
#define PARALLELRATE 1024
32 308 equemene
33 308 equemene
#ifdef LONG
34 308 equemene
#define LENGTH long long
35 308 equemene
#else
36 308 equemene
#define LENGTH int
37 308 equemene
#endif
38 308 equemene
39 308 equemene
LENGTH MainLoopGlobal(LENGTH iterations,unsigned int seed_w,unsigned int seed_z)
40 308 equemene
{
41 308 equemene
#if defined TCONG
42 308 equemene
   unsigned int jcong=seed_z;
43 308 equemene
#elif defined TSHR3
44 308 equemene
   unsigned int jsr=seed_w;
45 308 equemene
#elif defined TMWC
46 308 equemene
   unsigned int z=seed_z;
47 308 equemene
   unsigned int w=seed_w;
48 308 equemene
#elif defined TKISS
49 308 equemene
   unsigned int jcong=seed_z;
50 308 equemene
   unsigned int jsr=seed_w;
51 308 equemene
   unsigned int z=seed_z;
52 308 equemene
   unsigned int w=seed_w;
53 308 equemene
#endif
54 308 equemene
55 308 equemene
   LENGTH total=0;
56 308 equemene
57 308 equemene
   for (LENGTH i=0;i<iterations;i++) {
58 308 equemene
59 308 equemene
#if defined TINT32
60 308 equemene
    #define THEONE 1073741824
61 308 equemene
    #if defined TCONG
62 308 equemene
        unsigned int x=CONG>>17 ;
63 308 equemene
        unsigned int y=CONG>>17 ;
64 308 equemene
    #elif defined TSHR3
65 308 equemene
        unsigned int x=SHR3>>17 ;
66 308 equemene
        unsigned int y=SHR3>>17 ;
67 308 equemene
    #elif defined TMWC
68 308 equemene
        unsigned int x=MWC>>17 ;
69 308 equemene
        unsigned int y=MWC>>17 ;
70 308 equemene
    #elif defined TKISS
71 308 equemene
        unsigned int x=KISS>>17 ;
72 308 equemene
        unsigned int y=KISS>>17 ;
73 308 equemene
    #endif
74 308 equemene
#elif defined TINT64
75 308 equemene
    #define THEONE 4611686018427387904
76 308 equemene
    #if defined TCONG
77 308 equemene
        unsigned long x=(unsigned long)(CONG>>1) ;
78 308 equemene
        unsigned long y=(unsigned long)(CONG>>1) ;
79 308 equemene
    #elif defined TSHR3
80 308 equemene
        unsigned long x=(unsigned long)(SHR3>>1) ;
81 308 equemene
        unsigned long y=(unsigned long)(SHR3>>1) ;
82 308 equemene
    #elif defined TMWC
83 308 equemene
        unsigned long x=(unsigned long)(MWC>>1) ;
84 308 equemene
        unsigned long y=(unsigned long)(MWC>>1) ;
85 308 equemene
    #elif defined TKISS
86 308 equemene
        unsigned long x=(unsigned long)(KISS>>1) ;
87 308 equemene
        unsigned long y=(unsigned long)(KISS>>1) ;
88 308 equemene
    #endif
89 308 equemene
#elif defined TFP32
90 308 equemene
    #define THEONE 1.0f
91 308 equemene
    #if defined TCONG
92 308 equemene
        float x=CONGfp ;
93 308 equemene
        float y=CONGfp ;
94 308 equemene
    #elif defined TSHR3
95 308 equemene
        float x=SHR3fp ;
96 308 equemene
        float y=SHR3fp ;
97 308 equemene
    #elif defined TMWC
98 308 equemene
        float x=MWCfp ;
99 308 equemene
        float y=MWCfp ;
100 308 equemene
    #elif defined TKISS
101 308 equemene
      float x=KISSfp ;
102 308 equemene
      float y=KISSfp ;
103 308 equemene
    #endif
104 308 equemene
#elif defined TFP64
105 308 equemene
    #define THEONE 1.0f
106 308 equemene
    #if defined TCONG
107 308 equemene
        double x=(double)CONGfp ;
108 308 equemene
        double y=(double)CONGfp ;
109 308 equemene
    #elif defined TSHR3
110 308 equemene
        double x=(double)SHR3fp ;
111 308 equemene
        double y=(double)SHR3fp ;
112 308 equemene
    #elif defined TMWC
113 308 equemene
        double x=(double)MWCfp ;
114 308 equemene
        double y=(double)MWCfp ;
115 308 equemene
    #elif defined TKISS
116 308 equemene
        double x=(double)KISSfp ;
117 308 equemene
        double y=(double)KISSfp ;
118 308 equemene
    #endif
119 308 equemene
#endif
120 308 equemene
121 308 equemene
      unsigned long inside=((x*x+y*y) < THEONE) ? 1:0;
122 308 equemene
      total+=inside;
123 308 equemene
   }
124 308 equemene
125 308 equemene
   return(total);
126 308 equemene
}
127 308 equemene
128 308 equemene
LENGTH splitter(LENGTH iterations,unsigned int seed_w,unsigned int seed_z,unsigned int ParallelRate)
129 308 equemene
{
130 308 equemene
  LENGTH *inside,insides=0;
131 308 equemene
  struct timeval tv1,tv2;
132 308 equemene
  LENGTH IterationsEach=((iterations%ParallelRate)==0)?iterations/ParallelRate:iterations/ParallelRate+1;
133 308 equemene
134 308 equemene
  inside=(LENGTH*)malloc(sizeof(LENGTH)*ParallelRate);
135 308 equemene
136 308 equemene
  gettimeofday(&tv1, NULL);
137 308 equemene
138 308 equemene
// #pragma omp parallel for
139 308 equemene
#pragma omp target teams distribute parallel for simd \
140 308 equemene
   map(tofrom:inside[0:ParallelRate])
141 308 equemene
  for (int i=0 ; i<ParallelRate; i++) {
142 308 equemene
    inside[i]=MainLoopGlobal(IterationsEach,seed_w+i,seed_z+i);
143 308 equemene
  }
144 308 equemene
145 308 equemene
  for (int i=0 ; i<ParallelRate; i++) {
146 308 equemene
    insides+=inside[i];
147 308 equemene
  }
148 308 equemene
149 308 equemene
  gettimeofday(&tv2, NULL);
150 308 equemene
151 308 equemene
  for (int i=0 ; i<ParallelRate; i++) {
152 308 equemene
    printf("\tFound %lld for ParallelRate %i\n",(long long)inside[i],i);
153 308 equemene
  }
154 308 equemene
  printf("\n");
155 308 equemene
156 308 equemene
  double elapsed=(double)((tv2.tv_sec-tv1.tv_sec) * 1000000L +
157 308 equemene
                            (tv2.tv_usec-tv1.tv_usec))/1000000;
158 308 equemene
159 308 equemene
  double itops=(double)(ParallelRate*IterationsEach)/elapsed;
160 308 equemene
161 308 equemene
  printf("ParallelRate %i\nElapsed Time %.2f\nItops %.0f\nLogItops %.2f\n",ParallelRate,elapsed,itops,log10(itops));
162 308 equemene
163 308 equemene
  free(inside);
164 308 equemene
165 308 equemene
  return(insides);
166 308 equemene
}
167 308 equemene
168 308 equemene
169 308 equemene
int main(int argc, char *argv[]) {
170 308 equemene
171 308 equemene
  unsigned int seed_w=110271,seed_z=101008,ParallelRate=PARALLELRATE;
172 308 equemene
  LENGTH iterations=ITERATIONS,insides=0;
173 308 equemene
174 308 equemene
  if (argc > 1) {
175 308 equemene
    iterations=(LENGTH)atoll(argv[1]);
176 308 equemene
    if (argc > 2) {
177 308 equemene
      ParallelRate=atoi(argv[2]);
178 308 equemene
    }
179 308 equemene
  }
180 308 equemene
  else {
181 308 equemene
    printf("\n\tPi : Estimate Pi with Monte Carlo exploration\n\n");
182 308 equemene
    printf("\t\t#1 : number of iterations (default 1 billion)\n");
183 308 equemene
    printf("\t\t#2 : number of ParallelRate (default 1024)\n\n");
184 308 equemene
  }
185 308 equemene
186 308 equemene
  printf ("\n\tInformation about architecture:\n\n");
187 308 equemene
188 308 equemene
  printf ("\tSizeof int = %lld bytes.\n", (long long)sizeof(int));
189 308 equemene
  printf ("\tSizeof long = %lld bytes.\n", (long long)sizeof(long));
190 308 equemene
  printf ("\tSizeof long long = %lld bytes.\n\n", (long long)sizeof(long long));
191 308 equemene
192 308 equemene
  printf ("\tMax int = %u\n", INT_MAX);
193 308 equemene
  printf ("\tMax long = %ld\n", LONG_MAX);
194 308 equemene
  printf ("\tMax long long = %lld\n\n", LLONG_MAX);
195 308 equemene
196 308 equemene
  insides=splitter(iterations,seed_w,seed_z,ParallelRate);
197 308 equemene
198 308 equemene
  LENGTH total=((iterations%ParallelRate)==0)?iterations:(iterations/ParallelRate+1)*ParallelRate;
199 308 equemene
200 308 equemene
  printf("Inside/Total %ld %ld\nPi estimation %f\n\n",(long int)insides,(long int)total,(4.*(float)insides/total));
201 308 equemene
202 308 equemene
}