Statistiques
| Révision :

root / Pi / C / OpenACC / Pi_OpenACC.c @ 187

Historique | Voir | Annoter | Télécharger (5,46 ko)

1
//
2
// Estimation of Pi using Monte Carlo exploration process
3
// Cecill v2 Emmanuel QUEMENER <emmanuel.quemener@gmail.com>
4
// Exploit OpenACC on Nvidia GPU
5
// module load 
6
// icpc -std=c99 -O3 -o Pi_OpenACC Pi_OpenACC.c -lm 
7
//
8

    
9
#include <math.h>
10
#include <stdio.h>
11
#include <stdlib.h>
12
#include <limits.h>
13
#include <openacc.h>
14
#include <sys/time.h>
15

    
16
// Marsaglia RNG very simple implementation
17
// Each generator macro advances, in place, the state variables it names
// (z and w for MWC, jsr for SHR3, jcong for CONG, all four for KISS) and
// evaluates to the new draw. Those variables must be in scope at the point of
// use; MainLoopGlobal declares them as unsigned int.
#define znew  ((z=36969*(z&65535)+(z>>16))<<16)
#define wnew  ((w=18000*(w&65535)+(w>>16))&65535)
#define MWC   (znew+wnew)
// The three xorshift steps are chained with the comma operator so that each
// one is sequenced after the previous one. Nesting the assignments inside a
// single expression reads and writes jsr without sequencing, which is
// undefined behaviour.
#define SHR3  (jsr^=(jsr<<17),jsr^=(jsr>>13),jsr^=(jsr<<5))
#define CONG  (jcong=69069*jcong+1234567)
#define KISS  ((MWC^CONG)+SHR3)

// Floating-point variants: a draw multiplied by a constant close to 2^-32.
// They are left without outer parentheses on purpose: MainLoopGlobal writes
// (double)XXXfp, where the cast has to bind to the integer draw before the
// multiplication; wrapping the expansion would change that arithmetic.
#define MWCfp MWC * 2.328306435454494e-10f
#define KISSfp KISS * 2.328306435454494e-10f
#define SHR3fp SHR3 * 2.328306435454494e-10f
#define CONGfp CONG * 2.328306435454494e-10f

// Default number of iterations when none is given on the command line
#define ITERATIONS 1000000000

// Default number of independent tasks the iterations are split into
#define PARALLELRATE 1024

// Integer type used for iteration and hit counters; build with -DLONG to
// count in long long instead of int
#ifdef LONG
#define LENGTH long long
#else
#define LENGTH int
#endif
38

    
39
// LENGTH splitter(int,int,int,LENGTH);
40

    
41
#pragma acc routine
42
LENGTH MainLoopGlobal(LENGTH iterations,unsigned int seed_w,unsigned int seed_z)
43
{
44
#if defined TCONG
45
   unsigned int jcong=seed_z;
46
#elif defined TSHR3
47
   unsigned int jsr=seed_w;
48
#elif defined TMWC
49
   unsigned int z=seed_z;
50
   unsigned int w=seed_w;
51
#elif defined TKISS
52
   unsigned int jcong=seed_z;
53
   unsigned int jsr=seed_w;
54
   unsigned int z=seed_z;
55
   unsigned int w=seed_w;
56
#endif
57
  
58
   LENGTH total=0;
59

    
60
   for (LENGTH i=0;i<iterations;i++) {
61

    
62
#if defined TINT32
63
    #define THEONE 1073741824
64
    #if defined TCONG
65
        unsigned int x=CONG>>17 ;
66
        unsigned int y=CONG>>17 ;
67
    #elif defined TSHR3
68
        unsigned int x=SHR3>>17 ;
69
        unsigned int y=SHR3>>17 ;
70
    #elif defined TMWC
71
        unsigned int x=MWC>>17 ;
72
        unsigned int y=MWC>>17 ;
73
    #elif defined TKISS
74
        unsigned int x=KISS>>17 ;
75
        unsigned int y=KISS>>17 ;
76
    #endif
77
#elif defined TINT64
78
    #define THEONE 4611686018427387904
79
    #if defined TCONG
80
        unsigned long x=(unsigned long)(CONG>>1) ;
81
        unsigned long y=(unsigned long)(CONG>>1) ;
82
    #elif defined TSHR3
83
        unsigned long x=(unsigned long)(SHR3>>1) ;
84
        unsigned long y=(unsigned long)(SHR3>>1) ;
85
    #elif defined TMWC
86
        unsigned long x=(unsigned long)(MWC>>1) ;
87
        unsigned long y=(unsigned long)(MWC>>1) ;
88
    #elif defined TKISS
89
        unsigned long x=(unsigned long)(KISS>>1) ;
90
        unsigned long y=(unsigned long)(KISS>>1) ;
91
    #endif
92
#elif defined TFP32
93
    #define THEONE 1.0f
94
    #if defined TCONG
95
        float x=CONGfp ;
96
        float y=CONGfp ;
97
    #elif defined TSHR3
98
        float x=SHR3fp ;
99
        float y=SHR3fp ;
100
    #elif defined TMWC
101
        float x=MWCfp ;
102
        float y=MWCfp ;
103
    #elif defined TKISS
104
      float x=KISSfp ;
105
      float y=KISSfp ;
106
    #endif
107
#elif defined TFP64
108
    #define THEONE 1.0f
109
    #if defined TCONG
110
        double x=(double)CONGfp ;
111
        double y=(double)CONGfp ;
112
    #elif defined TSHR3
113
        double x=(double)SHR3fp ;
114
        double y=(double)SHR3fp ;
115
    #elif defined TMWC
116
        double x=(double)MWCfp ;
117
        double y=(double)MWCfp ;
118
    #elif defined TKISS
119
        double x=(double)KISSfp ;
120
        double y=(double)KISSfp ;
121
    #endif
122
#endif
123

    
124
      // Matching test
125
      unsigned long inside=((x*x+y*y) < THEONE) ? 1:0;
126
      total+=inside;
127

    
128
   }
129

    
130
   return(total);
131
}
132

    
133
// Run the Monte Carlo loop as ParallelRate independent tasks and add up
// their hit counts.
//
// Every task runs ceil(iterations/ParallelRate) draws, so the number of draws
// actually made is rounded up to a multiple of ParallelRate. Task i is seeded
// with (seed_w+i, seed_z+i). Per-task counts, the elapsed wall-clock time and
// the resulting iterations per second are printed on stdout.
//
// iterations   : requested number of draws
// seed_w,seed_z: base seeds handed to MainLoopGlobal
// ParallelRate : number of tasks, must be at least 1
// returns      : total number of hits; 0 if ParallelRate is 0. The process
//                exits if the per-task counters cannot be allocated.
LENGTH splitter(LENGTH iterations,unsigned int seed_w,unsigned int seed_z,unsigned int ParallelRate) {

  // A null rate would divide by zero just below
  if (ParallelRate==0) {
    fprintf(stderr,"splitter: ParallelRate must be at least 1\n");
    return(0);
  }

  LENGTH IterationsEach=((iterations%ParallelRate)==0)?iterations/ParallelRate:iterations/ParallelRate+1;

  // One counter per task. The cast is kept so the file also builds with a
  // C++ compiler driver.
  LENGTH *inside=(LENGTH*)malloc(sizeof(*inside)*ParallelRate);
  if (inside==NULL) {
    fprintf(stderr,"splitter: cannot allocate %u counters\n",ParallelRate);
    exit(EXIT_FAILURE);
  }

#if _OPENACC
  acc_init(acc_device_nvidia);
#endif

  // POSIX leaves gettimeofday() unspecified for a non-null timezone argument
  struct timeval tv1,tv2;
  gettimeofday(&tv1, NULL);
#pragma omp parallel for shared(ParallelRate,inside)
#pragma acc kernels loop
  for (int i=0 ; i<ParallelRate; i++) {
    inside[i]=MainLoopGlobal(IterationsEach,seed_w+i,seed_z+i);
  }

  gettimeofday(&tv2, NULL);

  LENGTH insides=0;
  for (int i=0 ; i<ParallelRate; i++) {
    printf("\tFound %lld for case %i\n",(long long)inside[i],i);
    insides+=inside[i];
  }
  printf("\n");

  // Seconds and microseconds are converted separately so that no integer
  // product can overflow
  double elapsed=(double)(tv2.tv_sec-tv1.tv_sec)+
                 (double)(tv2.tv_usec-tv1.tv_usec)/1000000.0;

  // Rate based on the draws actually made, product taken in double
  double itops=(double)ParallelRate*(double)IterationsEach/elapsed;

  printf("ParallelRate %u\nElapsed Time %.2f\nItops %.0f\n",ParallelRate,elapsed,itops);

  free(inside);
  return(insides);
}
172
 
173
int main(int argc, char *argv[]) {
174

    
175
  unsigned int seed_w=110271,seed_z=101008,ParallelRate=PARALLELRATE;
176
  LENGTH iterations=ITERATIONS;
177
  LENGTH insides=0;
178

    
179
  if (argc > 1) {
180
    iterations=(LENGTH)atoll(argv[1]);
181
    ParallelRate=atoi(argv[2]);
182
  }
183
  else {
184
    printf("\n\tPi : Estimate Pi with Monte Carlo exploration\n\n");
185
    printf("\t\t#1 : number of iterations (default 1 billion)\n");
186
    printf("\t\t#2 : Parallel Rate (default 1024)\n\n");
187
  }
188

    
189
  printf ("\n\tInformation about architecture:\n\n");
190

    
191
  printf ("\tSizeof int = %lld bytes.\n", (long long)sizeof(int));
192
  printf ("\tSizeof long = %lld bytes.\n", (long long)sizeof(long));
193
  printf ("\tSizeof long long = %lld bytes.\n\n", (long long)sizeof(long long));
194

    
195
  printf ("\tMax int = %u\n", INT_MAX);
196
  printf ("\tMax long = %ld\n", LONG_MAX);
197
  printf ("\tMax long long = %lld\n\n", LLONG_MAX);
198

    
199
  insides=splitter(iterations,seed_w,seed_z,ParallelRate);
200

    
201
  LENGTH total=((iterations%ParallelRate)==0)?iterations:(iterations/ParallelRate+1)*ParallelRate;
202

    
203
  printf("Inside/Total %ld %ld\nPi estimation %f\n\n",insides,total,(4.*(float)insides/total));
204
   
205
}