Statistics
| Revision:

root / Pi / C / OpenACC / Pi_OpenACC.c @ 183

History | View | Annotate | Download (5.4 kB)

1
//
2
// Estimation of Pi using Monte Carlo exploration process
3
// Cecill v2 Emmanuel QUEMENER <emmanuel.quemener@gmail.com>
4
// Exploit OpenACC on Nvidia GPU
5
// module load 
6
// icpc -std=c99 -O3 -o Pi_OpenACC Pi_OpenACC.c -lm 
7
//
8

    
9
#include <math.h>
10
#include <stdio.h>
11
#include <stdlib.h>
12
#include <limits.h>
13
#include <openacc.h>
14
#include <sys/time.h>
15

    
16
// Marsaglia RNG very simple implementation
17
#define znew  ((z=36969*(z&65535)+(z>>16))<<16)
18
#define wnew  ((w=18000*(w&65535)+(w>>16))&65535)
19
#define MWC   (znew+wnew)
20
#define SHR3  (jsr=(jsr=(jsr=jsr^(jsr<<17))^(jsr>>13))^(jsr<<5))
21
#define CONG  (jcong=69069*jcong+1234567)
22
#define KISS  ((MWC^CONG)+SHR3)
23

    
24
#define MWCfp MWC * 2.328306435454494e-10f
25
#define KISSfp KISS * 2.328306435454494e-10f
26
#define SHR3fp SHR3 * 2.328306435454494e-10f
27
#define CONGfp CONG * 2.328306435454494e-10f
28

    
29
#define ITERATIONS 1000000000
30

    
31
#define PARALLELRATE 1024
32

    
33
#ifdef LONG
34
#define LENGTH long long
35
#else
36
#define LENGTH int
37
#endif
38

    
39
// LENGTH splitter(int,int,int,LENGTH);
40

    
41
#pragma acc routine
42
LENGTH MainLoopGlobal(LENGTH iterations,unsigned int seed_w,unsigned int seed_z)
43
{
44
#if defined TCONG
45
   unsigned int jcong=seed_z;
46
#elif defined TSHR3
47
   unsigned int jsr=seed_w;
48
#elif defined TMWC
49
   unsigned int z=seed_z;
50
   unsigned int w=seed_w;
51
#elif defined TKISS
52
   unsigned int jcong=seed_z;
53
   unsigned int jsr=seed_w;
54
   unsigned int z=seed_z;
55
   unsigned int w=seed_w;
56
#endif
57
  
58
   LENGTH total=0;
59

    
60
   for (LENGTH i=0;i<iterations;i++) {
61

    
62
#if defined TINT32
63
    #define THEONE 1073741824
64
    #if defined TCONG
65
        unsigned int x=CONG>>17 ;
66
        unsigned int y=CONG>>17 ;
67
    #elif defined TSHR3
68
        unsigned int x=SHR3>>17 ;
69
        unsigned int y=SHR3>>17 ;
70
    #elif defined TMWC
71
        unsigned int x=MWC>>17 ;
72
        unsigned int y=MWC>>17 ;
73
    #elif defined TKISS
74
        unsigned int x=KISS>>17 ;
75
        unsigned int y=KISS>>17 ;
76
    #endif
77
#elif defined TINT64
78
    #define THEONE 4611686018427387904
79
    #if defined TCONG
80
        unsigned long x=(unsigned long)(CONG>>1) ;
81
        unsigned long y=(unsigned long)(CONG>>1) ;
82
    #elif defined TSHR3
83
        unsigned long x=(unsigned long)(SHR3>>1) ;
84
        unsigned long y=(unsigned long)(SHR3>>1) ;
85
    #elif defined TMWC
86
        unsigned long x=(unsigned long)(MWC>>1) ;
87
        unsigned long y=(unsigned long)(MWC>>1) ;
88
    #elif defined TKISS
89
        unsigned long x=(unsigned long)(KISS>>1) ;
90
        unsigned long y=(unsigned long)(KISS>>1) ;
91
    #endif
92
#elif defined TFP32
93
    #define THEONE 1.0f
94
    #if defined TCONG
95
        float x=CONGfp ;
96
        float y=CONGfp ;
97
    #elif defined TSHR3
98
        float x=SHR3fp ;
99
        float y=SHR3fp ;
100
    #elif defined TMWC
101
        float x=MWCfp ;
102
        float y=MWCfp ;
103
    #elif defined TKISS
104
      float x=KISSfp ;
105
      float y=KISSfp ;
106
    #endif
107
#elif defined TFP64
108
    #define THEONE 1.0f
109
    #if defined TCONG
110
        double x=(double)CONGfp ;
111
        double y=(double)CONGfp ;
112
    #elif defined TSHR3
113
        double x=(double)SHR3fp ;
114
        double y=(double)SHR3fp ;
115
    #elif defined TMWC
116
        double x=(double)MWCfp ;
117
        double y=(double)MWCfp ;
118
    #elif defined TKISS
119
        double x=(double)KISSfp ;
120
        double y=(double)KISSfp ;
121
    #endif
122
#endif
123

    
124
      // Matching test
125
      unsigned long inside=((x*x+y*y) < THEONE) ? 1:0;
126
      total+=inside;
127

    
128
   }
129

    
130
   return(total);
131
}
132

    
133
LENGTH splitter(LENGTH iterations,int seed_w,int seed_z,int ParallelRate) {
134

    
135
  LENGTH inside[1048576],insides=0;
136
  int i;
137
  struct timeval tv1,tv2;
138
  struct timezone tz;
139
  LENGTH IterationsEach=((iterations%ParallelRate)==0)?iterations/ParallelRate:iterations/ParallelRate+1;
140
  
141
#if _OPENACC
142
  acc_init(acc_device_nvidia);
143
#endif
144

    
145
  gettimeofday(&tv1, &tz);
146
#pragma omp parallel for shared(ParallelRate,inside)
147
#pragma acc kernels loop
148
  for (int i=0 ; i<ParallelRate; i++) {
149
    inside[i]=MainLoopGlobal(IterationsEach,seed_w+i,seed_z+i);
150
  }
151

    
152
  gettimeofday(&tv2, &tz);
153
  
154
  for (int i=0 ; i<ParallelRate; i++) {
155
    printf("\tFound %lld for case %i\n",(long long)inside[i],i);
156
    insides+=inside[i];
157
  }
158
  printf("\n");
159

    
160
  double elapsed=(double)((tv2.tv_sec-tv1.tv_sec) * 1000000L +
161
                          (tv2.tv_usec-tv1.tv_usec))/1000000;
162

    
163
  double itops=(double)(ParallelRate*IterationsEach)/elapsed;
164
  
165
  printf("ParallelRate %i\nElapsed Time %.2f\nItops %.0f\n",ParallelRate,elapsed,itops);
166
  
167
  return(insides);
168
}
169
 
170
int main(int argc, char *argv[]) {
171

    
172
  unsigned int seed_w=110271,seed_z=101008,ParallelRate=PARALLELRATE;
173
  LENGTH iterations=ITERATIONS;
174
  LENGTH insides=0;
175

    
176
  if (argc > 1) {
177
    iterations=(LENGTH)atoll(argv[1]);
178
    ParallelRate=atoi(argv[2]);
179
  }
180
  else {
181
    printf("\n\tPi : Estimate Pi with Monte Carlo exploration\n\n");
182
    printf("\t\t#1 : number of iterations (default 1 billion)\n");
183
    printf("\t\t#2 : Parallel Rate (default 1024)\n\n");
184
  }
185

    
186
  printf ("\n\tInformation about architecture:\n\n");
187

    
188
  printf ("\tSizeof int = %lld bytes.\n", (long long)sizeof(int));
189
  printf ("\tSizeof long = %lld bytes.\n", (long long)sizeof(long));
190
  printf ("\tSizeof long long = %lld bytes.\n\n", (long long)sizeof(long long));
191

    
192
  printf ("\tMax int = %u\n", INT_MAX);
193
  printf ("\tMax long = %ld\n", LONG_MAX);
194
  printf ("\tMax long long = %lld\n\n", LLONG_MAX);
195

    
196
  insides=splitter(iterations,seed_w,seed_z,ParallelRate);
197

    
198
  LENGTH total=((iterations%ParallelRate)==0)?iterations:(iterations/ParallelRate+1)*ParallelRate;
199

    
200
  printf("Inside/Total %ld %ld\nPi estimation %f\n\n",insides,total,(4.*(float)insides/total));
201
   
202
}