Statistiques
| Révision :

root / Pi / C / OpenACC / Pi_OpenACC.c @ 284

Historique | Voir | Annoter | Télécharger (5,47 ko)

1
//
2
// Estimation of Pi using Monte Carlo exploration process
3
// Cecill v2 Emmanuel QUEMENER <emmanuel.quemener@gmail.com>
4
// Tested on GCC-10
5
// gcc -fopenacc -foffload=nvptx-none -foffload="-O3 -misa=sm_35" Pi_OpenACC.c
6
//
7

    
8
#include <math.h>
9
#include <stdio.h>
10
#include <stdlib.h>
11
#include <limits.h>
12
#include <openacc.h>
13
#include <sys/time.h>
14

    
15
// Marsaglia RNG very simple implementation
16
#define znew  ((z=36969*(z&65535)+(z>>16))<<16)
17
#define wnew  ((w=18000*(w&65535)+(w>>16))&65535)
18
#define MWC   (znew+wnew)
19
#define SHR3  (jsr=(jsr=(jsr=jsr^(jsr<<17))^(jsr>>13))^(jsr<<5))
20
#define CONG  (jcong=69069*jcong+1234567)
21
#define KISS  ((MWC^CONG)+SHR3)
22

    
23
#define MWCfp MWC * 2.328306435454494e-10f
24
#define KISSfp KISS * 2.328306435454494e-10f
25
#define SHR3fp SHR3 * 2.328306435454494e-10f
26
#define CONGfp CONG * 2.328306435454494e-10f
27

    
28
#define ITERATIONS 1000000000
29

    
30
#define PARALLELRATE 1024
31

    
32
#ifdef LONG
33
#define LENGTH long long
34
#else
35
#define LENGTH int
36
#endif
37

    
38
// LENGTH splitter(int,int,int,LENGTH);
39

    
40
#pragma acc routine
41
LENGTH MainLoopGlobal(LENGTH iterations,unsigned int seed_w,unsigned int seed_z)
42
{
43
#if defined TCONG
44
   unsigned int jcong=seed_z;
45
#elif defined TSHR3
46
   unsigned int jsr=seed_w;
47
#elif defined TMWC
48
   unsigned int z=seed_z;
49
   unsigned int w=seed_w;
50
#elif defined TKISS
51
   unsigned int jcong=seed_z;
52
   unsigned int jsr=seed_w;
53
   unsigned int z=seed_z;
54
   unsigned int w=seed_w;
55
#endif
56
  
57
   LENGTH total=0;
58

    
59
   for (LENGTH i=0;i<iterations;i++) {
60

    
61
#if defined TINT32
62
    #define THEONE 1073741824
63
    #if defined TCONG
64
        unsigned int x=CONG>>17 ;
65
        unsigned int y=CONG>>17 ;
66
    #elif defined TSHR3
67
        unsigned int x=SHR3>>17 ;
68
        unsigned int y=SHR3>>17 ;
69
    #elif defined TMWC
70
        unsigned int x=MWC>>17 ;
71
        unsigned int y=MWC>>17 ;
72
    #elif defined TKISS
73
        unsigned int x=KISS>>17 ;
74
        unsigned int y=KISS>>17 ;
75
    #endif
76
#elif defined TINT64
77
    #define THEONE 4611686018427387904
78
    #if defined TCONG
79
        unsigned long x=(unsigned long)(CONG>>1) ;
80
        unsigned long y=(unsigned long)(CONG>>1) ;
81
    #elif defined TSHR3
82
        unsigned long x=(unsigned long)(SHR3>>1) ;
83
        unsigned long y=(unsigned long)(SHR3>>1) ;
84
    #elif defined TMWC
85
        unsigned long x=(unsigned long)(MWC>>1) ;
86
        unsigned long y=(unsigned long)(MWC>>1) ;
87
    #elif defined TKISS
88
        unsigned long x=(unsigned long)(KISS>>1) ;
89
        unsigned long y=(unsigned long)(KISS>>1) ;
90
    #endif
91
#elif defined TFP32
92
    #define THEONE 1.0f
93
    #if defined TCONG
94
        float x=CONGfp ;
95
        float y=CONGfp ;
96
    #elif defined TSHR3
97
        float x=SHR3fp ;
98
        float y=SHR3fp ;
99
    #elif defined TMWC
100
        float x=MWCfp ;
101
        float y=MWCfp ;
102
    #elif defined TKISS
103
      float x=KISSfp ;
104
      float y=KISSfp ;
105
    #endif
106
#elif defined TFP64
107
    #define THEONE 1.0f
108
    #if defined TCONG
109
        double x=(double)CONGfp ;
110
        double y=(double)CONGfp ;
111
    #elif defined TSHR3
112
        double x=(double)SHR3fp ;
113
        double y=(double)SHR3fp ;
114
    #elif defined TMWC
115
        double x=(double)MWCfp ;
116
        double y=(double)MWCfp ;
117
    #elif defined TKISS
118
        double x=(double)KISSfp ;
119
        double y=(double)KISSfp ;
120
    #endif
121
#endif
122

    
123
      // Matching test
124
      unsigned long inside=((x*x+y*y) < THEONE) ? 1:0;
125
      total+=inside;
126

    
127
   }
128

    
129
   return(total);
130
}
131

    
132
LENGTH splitter(LENGTH iterations,unsigned int seed_w,unsigned int seed_z,unsigned int ParallelRate) {
133

    
134
  LENGTH *inside,insides=0;
135
  int i;
136
  struct timeval tv1,tv2;
137
  struct timezone tz;
138
  LENGTH IterationsEach=((iterations%ParallelRate)==0)?iterations/ParallelRate:iterations/ParallelRate+1;
139

    
140
  inside=(LENGTH*)malloc(sizeof(LENGTH)*ParallelRate);
141

    
142
  gettimeofday(&tv1, &tz);
143

    
144
#pragma acc kernels copy(inside[0:ParallelRate])
145
  {
146
    #pragma acc independant
147
    for (int i=0 ; i<ParallelRate; i++) {
148
      inside[i]=MainLoopGlobal(IterationsEach,seed_w+i,seed_z+i);
149
    }
150
  }
151
  gettimeofday(&tv2, &tz);
152
  
153
  for (int i=0 ; i<ParallelRate; i++) {
154
    //    printf("\tFound %lld for case %i\n",(long long)inside[i],i);
155
    insides+=inside[i];
156
  }
157
  printf("\n");
158

    
159
  double elapsed=(double)((tv2.tv_sec-tv1.tv_sec) * 1000000L +
160
                          (tv2.tv_usec-tv1.tv_usec))/1000000;
161

    
162
  double itops=(double)(ParallelRate*IterationsEach)/elapsed;
163

    
164
  printf("Inside/Total %ld %ld\nParallelRate %i\nElapsed Time %.2f\nItops %.0f\nLogItops %.2f\n",insides,ParallelRate*IterationsEach,ParallelRate,elapsed,itops,log10(itops));
165

    
166
  free(inside);
167
  return(insides);
168
}
169
 
170
int main(int argc, char *argv[]) {
171

    
172
  unsigned int seed_w=110271,seed_z=101008,ParallelRate=PARALLELRATE;
173
  LENGTH iterations=ITERATIONS;
174
  LENGTH insides=0;
175

    
176
  if (argc > 1) {
177
    iterations=(LENGTH)atoll(argv[1]);
178
    ParallelRate=atoi(argv[2]);
179
  }
180
  else {
181
    printf("\n\tPi : Estimate Pi with Monte Carlo exploration\n\n");
182
    printf("\t\t#1 : number of iterations (default 1 billion)\n");
183
    printf("\t\t#2 : Parallel Rate (default 1024)\n\n");
184
  }
185

    
186
  printf ("\n\tInformation about architecture:\n\n");
187

    
188
  printf ("\tSizeof int = %lld bytes.\n", (long long)sizeof(int));
189
  printf ("\tSizeof long = %lld bytes.\n", (long long)sizeof(long));
190
  printf ("\tSizeof long long = %lld bytes.\n\n", (long long)sizeof(long long));
191

    
192
  printf ("\tMax int = %u\n", INT_MAX);
193
  printf ("\tMax long = %ld\n", LONG_MAX);
194
  printf ("\tMax long long = %lld\n\n", LLONG_MAX);
195

    
196
  insides=splitter(iterations,seed_w,seed_z,ParallelRate);
197

    
198
  LENGTH total=((iterations%ParallelRate)==0)?iterations:(iterations/ParallelRate+1)*ParallelRate;
199

    
200
  printf("Pi estimation %f\n\n",total,(4.*(float)insides/total));
201

    
202

    
203
}