root / Pi / C / OpenACC / Pi_OpenACC.c @ 183
History  View  Annotate  Download (5.4 kB)
1 
//


2 
// Estimation of Pi using Monte Carlo exploration process

3 
// Cecill v2 Emmanuel QUEMENER <emmanuel.quemener@gmail.com>

4 
// Exploit OpenACC on Nvidia GPU

5 
// module load

6 
// icpc std=c99 O3 o Pi_OpenACC Pi_OpenACC.c lm

7 
//

8  
9 
#include <math.h> 
10 
#include <stdio.h> 
11 
#include <stdlib.h> 
12 
#include <limits.h> 
13 
#include <openacc.h> 
14 
#include <sys/time.h> 
15  
16 
// Marsaglia RNG very simple implementation

17 
#define znew ((z=36969*(z&65535)+(z>>16))<<16) 
18 
#define wnew ((w=18000*(w&65535)+(w>>16))&65535) 
19 
#define MWC (znew+wnew)

20 
#define SHR3 (jsr=(jsr=(jsr=jsr^(jsr<<17))^(jsr>>13))^(jsr<<5)) 
21 
#define CONG (jcong=69069*jcong+1234567) 
22 
#define KISS ((MWC^CONG)+SHR3)

23  
24 
#define MWCfp MWC * 2.328306435454494e10f 
25 
#define KISSfp KISS * 2.328306435454494e10f 
26 
#define SHR3fp SHR3 * 2.328306435454494e10f 
27 
#define CONGfp CONG * 2.328306435454494e10f 
28  
29 
#define ITERATIONS 1000000000 
30  
31 
#define PARALLELRATE 1024 
32  
33 
#ifdef LONG

34 
#define LENGTH long long 
35 
#else

36 
#define LENGTH int 
37 
#endif

38  
39 
// LENGTH splitter(int,int,int,LENGTH);

40  
41 
#pragma acc routine

42 
LENGTH MainLoopGlobal(LENGTH iterations,unsigned int seed_w,unsigned int seed_z) 
43 
{ 
44 
#if defined TCONG

45 
unsigned int jcong=seed_z; 
46 
#elif defined TSHR3

47 
unsigned int jsr=seed_w; 
48 
#elif defined TMWC

49 
unsigned int z=seed_z; 
50 
unsigned int w=seed_w; 
51 
#elif defined TKISS

52 
unsigned int jcong=seed_z; 
53 
unsigned int jsr=seed_w; 
54 
unsigned int z=seed_z; 
55 
unsigned int w=seed_w; 
56 
#endif

57 

58 
LENGTH total=0;

59  
60 
for (LENGTH i=0;i<iterations;i++) { 
61  
62 
#if defined TINT32

63 
#define THEONE 1073741824 
64 
#if defined TCONG

65 
unsigned int x=CONG>>17 ; 
66 
unsigned int y=CONG>>17 ; 
67 
#elif defined TSHR3

68 
unsigned int x=SHR3>>17 ; 
69 
unsigned int y=SHR3>>17 ; 
70 
#elif defined TMWC

71 
unsigned int x=MWC>>17 ; 
72 
unsigned int y=MWC>>17 ; 
73 
#elif defined TKISS

74 
unsigned int x=KISS>>17 ; 
75 
unsigned int y=KISS>>17 ; 
76 
#endif

77 
#elif defined TINT64

78 
#define THEONE 4611686018427387904 
79 
#if defined TCONG

80 
unsigned long x=(unsigned long)(CONG>>1) ; 
81 
unsigned long y=(unsigned long)(CONG>>1) ; 
82 
#elif defined TSHR3

83 
unsigned long x=(unsigned long)(SHR3>>1) ; 
84 
unsigned long y=(unsigned long)(SHR3>>1) ; 
85 
#elif defined TMWC

86 
unsigned long x=(unsigned long)(MWC>>1) ; 
87 
unsigned long y=(unsigned long)(MWC>>1) ; 
88 
#elif defined TKISS

89 
unsigned long x=(unsigned long)(KISS>>1) ; 
90 
unsigned long y=(unsigned long)(KISS>>1) ; 
91 
#endif

92 
#elif defined TFP32

93 
#define THEONE 1.0f 
94 
#if defined TCONG

95 
float x=CONGfp ;

96 
float y=CONGfp ;

97 
#elif defined TSHR3

98 
float x=SHR3fp ;

99 
float y=SHR3fp ;

100 
#elif defined TMWC

101 
float x=MWCfp ;

102 
float y=MWCfp ;

103 
#elif defined TKISS

104 
float x=KISSfp ;

105 
float y=KISSfp ;

106 
#endif

107 
#elif defined TFP64

108 
#define THEONE 1.0f 
109 
#if defined TCONG

110 
double x=(double)CONGfp ; 
111 
double y=(double)CONGfp ; 
112 
#elif defined TSHR3

113 
double x=(double)SHR3fp ; 
114 
double y=(double)SHR3fp ; 
115 
#elif defined TMWC

116 
double x=(double)MWCfp ; 
117 
double y=(double)MWCfp ; 
118 
#elif defined TKISS

119 
double x=(double)KISSfp ; 
120 
double y=(double)KISSfp ; 
121 
#endif

122 
#endif

123  
124 
// Matching test

125 
unsigned long inside=((x*x+y*y) < THEONE) ? 1:0; 
126 
total+=inside; 
127  
128 
} 
129  
130 
return(total);

131 
} 
132  
133 
LENGTH splitter(LENGTH iterations,int seed_w,int seed_z,int ParallelRate) { 
134  
135 
LENGTH inside[1048576],insides=0; 
136 
int i;

137 
struct timeval tv1,tv2;

138 
struct timezone tz;

139 
LENGTH IterationsEach=((iterations%ParallelRate)==0)?iterations/ParallelRate:iterations/ParallelRate+1; 
140 

141 
#if _OPENACC

142 
acc_init(acc_device_nvidia); 
143 
#endif

144  
145 
gettimeofday(&tv1, &tz); 
146 
#pragma omp parallel for shared(ParallelRate,inside) 
147 
#pragma acc kernels loop

148 
for (int i=0 ; i<ParallelRate; i++) { 
149 
inside[i]=MainLoopGlobal(IterationsEach,seed_w+i,seed_z+i); 
150 
} 
151  
152 
gettimeofday(&tv2, &tz); 
153 

154 
for (int i=0 ; i<ParallelRate; i++) { 
155 
printf("\tFound %lld for case %i\n",(long long)inside[i],i); 
156 
insides+=inside[i]; 
157 
} 
158 
printf("\n");

159  
160 
double elapsed=(double)((tv2.tv_sectv1.tv_sec) * 1000000L + 
161 
(tv2.tv_usectv1.tv_usec))/1000000;

162  
163 
double itops=(double)(ParallelRate*IterationsEach)/elapsed; 
164 

165 
printf("ParallelRate %i\nElapsed Time %.2f\nItops %.0f\n",ParallelRate,elapsed,itops);

166 

167 
return(insides);

168 
} 
169 

170 
int main(int argc, char *argv[]) { 
171  
172 
unsigned int seed_w=110271,seed_z=101008,ParallelRate=PARALLELRATE; 
173 
LENGTH iterations=ITERATIONS; 
174 
LENGTH insides=0;

175  
176 
if (argc > 1) { 
177 
iterations=(LENGTH)atoll(argv[1]);

178 
ParallelRate=atoi(argv[2]);

179 
} 
180 
else {

181 
printf("\n\tPi : Estimate Pi with Monte Carlo exploration\n\n");

182 
printf("\t\t#1 : number of iterations (default 1 billion)\n");

183 
printf("\t\t#2 : Parallel Rate (default 1024)\n\n");

184 
} 
185  
186 
printf ("\n\tInformation about architecture:\n\n");

187  
188 
printf ("\tSizeof int = %lld bytes.\n", (long long)sizeof(int)); 
189 
printf ("\tSizeof long = %lld bytes.\n", (long long)sizeof(long)); 
190 
printf ("\tSizeof long long = %lld bytes.\n\n", (long long)sizeof(long long)); 
191  
192 
printf ("\tMax int = %u\n", INT_MAX);

193 
printf ("\tMax long = %ld\n", LONG_MAX);

194 
printf ("\tMax long long = %lld\n\n", LLONG_MAX);

195  
196 
insides=splitter(iterations,seed_w,seed_z,ParallelRate); 
197  
198 
LENGTH total=((iterations%ParallelRate)==0)?iterations:(iterations/ParallelRate+1)*ParallelRate; 
199  
200 
printf("Inside/Total %ld %ld\nPi estimation %f\n\n",insides,total,(4.*(float)insides/total)); 
201 

202 
} 