root / ETSN / MySteps_openacc.c @ 289
Historique | Voir | Annoter | Télécharger (3,03 ko)
1 | 289 | equemene | /* Simple SillySum function in C and OpenACC/C */
|
---|---|---|---|
2 | 289 | equemene | /* compilation with : gcc -O3 -fopenacc -foffload=nvptx-none -foffload="-O3 -misa=sm_35 -lm" -o MySteps_openacc MySteps_openacc.c -lm */
|
3 | 289 | equemene | |
4 | 289 | equemene | #include <math.h> |
5 | 289 | equemene | #include <stdio.h> |
6 | 289 | equemene | #include <stdlib.h> |
7 | 289 | equemene | #include <sys/time.h> |
8 | 289 | equemene | |
9 | 289 | equemene | #define PI 3.141592653589793 |
10 | 289 | equemene | |
11 | 289 | equemene | #define MYFLOAT float |
12 | 289 | equemene | |
13 | 289 | equemene | #pragma acc routine
|
14 | 289 | equemene | MYFLOAT MySillyFunction(MYFLOAT x) |
15 | 289 | equemene | { |
16 | 289 | equemene | return(pow(sqrt(log(exp(atanh(tanh(asinh(sinh(acosh(cosh(atan(tan(asin(sin(acos(cos(x))))))))))))))),2)); |
17 | 289 | equemene | } |
18 | 289 | equemene | |
19 | 289 | equemene | void MySillySum(MYFLOAT *res, MYFLOAT *a, MYFLOAT *b,int calls, int size) |
20 | 289 | equemene | { |
21 | 289 | equemene | for (uint i=0; i<size;i++) |
22 | 289 | equemene | { |
23 | 289 | equemene | MYFLOAT ai=a[i]; |
24 | 289 | equemene | MYFLOAT bi=b[i]; |
25 | 289 | equemene | |
26 | 289 | equemene | for (int c=0;c<calls;c++) |
27 | 289 | equemene | { |
28 | 289 | equemene | ai=MySillyFunction(ai); |
29 | 289 | equemene | bi=MySillyFunction(bi); |
30 | 289 | equemene | } |
31 | 289 | equemene | |
32 | 289 | equemene | res[i] = ai + bi; |
33 | 289 | equemene | } |
34 | 289 | equemene | } |
35 | 289 | equemene | |
36 | 289 | equemene | void MySillySumOpenACC(MYFLOAT *res, MYFLOAT *a, MYFLOAT *b,int calls, int size) |
37 | 289 | equemene | { |
38 | 289 | equemene | #pragma acc data copyin(a[0:size],b[0:size]),copyout(res[0:size]) |
39 | 289 | equemene | #pragma acc parallel loop
|
40 | 289 | equemene | for (uint i=0; i<size;i++) |
41 | 289 | equemene | { |
42 | 289 | equemene | MYFLOAT ai=a[i]; |
43 | 289 | equemene | MYFLOAT bi=b[i]; |
44 | 289 | equemene | |
45 | 289 | equemene | for (int c=0;c<calls;c++) |
46 | 289 | equemene | { |
47 | 289 | equemene | ai=MySillyFunction(ai); |
48 | 289 | equemene | bi=MySillyFunction(bi); |
49 | 289 | equemene | } |
50 | 289 | equemene | |
51 | 289 | equemene | res[i] = ai + bi; |
52 | 289 | equemene | } |
53 | 289 | equemene | } |
54 | 289 | equemene | |
55 | 289 | equemene | MYFLOAT MyNorm(MYFLOAT *a,MYFLOAT *b,int size)
|
56 | 289 | equemene | { |
57 | 289 | equemene | MYFLOAT norm=0.;
|
58 | 289 | equemene | |
59 | 289 | equemene | for (int i=0;i<size;i++) |
60 | 289 | equemene | { |
61 | 289 | equemene | norm+=pow(a[i]-b[i],2);
|
62 | 289 | equemene | } |
63 | 289 | equemene | |
64 | 289 | equemene | return(sqrt(norm));
|
65 | 289 | equemene | } |
66 | 289 | equemene | |
67 | 289 | equemene | void MyPrint(MYFLOAT *a,int size) |
68 | 289 | equemene | { |
69 | 289 | equemene | printf("[");
|
70 | 289 | equemene | for (int i=0;i<size;i++) |
71 | 289 | equemene | { |
72 | 289 | equemene | printf(" %.8e ",a[i]);
|
73 | 289 | equemene | } |
74 | 289 | equemene | printf("]\n");
|
75 | 289 | equemene | } |
76 | 289 | equemene | |
77 | 289 | equemene | int main(int argc,char *argv[]) |
78 | 289 | equemene | { |
79 | 289 | equemene | float *a,*b,*res,*resacc;
|
80 | 289 | equemene | int size=1024; |
81 | 289 | equemene | int calls=1; |
82 | 289 | equemene | struct timeval tv1,tv2;
|
83 | 289 | equemene | |
84 | 289 | equemene | if (argc > 1) { |
85 | 289 | equemene | size=(int)atoll(argv[1]); |
86 | 289 | equemene | calls=(int)atoll(argv[2]); |
87 | 289 | equemene | } |
88 | 289 | equemene | else {
|
89 | 289 | equemene | printf("\n\tPi : Estimate SillySum\n\n\t\t#1 : size (default 1024)\n\t\t#2 : calls (default 1)\n\n");
|
90 | 289 | equemene | } |
91 | 289 | equemene | |
92 | 289 | equemene | printf("%i %i\n",size,calls);
|
93 | 289 | equemene | |
94 | 289 | equemene | a=(float*)malloc(size*sizeof(MYFLOAT)); |
95 | 289 | equemene | b=(float*)malloc(size*sizeof(MYFLOAT)); |
96 | 289 | equemene | res=(float*)malloc(size*sizeof(MYFLOAT)); |
97 | 289 | equemene | resacc=(float*)malloc(size*sizeof(MYFLOAT)); |
98 | 289 | equemene | |
99 | 289 | equemene | srand(110271);
|
100 | 289 | equemene | |
101 | 289 | equemene | for (int i=0;i<size;i++) |
102 | 289 | equemene | { |
103 | 289 | equemene | a[i]=(MYFLOAT)rand()/(MYFLOAT)RAND_MAX; |
104 | 289 | equemene | b[i]=(MYFLOAT)rand()/(MYFLOAT)RAND_MAX; |
105 | 289 | equemene | res[i]=0.;
|
106 | 289 | equemene | resacc[i]=0.;
|
107 | 289 | equemene | } |
108 | 289 | equemene | |
109 | 289 | equemene | gettimeofday(&tv1, NULL);
|
110 | 289 | equemene | MySillySum(res,a,b,calls,size); |
111 | 289 | equemene | gettimeofday(&tv2, NULL);
|
112 | 289 | equemene | |
113 | 289 | equemene | MYFLOAT elapsed=(MYFLOAT)((tv2.tv_sec-tv1.tv_sec) * 1000000L +
|
114 | 289 | equemene | (tv2.tv_usec-tv1.tv_usec))/1000000;
|
115 | 289 | equemene | |
116 | 289 | equemene | gettimeofday(&tv1, NULL);
|
117 | 289 | equemene | MySillySumOpenACC(resacc,a,b,calls,size); |
118 | 289 | equemene | gettimeofday(&tv2, NULL);
|
119 | 289 | equemene | |
120 | 289 | equemene | MYFLOAT elapsedAcc=(MYFLOAT)((tv2.tv_sec-tv1.tv_sec) * 1000000L +
|
121 | 289 | equemene | (tv2.tv_usec-tv1.tv_usec))/1000000;
|
122 | 289 | equemene | |
123 | 289 | equemene | MYFLOAT MyChecker=MyNorm(res,resacc,size); |
124 | 289 | equemene | printf("Norm: %.8e\n",MyChecker);
|
125 | 289 | equemene | |
126 | 289 | equemene | #ifdef VERBOSE
|
127 | 289 | equemene | MyPrint(res,size); |
128 | 289 | equemene | MyPrint(resacc,size); |
129 | 289 | equemene | #endif
|
130 | 289 | equemene | |
131 | 289 | equemene | printf("Elapsed Time: %.3f\n",elapsed);
|
132 | 289 | equemene | printf("OpenACC Elapsed Time: %.3f\n",elapsedAcc);
|
133 | 289 | equemene | |
134 | 289 | equemene | printf("NaiveRate: %.i\n",(int)((float)size/elapsed)); |
135 | 289 | equemene | printf("OpenACCRate: %.i\n",(int)((float)size/elapsedAcc)); |
136 | 289 | equemene | |
137 | 289 | equemene | printf("OpenACCRatio: %.3f\n",elapsed/elapsedAcc);
|
138 | 289 | equemene | |
139 | 289 | equemene | free(a); |
140 | 289 | equemene | free(b); |
141 | 289 | equemene | free(res); |
142 | 289 | equemene | free(resacc); |
143 | 289 | equemene | } |