1 |
|
/* Simple SillySum function in C and OpenACC/C */
|
2 |
|
/* compilation with : gcc -O3 -fopenacc -foffload=nvptx-none -foffload="-O3 -misa=sm_35 -lm" -o MySteps_openacc MySteps_openacc.c -lm */
|
3 |
|
|
4 |
|
#include <math.h>
|
5 |
|
#include <stdio.h>
|
6 |
|
#include <stdlib.h>
|
7 |
|
#include <sys/time.h>
|
8 |
|
|
9 |
|
#define PI 3.141592653589793
|
10 |
|
|
11 |
|
#define MYFLOAT float
|
12 |
|
|
13 |
|
#pragma acc routine
|
14 |
|
MYFLOAT MySillyFunction(MYFLOAT x)
|
15 |
|
{
|
16 |
|
return(pow(sqrt(log(exp(atanh(tanh(asinh(sinh(acosh(cosh(atan(tan(asin(sin(acos(cos(x))))))))))))))),2));
|
17 |
|
}
|
18 |
|
|
19 |
|
/*
 * Serial reference implementation.
 *
 * For every index i in [0, size), applies MySillyFunction `calls` times to
 * a[i] and to b[i], then stores the sum of the two results in res[i].
 *
 *   res   : output array, at least `size` elements
 *   a, b  : input arrays, at least `size` elements each (not modified)
 *   calls : number of times MySillyFunction is applied to each element
 *   size  : number of elements to process; nothing is done when size <= 0
 */
void MySillySum(MYFLOAT *res, MYFLOAT *a, MYFLOAT *b,int calls, int size)
{
  /* Signed index: comparing an unsigned index against a negative `size`
     would convert the bound to a huge unsigned value and run out of bounds */
  for (int i=0; i<size;i++)
  {
    MYFLOAT ai=a[i];
    MYFLOAT bi=b[i];

    for (int c=0;c<calls;c++)
    {
      ai=MySillyFunction(ai);
      bi=MySillyFunction(bi);
    }

    res[i] = ai + bi;
  }
}
|
35 |
|
|
36 |
|
/*
 * OpenACC version of MySillySum.
 *
 * Same computation as the serial version: for every index i in [0, size),
 * MySillyFunction is applied `calls` times to a[i] and b[i] and the sum is
 * stored in res[i]. The outer loop is annotated with `parallel loop`; the
 * enclosing data construct copies a and b in and copies res out.
 *
 *   res   : output array, at least `size` elements
 *   a, b  : input arrays, at least `size` elements each (not modified)
 *   calls : number of times MySillyFunction is applied to each element
 *   size  : number of elements to process; nothing is done when size <= 0
 */
void MySillySumOpenACC(MYFLOAT *res, MYFLOAT *a, MYFLOAT *b,int calls, int size)
{
  /* Nothing to compute, and a non-positive length must not be used as the
     extent of the array sections in the data clauses below */
  if (size <= 0)
  {
    return;
  }

#pragma acc data copyin(a[0:size],b[0:size]),copyout(res[0:size])
#pragma acc parallel loop
  /* Signed index, consistent with the signed `size` bound */
  for (int i=0; i<size;i++)
  {
    MYFLOAT ai=a[i];
    MYFLOAT bi=b[i];

    for (int c=0;c<calls;c++)
    {
      ai=MySillyFunction(ai);
      bi=MySillyFunction(bi);
    }

    res[i] = ai + bi;
  }
}
|
54 |
|
|
55 |
|
/*
 * Euclidean distance between two arrays: square root of the sum over
 * [0, size) of (a[i]-b[i]) squared. Returns 0 when size <= 0.
 */
MYFLOAT MyNorm(MYFLOAT *a,MYFLOAT *b,int size)
{
  MYFLOAT sumSquares=0.;
  int idx=0;

  while (idx<size)
  {
    /* Accumulate the squared difference of the current pair */
    sumSquares+=pow(a[idx]-b[idx],2);
    idx++;
  }

  return(sqrt(sumSquares));
}
|
66 |
|
|
67 |
|
/*
 * Prints the first `size` elements of `a` on one line of standard output,
 * enclosed in square brackets, each in scientific notation with 8 decimals.
 */
void MyPrint(MYFLOAT *a,int size)
{
  int idx=0;

  printf("[");
  while (idx<size)
  {
    printf(" %.8e ",a[idx]);
    idx++;
  }
  printf("]\n");
}
|
76 |
|
|
77 |
|
int main(int argc,char *argv[])
|
78 |
|
{
|
79 |
|
float *a,*b,*res,*resacc;
|
80 |
|
int size=1024;
|
81 |
|
int calls=1;
|
82 |
|
struct timeval tv1,tv2;
|
83 |
|
|
84 |
|
if (argc > 1) {
|
85 |
|
size=(int)atoll(argv[1]);
|
86 |
|
calls=(int)atoll(argv[2]);
|
87 |
|
}
|
88 |
|
else {
|
89 |
|
printf("\n\tPi : Estimate SillySum\n\n\t\t#1 : size (default 1024)\n\t\t#2 : calls (default 1)\n\n");
|
90 |
|
}
|
91 |
|
|
92 |
|
printf("%i %i\n",size,calls);
|
93 |
|
|
94 |
|
a=(float*)malloc(size*sizeof(MYFLOAT));
|
95 |
|
b=(float*)malloc(size*sizeof(MYFLOAT));
|
96 |
|
res=(float*)malloc(size*sizeof(MYFLOAT));
|
97 |
|
resacc=(float*)malloc(size*sizeof(MYFLOAT));
|
98 |
|
|
99 |
|
srand(110271);
|
100 |
|
|
101 |
|
for (int i=0;i<size;i++)
|
102 |
|
{
|
103 |
|
a[i]=(MYFLOAT)rand()/(MYFLOAT)RAND_MAX;
|
104 |
|
b[i]=(MYFLOAT)rand()/(MYFLOAT)RAND_MAX;
|
105 |
|
res[i]=0.;
|
106 |
|
resacc[i]=0.;
|
107 |
|
}
|
108 |
|
|
109 |
|
gettimeofday(&tv1, NULL);
|
110 |
|
MySillySum(res,a,b,calls,size);
|
111 |
|
gettimeofday(&tv2, NULL);
|
112 |
|
|
113 |
|
MYFLOAT elapsed=(MYFLOAT)((tv2.tv_sec-tv1.tv_sec) * 1000000L +
|
114 |
|
(tv2.tv_usec-tv1.tv_usec))/1000000;
|
115 |
|
|
116 |
|
gettimeofday(&tv1, NULL);
|
117 |
|
MySillySumOpenACC(resacc,a,b,calls,size);
|
118 |
|
gettimeofday(&tv2, NULL);
|
119 |
|
|
120 |
|
MYFLOAT elapsedAcc=(MYFLOAT)((tv2.tv_sec-tv1.tv_sec) * 1000000L +
|
121 |
|
(tv2.tv_usec-tv1.tv_usec))/1000000;
|
122 |
|
|
123 |
|
MYFLOAT MyChecker=MyNorm(res,resacc,size);
|
124 |
|
printf("Norm: %.8e\n",MyChecker);
|
125 |
|
|
126 |
|
#ifdef VERBOSE
|
127 |
|
MyPrint(res,size);
|
128 |
|
MyPrint(resacc,size);
|
129 |
|
#endif
|
130 |
|
|
131 |
|
printf("Elapsed Time: %.3f\n",elapsed);
|
132 |
|
printf("OpenACC Elapsed Time: %.3f\n",elapsedAcc);
|
133 |
|
|
134 |
|
printf("NaiveRate: %.i\n",(int)((float)size/elapsed));
|
135 |
|
printf("OpenACCRate: %.i\n",(int)((float)size/elapsedAcc));
|
136 |
|
|
137 |
|
printf("OpenACCRatio: %.3f\n",elapsed/elapsedAcc);
|
138 |
|
|
139 |
|
free(a);
|
140 |
|
free(b);
|
141 |
|
free(res);
|
142 |
|
free(resacc);
|
143 |
|
}
|
144 |
|
|