Révision 296
ETSN/MySteps.c (revision 296) | ||
---|---|---|
1 |
/* Simple SillySum function in C */ |
|
2 |
/* compilation with : gcc -O3 -o MySteps MySteps.c -lm */ |
|
3 |
|
|
4 |
#include <math.h> |
|
5 |
#include <stdio.h> |
|
6 |
#include <stdlib.h> |
|
7 |
#include <sys/time.h> |
|
8 |
|
|
9 |
#define PI 3.141592653589793 |
|
10 |
|
|
11 |
#define MYFLOAT float |
|
12 |
|
|
13 |
/*
 * Push x through a chain of sixteen libm calls: cos, acos, sin, asin, tan,
 * atan, cosh, acosh, sinh, asinh, tanh, atanh, exp, log, sqrt and finally
 * pow(., 2).  The argument is promoted to double on entry, every step is a
 * double-precision <math.h> call, and only the final value is converted
 * back to MYFLOAT.
 */
MYFLOAT MySillyFunction(MYFLOAT x)
{
  double v = x;

  v = acos(cos(v));
  v = asin(sin(v));
  v = atan(tan(v));
  v = acosh(cosh(v));
  v = asinh(sinh(v));
  v = atanh(tanh(v));
  v = log(exp(v));

  return pow(sqrt(v), 2);
}
|
17 |
|
|
18 |
/*
 * Serial reference kernel.  For every index i in [0, size) it applies
 * MySillyFunction `calls` times to a[i] and, independently, to b[i], then
 * stores the sum of the two results in res[i].
 *
 * The loop index is a plain int, matching the type of `size`: the
 * non-standard `uint` is not available under strict ISO C, and comparing an
 * unsigned index with a negative `size` would turn the bound into a huge
 * unsigned value.  With an int index a non-positive size is an empty loop.
 */
void MySillySum(MYFLOAT *res, MYFLOAT *a, MYFLOAT *b,int calls, int size)
{
  for (int i = 0; i < size; i++)
    {
      MYFLOAT ai = a[i];
      MYFLOAT bi = b[i];

      for (int c = 0; c < calls; c++)
        {
          ai = MySillyFunction(ai);
          bi = MySillyFunction(bi);
        }

      res[i] = ai + bi;
    }
}
|
34 |
|
|
35 |
/*
 * OpenMP variant of MySillySum: same per-element computation, with the
 * outer loop annotated `omp parallel for`.  Each iteration reads a[i] and
 * b[i] and writes only res[i].  When the file is built without OpenMP
 * support the pragma is ignored and the loop runs serially.
 *
 * The loop index is a plain int, matching the type of `size`: the
 * non-standard `uint` is not available under strict ISO C, and comparing an
 * unsigned index with a negative `size` would turn the bound into a huge
 * unsigned value.
 */
void MySillySumOMP(MYFLOAT *res, MYFLOAT *a, MYFLOAT *b,int calls, int size)
{
#pragma omp parallel for
  for (int i = 0; i < size; i++)
    {
      MYFLOAT ai = a[i];
      MYFLOAT bi = b[i];

      for (int c = 0; c < calls; c++)
        {
          ai = MySillyFunction(ai);
          bi = MySillyFunction(bi);
        }

      res[i] = ai + bi;
    }
}
|
52 |
|
|
53 |
/*
 * Euclidean distance between a and b over their first `size` elements:
 * the square root of the sum of the squared element-wise differences.
 * The running sum is held in a MYFLOAT.
 */
MYFLOAT MyNorm(MYFLOAT *a,MYFLOAT *b,int size)
{
  MYFLOAT sumsq = 0.;
  int k = 0;

  while (k < size)
    {
      sumsq += pow(a[k] - b[k], 2);
      k++;
    }

  return sqrt(sumsq);
}
|
64 |
|
|
65 |
/*
 * Write the first `size` elements of a to stdout on a single line, enclosed
 * in square brackets, each value in exponent notation with 8 decimals.
 */
void MyPrint(MYFLOAT *a,int size)
{
  int k = 0;

  printf("[");
  while (k < size)
    {
      printf(" %.8e ", a[k]);
      k++;
    }
  printf("]\n");
}
|
74 |
|
|
75 |
int main(int argc,char *argv[]) |
|
76 |
{ |
|
77 |
float *a,*b,*res,*resacc; |
|
78 |
int size=1024; |
|
79 |
int calls=1; |
|
80 |
struct timeval tv1,tv2; |
|
81 |
|
|
82 |
if (argc > 1) { |
|
83 |
size=(int)atoll(argv[1]); |
|
84 |
calls=(int)atoll(argv[2]); |
|
85 |
} |
|
86 |
else { |
|
87 |
printf("\n\tPi : Estimate SillySum\n\n\t\t#1 : size (default 1024)\n\t\t#2 : calls (default 1)\n\n"); |
|
88 |
} |
|
89 |
|
|
90 |
printf("%i %i\n",size,calls); |
|
91 |
|
|
92 |
a=(float*)malloc(size*sizeof(MYFLOAT)); |
|
93 |
b=(float*)malloc(size*sizeof(MYFLOAT)); |
|
94 |
res=(float*)malloc(size*sizeof(MYFLOAT)); |
|
95 |
resacc=(float*)malloc(size*sizeof(MYFLOAT)); |
|
96 |
|
|
97 |
srand(110271); |
|
98 |
|
|
99 |
for (int i=0;i<size;i++) |
|
100 |
{ |
|
101 |
a[i]=(MYFLOAT)rand()/(MYFLOAT)RAND_MAX; |
|
102 |
b[i]=(MYFLOAT)rand()/(MYFLOAT)RAND_MAX; |
|
103 |
res[i]=0.; |
|
104 |
resacc[i]=0.; |
|
105 |
} |
|
106 |
|
|
107 |
gettimeofday(&tv1, NULL); |
|
108 |
MySillySum(res,a,b,calls,size); |
|
109 |
gettimeofday(&tv2, NULL); |
|
110 |
|
|
111 |
MYFLOAT elapsed=(MYFLOAT)((tv2.tv_sec-tv1.tv_sec) * 1000000L + |
|
112 |
(tv2.tv_usec-tv1.tv_usec))/1000000; |
|
113 |
|
|
114 |
#ifdef VERBOSE |
|
115 |
MyPrint(res,size); |
|
116 |
MyPrint(resacc,size); |
|
117 |
#endif |
|
118 |
|
|
119 |
printf("Elapsed Time: %.3f\n",elapsed); |
|
120 |
|
|
121 |
printf("NaiveRate: %.i\n",(int)((float)size/elapsed)); |
|
122 |
|
|
123 |
free(a); |
|
124 |
free(b); |
|
125 |
free(res); |
|
126 |
free(resacc); |
|
127 |
} |
|
128 |
|
ETSN/MySteps_openmp.c (revision 296) | ||
---|---|---|
1 |
/* Simple SillySum function in C and OpenMP/C */ |
|
2 |
/* compilation with : gcc -fopenmp -O3 -o MySteps_openmp MySteps_openmp.c -lm -lgomp */ |
|
3 |
|
|
4 |
#include <math.h> |
|
5 |
#include <stdio.h> |
|
6 |
#include <stdlib.h> |
|
7 |
#include <sys/time.h> |
|
8 |
|
|
9 |
#define PI 3.141592653589793 |
|
10 |
|
|
11 |
#define MYFLOAT float |
|
12 |
|
|
13 |
/*
 * Push x through a chain of sixteen libm calls: cos, acos, sin, asin, tan,
 * atan, cosh, acosh, sinh, asinh, tanh, atanh, exp, log, sqrt and finally
 * pow(., 2).  The argument is promoted to double on entry, every step is a
 * double-precision <math.h> call, and only the final value is converted
 * back to MYFLOAT.
 */
MYFLOAT MySillyFunction(MYFLOAT x)
{
  double v = x;

  v = acos(cos(v));
  v = asin(sin(v));
  v = atan(tan(v));
  v = acosh(cosh(v));
  v = asinh(sinh(v));
  v = atanh(tanh(v));
  v = log(exp(v));

  return pow(sqrt(v), 2);
}
|
17 |
|
|
18 |
/*
 * Serial reference kernel.  For every index i in [0, size) it applies
 * MySillyFunction `calls` times to a[i] and, independently, to b[i], then
 * stores the sum of the two results in res[i].
 *
 * The loop index is a plain int, matching the type of `size`: the
 * non-standard `uint` is not available under strict ISO C, and comparing an
 * unsigned index with a negative `size` would turn the bound into a huge
 * unsigned value.  With an int index a non-positive size is an empty loop.
 */
void MySillySum(MYFLOAT *res, MYFLOAT *a, MYFLOAT *b,int calls, int size)
{
  for (int i = 0; i < size; i++)
    {
      MYFLOAT ai = a[i];
      MYFLOAT bi = b[i];

      for (int c = 0; c < calls; c++)
        {
          ai = MySillyFunction(ai);
          bi = MySillyFunction(bi);
        }

      res[i] = ai + bi;
    }
}
|
34 |
|
|
35 |
/*
 * OpenMP variant of MySillySum: same per-element computation, with the
 * outer loop annotated `omp parallel for`.  Each iteration reads a[i] and
 * b[i] and writes only res[i].
 *
 * The loop index is a plain int, matching the type of `size`: the
 * non-standard `uint` is not available under strict ISO C, and comparing an
 * unsigned index with a negative `size` would turn the bound into a huge
 * unsigned value.
 */
void MySillySumOMP(MYFLOAT *res, MYFLOAT *a, MYFLOAT *b,int calls, int size)
{
#pragma omp parallel for
  for (int i = 0; i < size; i++)
    {
      MYFLOAT ai = a[i];
      MYFLOAT bi = b[i];

      for (int c = 0; c < calls; c++)
        {
          ai = MySillyFunction(ai);
          bi = MySillyFunction(bi);
        }

      res[i] = ai + bi;
    }
}
|
52 |
|
|
53 |
/*
 * Euclidean distance between a and b over their first `size` elements:
 * the square root of the sum of the squared element-wise differences.
 * The running sum is held in a MYFLOAT.
 */
MYFLOAT MyNorm(MYFLOAT *a,MYFLOAT *b,int size)
{
  MYFLOAT sumsq = 0.;
  int k = 0;

  while (k < size)
    {
      sumsq += pow(a[k] - b[k], 2);
      k++;
    }

  return sqrt(sumsq);
}
|
64 |
|
|
65 |
/*
 * Write the first `size` elements of a to stdout on a single line, enclosed
 * in square brackets, each value in exponent notation with 8 decimals.
 */
void MyPrint(MYFLOAT *a,int size)
{
  int k = 0;

  printf("[");
  while (k < size)
    {
      printf(" %.8e ", a[k]);
      k++;
    }
  printf("]\n");
}
|
74 |
|
|
75 |
int main(int argc,char *argv[]) |
|
76 |
{ |
|
77 |
float *a,*b,*res,*resacc; |
|
78 |
int size=1024; |
|
79 |
int calls=1; |
|
80 |
struct timeval tv1,tv2; |
|
81 |
|
|
82 |
if (argc > 1) { |
|
83 |
size=(int)atoll(argv[1]); |
|
84 |
calls=(int)atoll(argv[2]); |
|
85 |
} |
|
86 |
else { |
|
87 |
printf("\n\tPi : Estimate SillySum\n\n\t\t#1 : size (default 1024)\n\t\t#2 : calls (default 1)\n\n"); |
|
88 |
} |
|
89 |
|
|
90 |
printf("%i %i\n",size,calls); |
|
91 |
|
|
92 |
a=(float*)malloc(size*sizeof(MYFLOAT)); |
|
93 |
b=(float*)malloc(size*sizeof(MYFLOAT)); |
|
94 |
res=(float*)malloc(size*sizeof(MYFLOAT)); |
|
95 |
resacc=(float*)malloc(size*sizeof(MYFLOAT)); |
|
96 |
|
|
97 |
srand(110271); |
|
98 |
|
|
99 |
for (int i=0;i<size;i++) |
|
100 |
{ |
|
101 |
a[i]=(MYFLOAT)rand()/(MYFLOAT)RAND_MAX; |
|
102 |
b[i]=(MYFLOAT)rand()/(MYFLOAT)RAND_MAX; |
|
103 |
res[i]=0.; |
|
104 |
resacc[i]=0.; |
|
105 |
} |
|
106 |
|
|
107 |
gettimeofday(&tv1, NULL); |
|
108 |
MySillySum(res,a,b,calls,size); |
|
109 |
gettimeofday(&tv2, NULL); |
|
110 |
|
|
111 |
MYFLOAT elapsed=(MYFLOAT)((tv2.tv_sec-tv1.tv_sec) * 1000000L + |
|
112 |
(tv2.tv_usec-tv1.tv_usec))/1000000; |
|
113 |
|
|
114 |
gettimeofday(&tv1, NULL); |
|
115 |
MySillySumOMP(resacc,a,b,calls,size); |
|
116 |
gettimeofday(&tv2, NULL); |
|
117 |
|
|
118 |
MYFLOAT elapsedAcc=(MYFLOAT)((tv2.tv_sec-tv1.tv_sec) * 1000000L + |
|
119 |
(tv2.tv_usec-tv1.tv_usec))/1000000; |
|
120 |
|
|
121 |
MYFLOAT MyChecker=MyNorm(res,resacc,size); |
|
122 |
printf("Norm: %.8e\n",MyChecker); |
|
123 |
|
|
124 |
#ifdef VERBOSE |
|
125 |
MyPrint(res,size); |
|
126 |
MyPrint(resacc,size); |
|
127 |
#endif |
|
128 |
|
|
129 |
printf("Elapsed Time: %.3f\n",elapsed); |
|
130 |
printf("OMP Elapsed Time: %.3f\n",elapsedAcc); |
|
131 |
|
|
132 |
printf("NaiveRate: %.i\n",(int)((float)size/elapsed)); |
|
133 |
printf("OMPRate: %.i\n",(int)((float)size/elapsedAcc)); |
|
134 |
|
|
135 |
printf("AccRatio: %.3f\n",elapsed/elapsedAcc); |
|
136 |
|
|
137 |
free(a); |
|
138 |
free(b); |
|
139 |
free(res); |
|
140 |
free(resacc); |
|
141 |
} |
|
142 |
|
ETSN/MySteps_openacc.c (revision 296) | ||
---|---|---|
1 |
/* Simple SillySum function in C and OpenACC/C */ |
|
2 |
/* compilation with : gcc -O3 -fopenacc -foffload=nvptx-none -foffload="-O3 -misa=sm_35 -lm" -o MySteps_openacc MySteps_openacc.c -lm */ |
|
3 |
|
|
4 |
#include <math.h> |
|
5 |
#include <stdio.h> |
|
6 |
#include <stdlib.h> |
|
7 |
#include <sys/time.h> |
|
8 |
|
|
9 |
#define PI 3.141592653589793 |
|
10 |
|
|
11 |
#define MYFLOAT float |
|
12 |
|
|
13 |
#pragma acc routine |
|
14 |
MYFLOAT MySillyFunction(MYFLOAT x) |
|
15 |
{ |
|
16 |
return(pow(sqrt(log(exp(atanh(tanh(asinh(sinh(acosh(cosh(atan(tan(asin(sin(acos(cos(x))))))))))))))),2)); |
|
17 |
} |
|
18 |
|
|
19 |
/*
 * Serial reference kernel.  For every index i in [0, size) it applies
 * MySillyFunction `calls` times to a[i] and, independently, to b[i], then
 * stores the sum of the two results in res[i].
 *
 * The loop index is a plain int, matching the type of `size`: the
 * non-standard `uint` is not available under strict ISO C, and comparing an
 * unsigned index with a negative `size` would turn the bound into a huge
 * unsigned value.  With an int index a non-positive size is an empty loop.
 */
void MySillySum(MYFLOAT *res, MYFLOAT *a, MYFLOAT *b,int calls, int size)
{
  for (int i = 0; i < size; i++)
    {
      MYFLOAT ai = a[i];
      MYFLOAT bi = b[i];

      for (int c = 0; c < calls; c++)
        {
          ai = MySillyFunction(ai);
          bi = MySillyFunction(bi);
        }

      res[i] = ai + bi;
    }
}
|
35 |
|
|
36 |
/*
 * OpenACC variant of MySillySum: same per-element computation.  The data
 * directive requests a[0:size] and b[0:size] to be copied in and
 * res[0:size] to be copied out around the loop, and the loop itself is
 * annotated `acc parallel loop`.  Each iteration writes only res[i].
 *
 * The loop index is a plain int, matching the type of `size`: the
 * non-standard `uint` is not available under strict ISO C, and comparing an
 * unsigned index with a negative `size` would turn the bound into a huge
 * unsigned value.
 */
void MySillySumOpenACC(MYFLOAT *res, MYFLOAT *a, MYFLOAT *b,int calls, int size)
{
#pragma acc data copyin(a[0:size],b[0:size]),copyout(res[0:size])
#pragma acc parallel loop
  for (int i = 0; i < size; i++)
    {
      MYFLOAT ai = a[i];
      MYFLOAT bi = b[i];

      for (int c = 0; c < calls; c++)
        {
          ai = MySillyFunction(ai);
          bi = MySillyFunction(bi);
        }

      res[i] = ai + bi;
    }
}
|
54 |
|
|
55 |
/*
 * Euclidean distance between a and b over their first `size` elements:
 * the square root of the sum of the squared element-wise differences.
 * The running sum is held in a MYFLOAT.
 */
MYFLOAT MyNorm(MYFLOAT *a,MYFLOAT *b,int size)
{
  MYFLOAT sumsq = 0.;
  int k = 0;

  while (k < size)
    {
      sumsq += pow(a[k] - b[k], 2);
      k++;
    }

  return sqrt(sumsq);
}
|
66 |
|
|
67 |
/*
 * Write the first `size` elements of a to stdout on a single line, enclosed
 * in square brackets, each value in exponent notation with 8 decimals.
 */
void MyPrint(MYFLOAT *a,int size)
{
  int k = 0;

  printf("[");
  while (k < size)
    {
      printf(" %.8e ", a[k]);
      k++;
    }
  printf("]\n");
}
|
76 |
|
|
77 |
int main(int argc,char *argv[]) |
|
78 |
{ |
|
79 |
float *a,*b,*res,*resacc; |
|
80 |
int size=1024; |
|
81 |
int calls=1; |
|
82 |
struct timeval tv1,tv2; |
|
83 |
|
|
84 |
if (argc > 1) { |
|
85 |
size=(int)atoll(argv[1]); |
|
86 |
calls=(int)atoll(argv[2]); |
|
87 |
} |
|
88 |
else { |
|
89 |
printf("\n\tPi : Estimate SillySum\n\n\t\t#1 : size (default 1024)\n\t\t#2 : calls (default 1)\n\n"); |
|
90 |
} |
|
91 |
|
|
92 |
printf("%i %i\n",size,calls); |
|
93 |
|
|
94 |
a=(float*)malloc(size*sizeof(MYFLOAT)); |
|
95 |
b=(float*)malloc(size*sizeof(MYFLOAT)); |
|
96 |
res=(float*)malloc(size*sizeof(MYFLOAT)); |
|
97 |
resacc=(float*)malloc(size*sizeof(MYFLOAT)); |
|
98 |
|
|
99 |
srand(110271); |
|
100 |
|
|
101 |
for (int i=0;i<size;i++) |
|
102 |
{ |
|
103 |
a[i]=(MYFLOAT)rand()/(MYFLOAT)RAND_MAX; |
|
104 |
b[i]=(MYFLOAT)rand()/(MYFLOAT)RAND_MAX; |
|
105 |
res[i]=0.; |
|
106 |
resacc[i]=0.; |
|
107 |
} |
|
108 |
|
|
109 |
gettimeofday(&tv1, NULL); |
|
110 |
MySillySum(res,a,b,calls,size); |
|
111 |
gettimeofday(&tv2, NULL); |
|
112 |
|
|
113 |
MYFLOAT elapsed=(MYFLOAT)((tv2.tv_sec-tv1.tv_sec) * 1000000L + |
|
114 |
(tv2.tv_usec-tv1.tv_usec))/1000000; |
|
115 |
|
|
116 |
gettimeofday(&tv1, NULL); |
|
117 |
MySillySumOpenACC(resacc,a,b,calls,size); |
|
118 |
gettimeofday(&tv2, NULL); |
|
119 |
|
|
120 |
MYFLOAT elapsedAcc=(MYFLOAT)((tv2.tv_sec-tv1.tv_sec) * 1000000L + |
|
121 |
(tv2.tv_usec-tv1.tv_usec))/1000000; |
|
122 |
|
|
123 |
MYFLOAT MyChecker=MyNorm(res,resacc,size); |
|
124 |
printf("Norm: %.8e\n",MyChecker); |
|
125 |
|
|
126 |
#ifdef VERBOSE |
|
127 |
MyPrint(res,size); |
|
128 |
MyPrint(resacc,size); |
|
129 |
#endif |
|
130 |
|
|
131 |
printf("Elapsed Time: %.3f\n",elapsed); |
|
132 |
printf("OpenACC Elapsed Time: %.3f\n",elapsedAcc); |
|
133 |
|
|
134 |
printf("NaiveRate: %.i\n",(int)((float)size/elapsed)); |
|
135 |
printf("OpenACCRate: %.i\n",(int)((float)size/elapsedAcc)); |
|
136 |
|
|
137 |
printf("OpenACCRatio: %.3f\n",elapsed/elapsedAcc); |
|
138 |
|
|
139 |
free(a); |
|
140 |
free(b); |
|
141 |
free(res); |
|
142 |
free(resacc); |
|
143 |
} |
|
144 |
|
ETSN/MySteps_6_openacc.c (revision 296) | ||
---|---|---|
1 |
/* Simple SillySum function in C and OpenACC/C */ |
|
2 |
/* compilation with sequential compute : gcc -O3 -fopenacc -foffload=nvptx-none -foffload="-O3 -misa=sm_35 -lm" -o MySteps_6_openacc MySteps_6_openacc.c -lm */ |
|
3 |
/* compilation without sequential compute : gcc -DNOSERIAL -O3 -fopenacc -foffload=nvptx-none -foffload="-O3 -misa=sm_35 -lm" -o MySteps_6_openacc_NoSerial MySteps_6_openacc.c -lm */ |
|
4 |
|
|
5 |
#include <math.h> |
|
6 |
#include <stdio.h> |
|
7 |
#include <stdlib.h> |
|
8 |
#include <sys/time.h> |
|
9 |
|
|
10 |
#define PI 3.141592653589793 |
|
11 |
|
|
12 |
#define MYFLOAT float |
|
13 |
|
|
14 |
#pragma acc routine |
|
15 |
MYFLOAT MySillyFunction(MYFLOAT x) |
|
16 |
{ |
|
17 |
return(pow(sqrt(log(exp(atanh(tanh(asinh(sinh(acosh(cosh(atan(tan(asin(sin(acos(cos(x))))))))))))))),2)); |
|
18 |
} |
|
19 |
|
|
20 |
/*
 * Serial reference kernel.  For every index i in [0, size) it applies
 * MySillyFunction `calls` times to a[i] and, independently, to b[i], then
 * stores the sum of the two results in res[i].
 *
 * The loop index is a plain int, matching the type of `size`: the
 * non-standard `uint` is not available under strict ISO C, and comparing an
 * unsigned index with a negative `size` would turn the bound into a huge
 * unsigned value.  With an int index a non-positive size is an empty loop.
 */
void MySillySum(MYFLOAT *res, MYFLOAT *a, MYFLOAT *b,int calls, int size)
{
  for (int i = 0; i < size; i++)
    {
      MYFLOAT ai = a[i];
      MYFLOAT bi = b[i];

      for (int c = 0; c < calls; c++)
        {
          ai = MySillyFunction(ai);
          bi = MySillyFunction(bi);
        }

      res[i] = ai + bi;
    }
}
|
36 |
|
|
37 |
/*
 * OpenACC variant of MySillySum: same per-element computation.  The data
 * directive requests a[0:size] and b[0:size] to be copied in and
 * res[0:size] to be copied out around the loop, and the loop itself is
 * annotated `acc parallel loop`.  Each iteration writes only res[i].
 *
 * The loop index is a plain int, matching the type of `size`: the
 * non-standard `uint` is not available under strict ISO C, and comparing an
 * unsigned index with a negative `size` would turn the bound into a huge
 * unsigned value.
 */
void MySillySumOpenACC(MYFLOAT *res, MYFLOAT *a, MYFLOAT *b,int calls, int size)
{
#pragma acc data copyin(a[0:size],b[0:size]),copyout(res[0:size])
#pragma acc parallel loop
  for (int i = 0; i < size; i++)
    {
      MYFLOAT ai = a[i];
      MYFLOAT bi = b[i];

      for (int c = 0; c < calls; c++)
        {
          ai = MySillyFunction(ai);
          bi = MySillyFunction(bi);
        }

      res[i] = ai + bi;
    }
}
|
55 |
|
|
56 |
/*
 * Euclidean distance between a and b over their first `size` elements:
 * the square root of the sum of the squared element-wise differences.
 * The running sum is held in a MYFLOAT.
 */
MYFLOAT MyNorm(MYFLOAT *a,MYFLOAT *b,int size)
{
  MYFLOAT sumsq = 0.;
  int k = 0;

  while (k < size)
    {
      sumsq += pow(a[k] - b[k], 2);
      k++;
    }

  return sqrt(sumsq);
}
|
67 |
|
|
68 |
/*
 * Write the first `size` elements of a to stdout on a single line, enclosed
 * in square brackets, each value in exponent notation with 8 decimals.
 */
void MyPrint(MYFLOAT *a,int size)
{
  int k = 0;

  printf("[");
  while (k < size)
    {
      printf(" %.8e ", a[k]);
      k++;
    }
  printf("]\n");
}
|
77 |
|
|
78 |
int main(int argc,char *argv[]) |
|
79 |
{ |
|
80 |
float *a,*b,*res,*resacc; |
|
81 |
int size=1024; |
|
82 |
int calls=1; |
|
83 |
struct timeval tv1,tv2; |
|
84 |
|
|
85 |
if (argc > 1) { |
|
86 |
size=(int)atoll(argv[1]); |
|
87 |
calls=(int)atoll(argv[2]); |
|
88 |
} |
|
89 |
else { |
|
90 |
printf("\n\tPi : Estimate SillySum\n\n\t\t#1 : size (default 1024)\n\t\t#2 : calls (default 1)\n\n"); |
|
91 |
} |
|
92 |
|
|
93 |
printf("%i %i\n",size,calls); |
|
94 |
|
|
95 |
a=(float*)malloc(size*sizeof(MYFLOAT)); |
|
96 |
b=(float*)malloc(size*sizeof(MYFLOAT)); |
|
97 |
res=(float*)malloc(size*sizeof(MYFLOAT)); |
|
98 |
resacc=(float*)malloc(size*sizeof(MYFLOAT)); |
|
99 |
|
|
100 |
srand(110271); |
|
101 |
|
|
102 |
for (int i=0;i<size;i++) |
|
103 |
{ |
|
104 |
a[i]=(MYFLOAT)rand()/(MYFLOAT)RAND_MAX; |
|
105 |
b[i]=(MYFLOAT)rand()/(MYFLOAT)RAND_MAX; |
|
106 |
res[i]=0.; |
|
107 |
resacc[i]=0.; |
|
108 |
} |
|
109 |
|
|
110 |
#ifndef NOSERIAL |
|
111 |
gettimeofday(&tv1, NULL); |
|
112 |
MySillySum(res,a,b,calls,size); |
|
113 |
gettimeofday(&tv2, NULL); |
|
114 |
#endif |
|
115 |
|
|
116 |
MYFLOAT elapsed=(MYFLOAT)((tv2.tv_sec-tv1.tv_sec) * 1000000L + |
|
117 |
(tv2.tv_usec-tv1.tv_usec))/1000000; |
|
118 |
|
|
119 |
gettimeofday(&tv1, NULL); |
|
120 |
MySillySumOpenACC(resacc,a,b,calls,size); |
|
121 |
gettimeofday(&tv2, NULL); |
|
122 |
|
|
123 |
MYFLOAT elapsedAcc=(MYFLOAT)((tv2.tv_sec-tv1.tv_sec) * 1000000L + |
|
124 |
(tv2.tv_usec-tv1.tv_usec))/1000000; |
|
125 |
|
|
126 |
#ifndef NOSERIAL |
|
127 |
MYFLOAT MyChecker=MyNorm(res,resacc,size); |
|
128 |
printf("Norm: %.8e\n",MyChecker); |
|
129 |
#endif |
|
130 |
|
|
131 |
#ifdef VERBOSE |
|
132 |
MyPrint(res,size); |
|
133 |
MyPrint(resacc,size); |
|
134 |
#endif |
|
135 |
|
|
136 |
#ifndef NOSERIAL |
|
137 |
printf("Elapsed Time: %.3f\n",elapsed); |
|
138 |
printf("OpenACC Elapsed Time: %.3f\n",elapsedAcc); |
|
139 |
#endif |
|
140 |
|
|
141 |
#ifndef NOSERIAL |
|
142 |
printf("NaiveRate: %.lld\n",(unsigned long)((float)size/elapsed)); |
|
143 |
#endif |
|
144 |
printf("OpenACCRate: %.lld\n",(unsigned long)((float)size/elapsedAcc)); |
|
145 |
|
|
146 |
#ifndef NOSERIAL |
|
147 |
printf("OpenACCRatio: %.3f\n",elapsed/elapsedAcc); |
|
148 |
#endif |
|
149 |
|
|
150 |
free(a); |
|
151 |
free(b); |
|
152 |
free(res); |
|
153 |
free(resacc); |
|
154 |
} |
|
155 |
|
ETSN/MySteps_1.c (revision 296) | ||
---|---|---|
1 |
/* Simple Sum function in C */ |
|
2 |
/* compilation with : gcc -O3 -o MySteps_1 MySteps_1.c -lm */ |
|
3 |
|
|
4 |
#include <math.h> |
|
5 |
#include <stdio.h> |
|
6 |
#include <stdlib.h> |
|
7 |
#include <sys/time.h> |
|
8 |
|
|
9 |
#define PI 3.141592653589793 |
|
10 |
|
|
11 |
#define MYFLOAT float |
|
12 |
|
|
13 |
/*
 * Element-wise addition: res[i] = a[i] + b[i] for every i in [0, size).
 *
 * The loop index is a plain int, matching the type of `size`: the
 * non-standard `uint` is not available under strict ISO C, and comparing an
 * unsigned index with a negative `size` would turn the bound into a huge
 * unsigned value.  With an int index a non-positive size is an empty loop.
 */
void MySum(MYFLOAT *res, MYFLOAT *a, MYFLOAT *b, int size)
{
  for (int i = 0; i < size; i++)
    {
      res[i] = a[i] + b[i];
    }
}
|
20 |
|
|
21 |
/*
 * Euclidean distance between a and b over their first `size` elements:
 * the square root of the sum of the squared element-wise differences.
 * The running sum is held in a MYFLOAT.
 */
MYFLOAT MyNorm(MYFLOAT *a,MYFLOAT *b,int size)
{
  MYFLOAT sumsq = 0.;
  int k = 0;

  while (k < size)
    {
      sumsq += pow(a[k] - b[k], 2);
      k++;
    }

  return sqrt(sumsq);
}
|
32 |
|
|
33 |
/*
 * Write the first `size` elements of a to stdout on a single line, enclosed
 * in square brackets, each value in exponent notation with 8 decimals.
 */
void MyPrint(MYFLOAT *a,int size)
{
  int k = 0;

  printf("[");
  while (k < size)
    {
      printf(" %.8e ", a[k]);
      k++;
    }
  printf("]\n");
}
|
42 |
|
|
43 |
int main(int argc,char *argv[]) |
|
44 |
{ |
|
45 |
float *a,*b,*res,*resacc; |
|
46 |
int size=1024; |
|
47 |
struct timeval tv1,tv2; |
|
48 |
|
|
49 |
if (argc > 1) { |
|
50 |
size=(int)atoll(argv[1]); |
|
51 |
} |
|
52 |
else { |
|
53 |
printf("\n\tMySteps : Estimate SillySum\n\n\t\t#1 : size (default 1024)\n\n"); |
|
54 |
} |
|
55 |
|
|
56 |
printf("%i %i\n",size); |
|
57 |
|
|
58 |
a=(float*)malloc(size*sizeof(MYFLOAT)); |
|
59 |
b=(float*)malloc(size*sizeof(MYFLOAT)); |
|
60 |
res=(float*)malloc(size*sizeof(MYFLOAT)); |
|
61 |
|
|
62 |
srand(110271); |
|
63 |
|
|
64 |
for (int i=0;i<size;i++) |
|
65 |
{ |
|
66 |
a[i]=(MYFLOAT)rand()/(MYFLOAT)RAND_MAX; |
|
67 |
b[i]=(MYFLOAT)rand()/(MYFLOAT)RAND_MAX; |
|
68 |
res[i]=0.; |
|
69 |
} |
|
70 |
|
|
71 |
gettimeofday(&tv1, NULL); |
|
72 |
MySum(res,a,b,size); |
|
73 |
gettimeofday(&tv2, NULL); |
|
74 |
|
|
75 |
MYFLOAT elapsed=(MYFLOAT)((tv2.tv_sec-tv1.tv_sec) * 1000000L + |
|
76 |
(tv2.tv_usec-tv1.tv_usec))/1000000; |
|
77 |
|
|
78 |
#ifdef VERBOSE |
|
79 |
MyPrint(res,size); |
|
80 |
#endif |
|
81 |
|
|
82 |
printf("Elapsed Time: %.3f\n",elapsed); |
|
83 |
|
|
84 |
printf("NaiveRate: %.lld\n",(unsigned long)((float)size/elapsed)); |
|
85 |
|
|
86 |
free(a); |
|
87 |
free(b); |
|
88 |
free(res); |
|
89 |
} |
|
90 |
|
ETSN/MyDFT_10.py (revision 296) | ||
---|---|---|
6 | 6 |
|
7 | 7 |
# |
8 | 8 |
def NumpyFFT(x,y): |
9 |
xy=x+1.j*y
|
|
9 |
xy=np.csingle(x+1.j*y)
|
|
10 | 10 |
XY=np.fft.fft(xy) |
11 |
print(XY) |
|
11 | 12 |
return(XY.real,XY.imag) |
12 | 13 |
|
13 | 14 |
# |
... | ... | |
18 | 19 |
import gpyfft |
19 | 20 |
from gpyfft.fft import FFT |
20 | 21 |
|
22 |
TimeIn=time.time() |
|
21 | 23 |
Id=0 |
22 | 24 |
HasXPU=False |
23 | 25 |
for platform in cl.get_platforms(): |
... | ... | |
32 | 34 |
if HasXPU==False: |
33 | 35 |
print("No XPU #%i found in all of %i devices, sorry..." % (Device,Id-1)) |
34 | 36 |
sys.exit() |
37 |
Elapsed=time.time()-TimeIn |
|
38 |
print("Selection of device : %.3f" % Elapsed) |
|
35 | 39 |
|
40 |
TimeIn=time.time() |
|
36 | 41 |
try: |
37 | 42 |
ctx = cl.Context(devices=[XPU]) |
38 | 43 |
queue = cl.CommandQueue(ctx,properties=cl.command_queue_properties.PROFILING_ENABLE) |
39 | 44 |
except: |
40 | 45 |
print("Crash during context creation") |
46 |
Elapsed=time.time()-TimeIn |
|
47 |
print("Context initialisation : %.3f" % Elapsed) |
|
41 | 48 |
|
42 |
XY_gpu = cla.to_device(queue, x+1.j*y) |
|
49 |
TimeIn=time.time() |
|
50 |
XY_gpu = cla.to_device(queue, np.csingle(x+1.j*y)) |
|
51 |
Elapsed=time.time()-TimeIn |
|
52 |
print("Copy from Host to Device : %.3f" % Elapsed) |
|
43 | 53 |
|
44 |
transform = FFT(ctx, queue, XY_gpu)
|
|
45 |
|
|
54 |
TimeIn=time.time()
|
|
55 |
transform = FFT(ctx, queue, XY_gpu) |
|
46 | 56 |
event, = transform.enqueue() |
47 | 57 |
event.wait() |
48 |
|
|
58 |
Elapsed=time.time()-TimeIn |
|
59 |
print("Compute FFT : %.3f" % Elapsed) |
|
60 |
TimeIn=time.time() |
|
49 | 61 |
XY = XY_gpu.get() |
62 |
Elapsed=time.time()-TimeIn |
|
63 |
print("Copy from Device to Host : %.3f" % Elapsed) |
|
64 |
print(XY) |
|
50 | 65 |
return(XY.real,XY.imag) |
51 | 66 |
|
52 | 67 |
# Naive Discrete Fourier Transform |
... | ... | |
451 | 466 |
m_np,n_np=NumpyFFT(a_np,b_np) |
452 | 467 |
NumpyFFTElapsed=time.time()-TimeIn |
453 | 468 |
NumpyFFTRate=int(SIZE/NumpyFFTElapsed) |
469 |
print("NumpyFFTElapsed: %i" % NumpyFFTElapsed) |
|
454 | 470 |
print("NumpyFFTRate: %i" % NumpyFFTRate) |
455 | 471 |
print("Precision: ",np.linalg.norm(m_np-C_np), |
456 | 472 |
np.linalg.norm(n_np-D_np)) |
... | ... | |
462 | 478 |
i_np,j_np=OpenCLFFT(a_np,b_np,Device) |
463 | 479 |
OpenCLFFTElapsed=time.time()-TimeIn |
464 | 480 |
OpenCLFFTRate=int(SIZE/OpenCLFFTElapsed) |
481 |
print("OpenCLElapsed: %i" % OpenCLFFTElapsed) |
|
465 | 482 |
print("OpenCLRate: %i" % OpenCLFFTRate) |
466 | 483 |
print("Precision: ",np.linalg.norm(i_np-C_np), |
467 | 484 |
np.linalg.norm(j_np-D_np)) |
468 | 485 |
|
486 |
if OpenCLFFTMethod and NumpyFFTMethod: |
|
487 |
print("NumpyOpenCLRatio: %f" % (OpenCLFFTRate/NumpyFFTRate)) |
ETSN/MySteps_6.c (revision 296) | ||
---|---|---|
1 |
/* Simple SillySum function in C */ |
|
2 |
/* compilation with : gcc -O3 -o MySteps_6 MySteps_6.c -lm */ |
|
3 |
|
|
4 |
#include <math.h> |
|
5 |
#include <stdio.h> |
|
6 |
#include <stdlib.h> |
|
7 |
#include <sys/time.h> |
|
8 |
|
|
9 |
#define PI 3.141592653589793 |
|
10 |
|
|
11 |
#define MYFLOAT float |
|
12 |
|
|
13 |
/*
 * Push x through a chain of sixteen libm calls: cos, acos, sin, asin, tan,
 * atan, cosh, acosh, sinh, asinh, tanh, atanh, exp, log, sqrt and finally
 * pow(., 2).  The argument is promoted to double on entry, every step is a
 * double-precision <math.h> call, and only the final value is converted
 * back to MYFLOAT.
 */
MYFLOAT MySillyFunction(MYFLOAT x)
{
  double v = x;

  v = acos(cos(v));
  v = asin(sin(v));
  v = atan(tan(v));
  v = acosh(cosh(v));
  v = asinh(sinh(v));
  v = atanh(tanh(v));
  v = log(exp(v));

  return pow(sqrt(v), 2);
}
|
17 |
|
|
18 |
/*
 * Serial reference kernel.  For every index i in [0, size) it applies
 * MySillyFunction `calls` times to a[i] and, independently, to b[i], then
 * stores the sum of the two results in res[i].
 *
 * The loop index is a plain int, matching the type of `size`: the
 * non-standard `uint` is not available under strict ISO C, and comparing an
 * unsigned index with a negative `size` would turn the bound into a huge
 * unsigned value.  With an int index a non-positive size is an empty loop.
 */
void MySillySum(MYFLOAT *res, MYFLOAT *a, MYFLOAT *b,int calls, int size)
{
  for (int i = 0; i < size; i++)
    {
      MYFLOAT ai = a[i];
      MYFLOAT bi = b[i];

      for (int c = 0; c < calls; c++)
        {
          ai = MySillyFunction(ai);
          bi = MySillyFunction(bi);
        }

      res[i] = ai + bi;
    }
}
|
34 |
|
|
35 |
/*
 * Euclidean distance between a and b over their first `size` elements:
 * the square root of the sum of the squared element-wise differences.
 * The running sum is held in a MYFLOAT.
 */
MYFLOAT MyNorm(MYFLOAT *a,MYFLOAT *b,int size)
{
  MYFLOAT sumsq = 0.;
  int k = 0;

  while (k < size)
    {
      sumsq += pow(a[k] - b[k], 2);
      k++;
    }

  return sqrt(sumsq);
}
|
46 |
|
|
47 |
/*
 * Write the first `size` elements of a to stdout on a single line, enclosed
 * in square brackets, each value in exponent notation with 8 decimals.
 */
void MyPrint(MYFLOAT *a,int size)
{
  int k = 0;

  printf("[");
  while (k < size)
    {
      printf(" %.8e ", a[k]);
      k++;
    }
  printf("]\n");
}
|
56 |
|
|
57 |
int main(int argc,char *argv[]) |
|
58 |
{ |
|
59 |
float *a,*b,*res,*resacc; |
|
60 |
int size=1024; |
|
61 |
int calls=1; |
|
62 |
struct timeval tv1,tv2; |
|
63 |
|
|
64 |
if (argc > 1) { |
|
65 |
size=(int)atoll(argv[1]); |
|
66 |
calls=(int)atoll(argv[2]); |
|
67 |
} |
|
68 |
else { |
|
69 |
printf("\n\tMySteps : Estimate SillySum\n\n\t\t#1 : size (default 1024)\n\t\t#2 : calls (default 1)\n\n"); |
|
70 |
} |
|
71 |
|
|
72 |
printf("%i %i\n",size,calls); |
|
73 |
|
|
74 |
a=(float*)malloc(size*sizeof(MYFLOAT)); |
|
75 |
b=(float*)malloc(size*sizeof(MYFLOAT)); |
|
76 |
res=(float*)malloc(size*sizeof(MYFLOAT)); |
|
77 |
resacc=(float*)malloc(size*sizeof(MYFLOAT)); |
|
78 |
|
|
79 |
srand(110271); |
|
80 |
|
|
81 |
for (int i=0;i<size;i++) |
|
82 |
{ |
|
83 |
a[i]=(MYFLOAT)rand()/(MYFLOAT)RAND_MAX; |
|
84 |
b[i]=(MYFLOAT)rand()/(MYFLOAT)RAND_MAX; |
|
85 |
res[i]=0.; |
|
86 |
resacc[i]=0.; |
|
87 |
} |
|
88 |
|
|
89 |
gettimeofday(&tv1, NULL); |
|
90 |
MySillySum(res,a,b,calls,size); |
|
91 |
gettimeofday(&tv2, NULL); |
|
92 |
|
|
93 |
MYFLOAT elapsed=(MYFLOAT)((tv2.tv_sec-tv1.tv_sec) * 1000000L + |
|
94 |
(tv2.tv_usec-tv1.tv_usec))/1000000; |
|
95 |
|
|
96 |
#ifdef VERBOSE |
|
97 |
MyPrint(res,size); |
|
98 |
MyPrint(resacc,size); |
|
99 |
#endif |
|
100 |
|
|
101 |
printf("Elapsed Time: %.3f\n",elapsed); |
|
102 |
|
|
103 |
printf("NaiveRate: %.lld\n",(unsigned long)((float)size/elapsed)); |
|
104 |
|
|
105 |
free(a); |
|
106 |
free(b); |
|
107 |
free(res); |
|
108 |
free(resacc); |
|
109 |
} |
|
110 |
|
ETSN/MySteps_2.py (revision 296) | ||
---|---|---|
196 | 196 |
# Check on CPU with Numpy: |
197 | 197 |
print(res_cl - res_np) |
198 | 198 |
print(np.linalg.norm(res_cl - res_np)) |
199 |
assert np.allclose(res_np, res_cl,rtol=1e-4) |
|
199 |
assert np.allclose(res_cl, res_np,rtol=1e-4) |
ETSN/MySteps_6.py (revision 296) | ||
---|---|---|
289 | 289 |
Device=0 |
290 | 290 |
Calls=1 |
291 | 291 |
Threads=1 |
292 |
|
|
292 |
Serial=True |
|
293 |
|
|
293 | 294 |
import getopt |
294 | 295 |
|
295 |
HowToUse='%s -g <CUDA/OpenCL> -s <SizeOfVector> -d <DeviceId> -c <SillyCalls> -t <Threads>' |
|
296 |
HowToUse='%s -n -g <CUDA/OpenCL> -s <SizeOfVector> -d <DeviceId> -c <SillyCalls> -t <Threads>'
|
|
296 | 297 |
|
297 | 298 |
try: |
298 |
opts, args = getopt.getopt(sys.argv[1:],"hg:s:d:c:t:",["gpustyle=","size=","device=","calls=","threads="]) |
|
299 |
opts, args = getopt.getopt(sys.argv[1:],"hng:s:d:c:t:",["gpustyle=","size=","device=","calls=","threads="])
|
|
299 | 300 |
except getopt.GetoptError: |
300 | 301 |
print(HowToUse % sys.argv[0]) |
301 | 302 |
sys.exit(2) |
... | ... | |
347 | 348 |
GpuStyle = arg |
348 | 349 |
elif opt in ("-s", "--size"): |
349 | 350 |
SIZE = int(arg) |
351 |
elif opt in ("-n"): |
|
352 |
Serial = False |
|
350 | 353 |
|
351 | 354 |
print("Device Selection : %i" % Device) |
352 | 355 |
print("GpuStyle used : %s" % GpuStyle) |
353 | 356 |
print("Size of complex vector : %i" % SIZE) |
354 | 357 |
print("Number of silly calls : %i" % Calls) |
355 | 358 |
print("Number of Threads : %i" % Threads) |
359 |
print("Serial compute : %i" % Serial) |
|
356 | 360 |
|
357 | 361 |
if GpuStyle=='CUDA': |
358 | 362 |
try: |
... | ... | |
391 | 395 |
b_np = np.random.rand(SIZE).astype(np.float32) |
392 | 396 |
|
393 | 397 |
# Native Implementation |
394 |
TimeIn=time.time() |
|
395 |
res_np=NativeSillyAddition(a_np,b_np,Calls) |
|
396 |
NativeElapsed=time.time()-TimeIn |
|
397 |
NativeRate=int(SIZE/NativeElapsed) |
|
398 |
print("NativeRate: %i" % NativeRate) |
|
398 |
if Serial: |
|
399 |
TimeIn=time.time() |
|
400 |
res_np=NativeSillyAddition(a_np,b_np,Calls) |
|
401 |
NativeElapsed=time.time()-TimeIn |
|
402 |
NativeRate=int(SIZE/NativeElapsed) |
|
403 |
print("NativeRate: %i" % NativeRate) |
|
399 | 404 |
|
400 | 405 |
# OpenCL Implementation |
401 | 406 |
if GpuStyle=='OpenCL' or GpuStyle=='all': |
... | ... | |
406 | 411 |
OpenCLRate=int(SIZE/OpenCLElapsed) |
407 | 412 |
print("OpenCLRate: %i" % OpenCLRate) |
408 | 413 |
# Check on OpenCL with Numpy: |
409 |
print(res_cl - res_np) |
|
410 |
print(np.linalg.norm(res_cl - res_np)) |
|
411 |
try: |
|
412 |
assert np.allclose(res_np, res_cl) |
|
413 |
except: |
|
414 |
print("Results between Native & OpenCL seem to be too different!") |
|
414 |
if Serial: |
|
415 |
print(res_cl - res_np) |
|
416 |
print(np.linalg.norm(res_cl - res_np)) |
|
417 |
try: |
|
418 |
assert np.allclose(res_np, res_cl) |
|
419 |
except: |
|
420 |
print("Results between Native & OpenCL seem to be too different!") |
|
415 | 421 |
|
416 |
print("OpenCLvsNative ratio: %f" % (OpenCLRate/NativeRate)) |
|
422 |
print("OpenCLvsNative ratio: %f" % (OpenCLRate/NativeRate))
|
|
417 | 423 |
|
418 | 424 |
# CUDA Implementation |
419 | 425 |
if GpuStyle=='CUDA' or GpuStyle=='all': |
... | ... | |
423 | 429 |
CUDARate=int(SIZE/CUDAElapsed) |
424 | 430 |
print("CUDARate: %i" % CUDARate) |
425 | 431 |
# Check on CUDA with Numpy: |
426 |
print(res_cuda - res_np) |
|
427 |
print(np.linalg.norm(res_cuda - res_np)) |
|
428 |
try: |
|
429 |
assert np.allclose(res_np, res_cuda) |
|
430 |
except: |
|
431 |
print("Results between Native & CUDA seem to be too different!") |
|
432 |
if Serial: |
|
433 |
print(res_cuda - res_np) |
|
434 |
print(np.linalg.norm(res_cuda - res_np)) |
|
435 |
try: |
|
436 |
assert np.allclose(res_np, res_cuda) |
|
437 |
except: |
|
438 |
print("Results between Native & CUDA seem to be too different!") |
|
432 | 439 |
|
433 |
print("CUDAvsNative ratio: %f" % (CUDARate/NativeRate)) |
|
440 |
print("CUDAvsNative ratio: %f" % (CUDARate/NativeRate))
|
|
434 | 441 |
|
435 | 442 |
|
436 | 443 |
|
ETSN/MySteps_1_openmp.c (revision 296) | ||
---|---|---|
1 |
/* Simple Sum function in C and OpenMP/C */ |
|
2 |
/* compilation with sequential compute : gcc -fopenmp -O3 -o MySteps_1_openmp MySteps_1_openmp.c -lm -lgomp */ |
|
3 |
/* compilation without sequential compute : gcc -DNOSERIAL -fopenmp -O3 -o MySteps_1_openmp_1_NoSerial MySteps_1_openmp.c -lm -lgomp */ |
|
4 |
|
|
5 |
#include <math.h> |
|
6 |
#include <stdio.h> |
|
7 |
#include <stdlib.h> |
|
8 |
#include <sys/time.h> |
|
9 |
|
|
10 |
#define PI 3.141592653589793 |
|
11 |
|
|
12 |
#define MYFLOAT float |
|
13 |
|
|
14 |
/*
 * Element-wise addition: res[i] = a[i] + b[i] for every i in [0, size).
 *
 * The loop index is a plain int, matching the type of `size`: the
 * non-standard `uint` is not available under strict ISO C, and comparing an
 * unsigned index with a negative `size` would turn the bound into a huge
 * unsigned value.  With an int index a non-positive size is an empty loop.
 */
void MySum(MYFLOAT *res, MYFLOAT *a, MYFLOAT *b, int size)
{
  for (int i = 0; i < size; i++)
    {
      res[i] = a[i] + b[i];
    }
}
|
21 |
|
|
22 |
/*
 * OpenMP variant of MySum.  Despite its name it performs a plain
 * element-wise addition, res[i] = a[i] + b[i], with the loop annotated
 * `omp parallel for`; each iteration writes only res[i].
 *
 * The loop index is a plain int, matching the type of `size`: the
 * non-standard `uint` is not available under strict ISO C, and comparing an
 * unsigned index with a negative `size` would turn the bound into a huge
 * unsigned value.
 */
void MySillySumOMP(MYFLOAT *res, MYFLOAT *a, MYFLOAT *b, int size)
{
#pragma omp parallel for
  for (int i = 0; i < size; i++)
    {
      res[i] = a[i] + b[i];
    }
}
|
30 |
|
|
31 |
/*
 * Euclidean distance between a and b over their first `size` elements:
 * the square root of the sum of the squared element-wise differences.
 * The running sum is held in a MYFLOAT.
 */
MYFLOAT MyNorm(MYFLOAT *a,MYFLOAT *b,int size)
{
  MYFLOAT sumsq = 0.;
  int k = 0;

  while (k < size)
    {
      sumsq += pow(a[k] - b[k], 2);
      k++;
    }

  return sqrt(sumsq);
}
|
42 |
|
|
43 |
/*
 * Write the first `size` elements of a to stdout on a single line, enclosed
 * in square brackets, each value in exponent notation with 8 decimals.
 */
void MyPrint(MYFLOAT *a,int size)
{
  int k = 0;

  printf("[");
  while (k < size)
    {
      printf(" %.8e ", a[k]);
      k++;
    }
  printf("]\n");
}
|
52 |
|
|
53 |
int main(int argc,char *argv[]) |
|
54 |
{ |
|
55 |
float *a,*b,*res,*resacc; |
|
56 |
int size=1024; |
|
57 |
struct timeval tv1,tv2; |
|
58 |
|
|
59 |
if (argc > 1) { |
|
60 |
size=(int)atoll(argv[1]); |
|
61 |
} |
|
62 |
else { |
|
63 |
printf("\n\tPi : Estimate SillySum\n\n\t\t#1 : size (default 1024)\n\n"); |
|
64 |
} |
|
65 |
|
|
66 |
printf("%i\n",size); |
|
67 |
|
|
68 |
a=(float*)malloc(size*sizeof(MYFLOAT)); |
|
69 |
b=(float*)malloc(size*sizeof(MYFLOAT)); |
|
70 |
res=(float*)malloc(size*sizeof(MYFLOAT)); |
|
71 |
resacc=(float*)malloc(size*sizeof(MYFLOAT)); |
|
72 |
|
|
73 |
srand(110271); |
|
74 |
|
|
75 |
for (int i=0;i<size;i++) |
|
76 |
{ |
|
77 |
a[i]=(MYFLOAT)rand()/(MYFLOAT)RAND_MAX; |
|
78 |
b[i]=(MYFLOAT)rand()/(MYFLOAT)RAND_MAX; |
|
79 |
res[i]=0.; |
|
80 |
resacc[i]=0.; |
|
81 |
} |
|
82 |
|
|
83 |
#ifndef NOSERIAL |
|
84 |
gettimeofday(&tv1, NULL); |
|
85 |
MySum(res,a,b,size); |
|
86 |
gettimeofday(&tv2, NULL); |
|
87 |
#endif |
|
88 |
|
|
89 |
MYFLOAT elapsed=(MYFLOAT)((tv2.tv_sec-tv1.tv_sec) * 1000000L + |
|
90 |
(tv2.tv_usec-tv1.tv_usec))/1000000; |
|
91 |
|
|
92 |
gettimeofday(&tv1, NULL); |
|
93 |
MySillySumOMP(resacc,a,b,size); |
|
94 |
gettimeofday(&tv2, NULL); |
|
95 |
|
|
96 |
MYFLOAT elapsedAcc=(MYFLOAT)((tv2.tv_sec-tv1.tv_sec) * 1000000L + |
|
97 |
(tv2.tv_usec-tv1.tv_usec))/1000000; |
|
98 |
|
|
99 |
#ifndef NOSERIAL |
|
100 |
MYFLOAT MyChecker=MyNorm(res,resacc,size); |
|
101 |
printf("Norm: %.8e\n",MyChecker); |
|
102 |
#endif |
|
103 |
|
|
104 |
#ifdef VERBOSE |
|
105 |
MyPrint(res,size); |
|
106 |
MyPrint(resacc,size); |
|
107 |
#endif |
|
108 |
|
|
109 |
#ifndef NOSERIAL |
|
110 |
printf("Elapsed Time: %.3f\n",elapsed); |
|
111 |
#endif |
|
112 |
|
|
113 |
printf("OMP Elapsed Time: %.3f\n",elapsedAcc); |
|
114 |
|
|
115 |
#ifndef NOSERIAL |
|
116 |
printf("NaiveRate: %.lld\n",(unsigned long)((float)size/elapsed)); |
|
117 |
#endif |
|
118 |
printf("OMPRate: %.lld\n",(unsigned long)((float)size/elapsedAcc)); |
|
119 |
|
|
120 |
#ifndef NOSERIAL |
|
121 |
printf("AccRatio: %.3f\n",elapsed/elapsedAcc); |
|
122 |
#endif |
|
123 |
|
|
124 |
free(a); |
|
125 |
free(b); |
|
126 |
free(res); |
|
127 |
free(resacc); |
|
128 |
} |
|
129 |
|
ETSN/MySteps_6_openmp.c (revision 296) | ||
---|---|---|
1 |
/* Simple SillySum function in C and OpenMP/C */ |
|
2 |
/* compilation with sequential compute : gcc -fopenmp -O3 -o MySteps_6_openmp MySteps_6_openmp.c -lm -lgomp */ |
|
3 |
/* compilation without sequential compute : gcc -DNOSERIAL -fopenmp -O3 -o MySteps_6_openmp_NoSerial MySteps_6_openmp.c -lm -lgomp */ |
|
4 |
|
|
5 |
#include <math.h> |
|
6 |
#include <stdio.h> |
|
7 |
#include <stdlib.h> |
|
8 |
#include <sys/time.h> |
|
9 |
|
|
10 |
#define PI 3.141592653589793 |
|
11 |
|
|
12 |
#define MYFLOAT float |
|
13 |
|
|
14 |
/*
 * Push x through a chain of math functions, each one immediately followed
 * by its inverse: cos/acos, sin/asin, tan/atan, cosh/acosh, sinh/asinh,
 * tanh/atanh, exp/log and finally sqrt/square.
 *
 * All intermediate values are doubles (the <math.h> double variants are
 * called); the final result is converted back to MYFLOAT on return.
 * Presumably this only serves as an artificial, costly per-element
 * workload for the benchmark.
 */
MYFLOAT MySillyFunction(MYFLOAT x)
{
  double value = x;

  value = acos(cos(value));
  value = asin(sin(value));
  value = atan(tan(value));
  value = acosh(cosh(value));
  value = asinh(sinh(value));
  value = atanh(tanh(value));
  value = log(exp(value));
  value = sqrt(value);

  return(pow(value,2));
}
|
18 |
|
|
19 |
/*
 * Serial reference kernel.
 *
 * For every index i below size, MySillyFunction is applied `calls` times
 * in a row to a[i] and to b[i]; the sum of the two results goes to res[i].
 *
 *   res   : output array, at least `size` elements
 *   a, b  : input arrays, at least `size` elements each (not modified)
 *   calls : number of successive MySillyFunction applications per operand
 *   size  : number of elements to process; nothing happens when size <= 0
 */
void MySillySum(MYFLOAT *res, MYFLOAT *a, MYFLOAT *b,int calls, int size)
{
  /* Signed index on purpose: with an unsigned index, a negative size would
     be converted to a huge bound and the loop would run past the arrays. */
  for (int i=0; i<size; i++)
  {
    MYFLOAT ai=a[i];
    MYFLOAT bi=b[i];

    for (int c=0; c<calls; c++)
    {
      ai=MySillyFunction(ai);
      bi=MySillyFunction(bi);
    }

    res[i] = ai + bi;
  }
}
|
35 |
|
|
36 |
/*
 * OpenMP variant of MySillySum.
 *
 * Same computation as the serial kernel: for every index i below size,
 * MySillyFunction is applied `calls` times to a[i] and to b[i] and the sum
 * of both results is stored in res[i].  The outer loop carries an
 * `omp parallel for` directive; each iteration only reads a[i], b[i] and
 * writes res[i], so iterations do not share any written data.
 *
 *   res   : output array, at least `size` elements
 *   a, b  : input arrays, at least `size` elements each (not modified)
 *   calls : number of successive MySillyFunction applications per operand
 *   size  : number of elements to process; nothing happens when size <= 0
 */
void MySillySumOMP(MYFLOAT *res, MYFLOAT *a, MYFLOAT *b,int calls, int size)
{
  /* Signed index on purpose: with an unsigned index, a negative size would
     be converted to a huge bound and the loop would run past the arrays. */
#pragma omp parallel for
  for (int i=0; i<size; i++)
  {
    MYFLOAT ai=a[i];
    MYFLOAT bi=b[i];

    for (int c=0; c<calls; c++)
    {
      ai=MySillyFunction(ai);
      bi=MySillyFunction(bi);
    }

    res[i] = ai + bi;
  }
}
|
53 |
|
|
54 |
/*
 * Euclidean distance between two arrays: sqrt(sum_i (a[i]-b[i])^2).
 *
 *   a, b : arrays of at least `size` elements (not modified)
 *   size : number of elements compared; the result is 0 when size <= 0
 *
 * The squares are accumulated in double precision and only the final
 * square root is converted to MYFLOAT: a single-precision running sum
 * drops small contributions once the total has grown, which matters for
 * the large arrays this value is used to check.
 */
MYFLOAT MyNorm(MYFLOAT *a,MYFLOAT *b,int size)
{
  double sum=0.;

  for (int i=0;i<size;i++)
  {
    double delta=(double)a[i]-(double)b[i];

    sum+=delta*delta;
  }

  return((MYFLOAT)sqrt(sum));
}
|
65 |
|
|
66 |
/*
 * Write the first `size` elements of a to stdout on a single line,
 * enclosed in square brackets, each value in scientific notation with
 * eight digits after the decimal point.
 */
void MyPrint(MYFLOAT *a,int size)
{
  int idx=0;

  printf("[");
  while (idx<size)
  {
    printf(" %.8e ",a[idx]);
    ++idx;
  }
  printf("]\n");
}
|
75 |
|
|
76 |
int main(int argc,char *argv[]) |
|
77 |
{ |
|
78 |
float *a,*b,*res,*resacc; |
|
79 |
int size=1024; |
|
80 |
int calls=1; |
|
81 |
struct timeval tv1,tv2; |
|
82 |
|
|
83 |
if (argc > 1) { |
|
84 |
size=(int)atoll(argv[1]); |
|
85 |
calls=(int)atoll(argv[2]); |
|
86 |
} |
|
87 |
else { |
|
88 |
printf("\n\tPi : Estimate SillySum\n\n\t\t#1 : size (default 1024)\n\t\t#2 : calls (default 1)\n\n"); |
|
89 |
} |
|
90 |
|
|
91 |
printf("%i %i\n",size,calls); |
|
92 |
|
|
93 |
a=(float*)malloc(size*sizeof(MYFLOAT)); |
|
94 |
b=(float*)malloc(size*sizeof(MYFLOAT)); |
|
95 |
res=(float*)malloc(size*sizeof(MYFLOAT)); |
|
96 |
resacc=(float*)malloc(size*sizeof(MYFLOAT)); |
|
97 |
|
|
98 |
srand(110271); |
|
99 |
|
|
100 |
for (int i=0;i<size;i++) |
|
101 |
{ |
|
102 |
a[i]=(MYFLOAT)rand()/(MYFLOAT)RAND_MAX; |
|
103 |
b[i]=(MYFLOAT)rand()/(MYFLOAT)RAND_MAX; |
|
104 |
res[i]=0.; |
|
105 |
resacc[i]=0.; |
|
106 |
} |
|
107 |
|
|
108 |
#ifndef NOSERIAL |
|
109 |
gettimeofday(&tv1, NULL); |
|
110 |
MySillySum(res,a,b,calls,size); |
|
111 |
gettimeofday(&tv2, NULL); |
|
112 |
#endif |
|
113 |
|
|
114 |
MYFLOAT elapsed=(MYFLOAT)((tv2.tv_sec-tv1.tv_sec) * 1000000L + |
|
115 |
(tv2.tv_usec-tv1.tv_usec))/1000000; |
|
116 |
|
|
117 |
gettimeofday(&tv1, NULL); |
|
118 |
MySillySumOMP(resacc,a,b,calls,size); |
|
119 |
gettimeofday(&tv2, NULL); |
|
120 |
|
|
121 |
MYFLOAT elapsedAcc=(MYFLOAT)((tv2.tv_sec-tv1.tv_sec) * 1000000L + |
|
122 |
(tv2.tv_usec-tv1.tv_usec))/1000000; |
|
123 |
|
|
124 |
#ifndef NOSERIAL |
|
125 |
MYFLOAT MyChecker=MyNorm(res,resacc,size); |
|
126 |
printf("Norm: %.8e\n",MyChecker); |
|
127 |
#endif |
|
128 |
|
|
129 |
#ifdef VERBOSE |
|
130 |
MyPrint(res,size); |
|
131 |
MyPrint(resacc,size); |
|
132 |
#endif |
|
133 |
|
|
134 |
#ifndef NOSERIAL |
|
135 |
printf("Elapsed Time: %.3f\n",elapsed); |
|
136 |
#endif |
|
137 |
|
|
138 |
printf("OMP Elapsed Time: %.3f\n",elapsedAcc); |
|
139 |
|
|
140 |
#ifndef NOSERIAL |
|
141 |
printf("NaiveRate: %.lld\n",(unsigned long)((float)size/elapsed)); |
|
142 |
#endif |
|
143 |
printf("OMPRate: %.lld\n",(unsigned long)((float)size/elapsedAcc)); |
|
144 |
|
|
145 |
#ifndef NOSERIAL |
|
146 |
printf("AccRatio: %.3f\n",elapsed/elapsedAcc); |
|
147 |
#endif |
|
148 |
|
|
149 |
free(a); |
|
150 |
free(b); |
|
151 |
free(res); |
|
152 |
free(resacc); |
|
153 |
} |
|
154 |
|
ETSN/MySteps_1_openacc.c (revision 296) | ||
---|---|---|
1 |
/* Simple Sum function in C and OpenACC/C */ |
|
2 |
/* compilation with sequential compute : gcc -O3 -fopenacc -foffload=nvptx-none -foffload="-O3 -misa=sm_35 -lm" -o MySteps_1_openacc MySteps_1_openacc.c -lm */ |
|
3 |
/* compilation without sequential compute : gcc -DNOSERIAL -O3 -fopenacc -foffload=nvptx-none -foffload="-O3 -misa=sm_35 -lm" -o MySteps_1_openacc_NoSerial MySteps_1_openacc.c -lm */ |
|
4 |
|
|
5 |
#include <math.h> |
|
6 |
#include <stdio.h> |
|
7 |
#include <stdlib.h> |
|
8 |
#include <sys/time.h> |
|
9 |
|
|
10 |
#define PI 3.141592653589793 |
|
11 |
|
|
12 |
#define MYFLOAT float |
|
13 |
|
|
14 |
/* #pragma acc routine */ |
|
15 |
/* MYFLOAT MySillyFunction(MYFLOAT x) */ |
|
16 |
/* { */ |
|
17 |
/* return(pow(sqrt(log(exp(atanh(tanh(asinh(sinh(acosh(cosh(atan(tan(asin(sin(acos(cos(x))))))))))))))),2)); */ |
|
18 |
/* } */ |
|
19 |
|
|
20 |
/*
 * Serial reference kernel: res[i] = a[i] + b[i] for every index below size.
 *
 *   res  : output array, at least `size` elements
 *   a, b : input arrays, at least `size` elements each (not modified)
 *   size : number of elements to process; nothing happens when size <= 0
 */
void MySum(MYFLOAT *res, MYFLOAT *a, MYFLOAT *b, int size)
{
  /* Signed index on purpose: with an unsigned index, a negative size would
     be converted to a huge bound and the loop would run past the arrays. */
  for (int i=0; i<size; i++)
  {
    res[i] = a[i] + b[i];
  }
}
|
27 |
|
|
28 |
/*
 * OpenACC variant of MySum: res[i] = a[i] + b[i] for every index below size.
 *
 * The data directive lists a and b as copyin and res as copyout, each for
 * the section [0:size]; the loop itself carries a `parallel loop`
 * directive.
 *
 *   res  : output array, at least `size` elements
 *   a, b : input arrays, at least `size` elements each (not modified)
 *   size : number of elements to process; nothing happens when size <= 0
 */
void MySumOpenACC(MYFLOAT *res, MYFLOAT *a, MYFLOAT *b, int size)
{
  /* Leave early on an empty or negative size: the array sections in the
     data clause below need a non-negative length. */
  if (size <= 0)
  {
    return;
  }

#pragma acc data copyin(a[0:size],b[0:size]),copyout(res[0:size])
#pragma acc parallel loop
  for (int i=0; i<size; i++)
  {
    res[i] = a[i] + b[i];
  }
}
|
37 |
|
|
38 |
/*
 * Euclidean distance between two arrays: sqrt(sum_i (a[i]-b[i])^2).
 *
 *   a, b : arrays of at least `size` elements (not modified)
 *   size : number of elements compared; the result is 0 when size <= 0
 *
 * The squares are accumulated in double precision and only the final
 * square root is converted to MYFLOAT: a single-precision running sum
 * drops small contributions once the total has grown, which matters for
 * the large arrays this value is used to check.
 */
MYFLOAT MyNorm(MYFLOAT *a,MYFLOAT *b,int size)
{
  double sum=0.;

  for (int i=0;i<size;i++)
  {
    double delta=(double)a[i]-(double)b[i];

    sum+=delta*delta;
  }

  return((MYFLOAT)sqrt(sum));
}
|
49 |
|
|
50 |
/*
 * Write the first `size` elements of a to stdout on a single line,
 * enclosed in square brackets, each value in scientific notation with
 * eight digits after the decimal point.
 */
void MyPrint(MYFLOAT *a,int size)
{
  int idx=0;

  printf("[");
  while (idx<size)
  {
    printf(" %.8e ",a[idx]);
    ++idx;
  }
  printf("]\n");
}
|
59 |
|
|
60 |
int main(int argc,char *argv[]) |
|
61 |
{ |
|
62 |
float *a,*b,*res,*resacc; |
|
63 |
int size=1024; |
|
64 |
struct timeval tv1,tv2; |
|
65 |
|
|
66 |
if (argc > 1) { |
|
67 |
size=(int)atoll(argv[1]); |
|
68 |
} |
|
69 |
else { |
|
70 |
printf("\n\tPi : Estimate SillySum\n\n\t\t#1 : size (default 1024)\n\n"); |
|
71 |
} |
|
72 |
|
|
73 |
printf("%i\n",size); |
|
74 |
|
|
75 |
a=(float*)malloc(size*sizeof(MYFLOAT)); |
|
76 |
b=(float*)malloc(size*sizeof(MYFLOAT)); |
|
77 |
res=(float*)malloc(size*sizeof(MYFLOAT)); |
|
78 |
resacc=(float*)malloc(size*sizeof(MYFLOAT)); |
|
79 |
|
|
80 |
srand(110271); |
|
81 |
|
|
82 |
for (int i=0;i<size;i++) |
|
83 |
{ |
|
84 |
a[i]=(MYFLOAT)rand()/(MYFLOAT)RAND_MAX; |
|
85 |
b[i]=(MYFLOAT)rand()/(MYFLOAT)RAND_MAX; |
|
86 |
res[i]=0.; |
|
87 |
resacc[i]=0.; |
|
88 |
} |
|
89 |
|
|
90 |
#ifndef NOSERIAL |
|
91 |
gettimeofday(&tv1, NULL); |
|
92 |
MySum(res,a,b,size); |
|
93 |
gettimeofday(&tv2, NULL); |
|
94 |
#endif |
|
95 |
|
|
96 |
MYFLOAT elapsed=(MYFLOAT)((tv2.tv_sec-tv1.tv_sec) * 1000000L + |
|
97 |
(tv2.tv_usec-tv1.tv_usec))/1000000; |
|
98 |
|
|
99 |
gettimeofday(&tv1, NULL); |
|
100 |
MySumOpenACC(resacc,a,b,size); |
|
101 |
gettimeofday(&tv2, NULL); |
|
102 |
|
|
103 |
MYFLOAT elapsedAcc=(MYFLOAT)((tv2.tv_sec-tv1.tv_sec) * 1000000L + |
|
104 |
(tv2.tv_usec-tv1.tv_usec))/1000000; |
|
105 |
|
|
106 |
#ifndef NOSERIAL |
|
107 |
MYFLOAT MyChecker=MyNorm(res,resacc,size); |
|
108 |
printf("Norm: %.8e\n",MyChecker); |
|
109 |
#endif |
|
110 |
|
|
111 |
#ifdef VERBOSE |
|
112 |
MyPrint(res,size); |
|
113 |
MyPrint(resacc,size); |
|
114 |
#endif |
|
115 |
|
|
116 |
#ifndef NOSERIAL |
|
117 |
printf("Elapsed Time: %.3f\n",elapsed); |
|
118 |
printf("OpenACC Elapsed Time: %.3f\n",elapsedAcc); |
|
119 |
#endif |
|
120 |
|
|
121 |
#ifndef NOSERIAL |
|
122 |
printf("NaiveRate: %.lld\n",(unsigned long)((float)size/elapsed)); |
|
123 |
#endif |
|
124 |
printf("OpenACCRate: %.lld\n",(unsigned long)((float)size/elapsedAcc)); |
|
125 |
|
|
126 |
#ifndef NOSERIAL |
|
127 |
printf("OpenACCRatio: %.3f\n",elapsed/elapsedAcc); |
|
128 |
#endif |
|
129 |
|
|
130 |
free(a); |
|
131 |
free(b); |
|
132 |
free(res); |
|
133 |
free(resacc); |
|
134 |
} |
|
135 |
|
Formats disponibles : Unified diff