root / ETSN / MyDFT_openacc.c @ 285
Historique | Voir | Annoter | Télécharger (2,86 ko)
1 | 285 | equemene | /* Simple Discrete Fourier Transform implemented in C and OpenACC/C */
|
---|---|---|---|
2 | 285 | equemene | /* compilation with : gcc -O3 -fopenacc -foffload=nvptx-none -foffload="-O3 -misa=sm_35 -lm" -o MyDFT_openacc MyDFT_openacc.c -lm */
|
3 | 285 | equemene | |
#include <limits.h>
#include <math.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/time.h>

#include <openacc.h>
9 | 285 | equemene | |
10 | 285 | equemene | #define PI 3.141592653589793 |
11 | 285 | equemene | |
12 | 285 | equemene | #define MYFLOAT float |
13 | 285 | equemene | |
14 | 285 | equemene | void MyDFT(MYFLOAT *A, MYFLOAT *B, MYFLOAT *a, MYFLOAT *b,int size) |
15 | 285 | equemene | { |
16 | 285 | equemene | for (uint j=0;j<size;j++) |
17 | 285 | equemene | { |
18 | 285 | equemene | MYFLOAT At=0.,Bt=0.; |
19 | 285 | equemene | for (uint i=0; i<size;i++) |
20 | 285 | equemene | { |
21 | 285 | equemene | At+=a[i]*cos(2.*PI*(MYFLOAT)(j*i)/(MYFLOAT)size)-b[i]*sin(2.*PI*(MYFLOAT)(j*i)/(MYFLOAT)size); |
22 | 285 | equemene | Bt+=a[i]*sin(2.*PI*(MYFLOAT)(j*i)/(MYFLOAT)size)+b[i]*cos(2.*PI*(MYFLOAT)(j*i)/(MYFLOAT)size); |
23 | 285 | equemene | } |
24 | 285 | equemene | A[j]=At; |
25 | 285 | equemene | B[j]=Bt; |
26 | 285 | equemene | } |
27 | 285 | equemene | } |
28 | 285 | equemene | |
29 | 285 | equemene | void MyDFTOpenACC(MYFLOAT *A, MYFLOAT *B, MYFLOAT *a, MYFLOAT *b,int size) |
30 | 285 | equemene | { |
31 | 285 | equemene | |
32 | 285 | equemene | #pragma acc data copy(a[0:size],b[0:size],A[0:size],B[0:size]) |
33 | 285 | equemene | #pragma acc parallel loop
|
34 | 285 | equemene | for (uint j=0;j<size;j++) |
35 | 285 | equemene | { |
36 | 285 | equemene | MYFLOAT At=0.,Bt=0.; |
37 | 285 | equemene | #pragma acc seq reduction(+: At) reduction(+: Bt)
|
38 | 285 | equemene | { |
39 | 285 | equemene | for (uint i=0; i<size;i++) |
40 | 285 | equemene | { |
41 | 285 | equemene | At+=a[i]*cos(2.*PI*(MYFLOAT)(j*i)/(MYFLOAT)size)-b[i]*sin(2.*PI*(MYFLOAT)(j*i)/(MYFLOAT)size); |
42 | 285 | equemene | Bt+=a[i]*sin(2.*PI*(MYFLOAT)(j*i)/(MYFLOAT)size)+b[i]*cos(2.*PI*(MYFLOAT)(j*i)/(MYFLOAT)size); |
43 | 285 | equemene | } |
44 | 285 | equemene | A[j]=At; |
45 | 285 | equemene | B[j]=Bt; |
46 | 285 | equemene | } |
47 | 285 | equemene | } |
48 | 285 | equemene | } |
49 | 285 | equemene | |
50 | 285 | equemene | int main(int argc,char *argv[]) |
51 | 285 | equemene | { |
52 | 285 | equemene | float *a,*b,*A,*B;
|
53 | 285 | equemene | int size=1024; |
54 | 285 | equemene | struct timeval tv1,tv2;
|
55 | 285 | equemene | |
56 | 285 | equemene | if (argc > 1) { |
57 | 285 | equemene | size=(int)atoll(argv[1]); |
58 | 285 | equemene | } |
59 | 285 | equemene | else {
|
60 | 285 | equemene | printf("\n\tPi : Estimate DFT\n\n\t\t#1 : size (default 1024)\n\n");
|
61 | 285 | equemene | } |
62 | 285 | equemene | |
63 | 285 | equemene | a=(MYFLOAT*)malloc(size*sizeof(MYFLOAT));
|
64 | 285 | equemene | b=(MYFLOAT*)malloc(size*sizeof(MYFLOAT));
|
65 | 285 | equemene | A=(MYFLOAT*)malloc(size*sizeof(MYFLOAT));
|
66 | 285 | equemene | B=(MYFLOAT*)malloc(size*sizeof(MYFLOAT));
|
67 | 285 | equemene | |
68 | 285 | equemene | for (int i=0;i<size;i++) |
69 | 285 | equemene | { |
70 | 285 | equemene | a[i]=1.;
|
71 | 285 | equemene | b[i]=1.;
|
72 | 285 | equemene | A[i]=0.;
|
73 | 285 | equemene | A[i]=0.;
|
74 | 285 | equemene | } |
75 | 285 | equemene | |
76 | 285 | equemene | /* gettimeofday(&tv1, NULL); */
|
77 | 285 | equemene | /* MyDFT(A,B,a,b,size); */
|
78 | 285 | equemene | /* gettimeofday(&tv2, NULL); */
|
79 | 285 | equemene | |
80 | 285 | equemene | MYFLOAT elapsed=(MYFLOAT)((tv2.tv_sec-tv1.tv_sec) * 1000000L +
|
81 | 285 | equemene | (tv2.tv_usec-tv1.tv_usec))/1000000;
|
82 | 285 | equemene | |
83 | 285 | equemene | gettimeofday(&tv1, NULL);
|
84 | 285 | equemene | MyDFTOpenACC(A,B,a,b,size); |
85 | 285 | equemene | gettimeofday(&tv2, NULL);
|
86 | 285 | equemene | |
87 | 285 | equemene | MYFLOAT elapsedOpenACC=(MYFLOAT)((tv2.tv_sec-tv1.tv_sec) * 1000000L +
|
88 | 285 | equemene | (tv2.tv_usec-tv1.tv_usec))/1000000;
|
89 | 285 | equemene | |
90 | 285 | equemene | /* printf("A=["); */
|
91 | 285 | equemene | /* for (int i=0;i<size;i++) */
|
92 | 285 | equemene | /* { */
|
93 | 285 | equemene | /* printf("%.2f ",A[i]); */
|
94 | 285 | equemene | /* } */
|
95 | 285 | equemene | /* printf(" ]\n\n"); */
|
96 | 285 | equemene | |
97 | 285 | equemene | /* printf("B=["); */
|
98 | 285 | equemene | /* for (int i=0;i<size;i++) */
|
99 | 285 | equemene | /* { */
|
100 | 285 | equemene | /* printf("%.2f ",B[i]); */
|
101 | 285 | equemene | /* } */
|
102 | 285 | equemene | /* printf(" ]\n\n"); */
|
103 | 285 | equemene | |
104 | 285 | equemene | printf("\nA[0]=%.3f A[%i]=%.3f\n",A[0],size-1,A[size-1]); |
105 | 285 | equemene | printf("B[0]=%.3f B[%i]=%.3f\n\n",B[0],size-1,B[size-1]); |
106 | 285 | equemene | |
107 | 285 | equemene | printf("Elapsed Time: %.3f\n",elapsed);
|
108 | 285 | equemene | printf("OpenACC Elapsed Time: %.3f\n",elapsedOpenACC);
|
109 | 285 | equemene | |
110 | 285 | equemene | printf("NaiveRate: %.i\n",(int)((float)size/elapsed)); |
111 | 285 | equemene | printf("OpenACCRate: %.i\n",(int)((float)size/elapsedOpenACC)); |
112 | 285 | equemene | |
113 | 285 | equemene | free(a); |
114 | 285 | equemene | free(b); |
115 | 285 | equemene | free(A); |
116 | 285 | equemene | free(B); |
117 | 285 | equemene | } |