Révision 285
ETSN/MyDFT_openacc.c (revision 285) | ||
---|---|---|
1 |
/* Simple Discrete Fourier Transform implemented in C and OpenACC/C */ |
|
2 |
/* compilation with : gcc -O3 -fopenacc -foffload=nvptx-none -foffload="-O3 -misa=sm_35 -lm" -o MyDFT_openacc MyDFT_openacc.c -lm */ |
|
3 |
|
|
4 |
#include <math.h> |
|
5 |
#include <stdio.h> |
|
6 |
#include <stdlib.h> |
|
7 |
#include <openacc.h> |
|
8 |
#include <sys/time.h> |
|
9 |
|
|
10 |
#define PI 3.141592653589793 |
|
11 |
|
|
12 |
#define MYFLOAT float |
|
13 |
|
|
14 |
/*
 * Naive O(size^2) discrete Fourier transform, computed sequentially on the host.
 *
 * The input is the complex sequence a[i] + I*b[i]; the output is written to
 * A[j] + I*B[j], where each output term is
 *
 *     sum over i of (a[i] + I*b[i]) * exp(+I * 2*PI * j*i / size)
 *
 * No normalisation by size is applied.
 *
 * A, B : output arrays (real and imaginary parts), at least `size` elements.
 * a, b : input arrays (real and imaginary parts), at least `size` elements.
 * size : number of points; nothing is written when size <= 0.
 */
void MyDFT(MYFLOAT *A, MYFLOAT *B, MYFLOAT *a, MYFLOAT *b,int size)
{
  if (size <= 0)
    return;

  for (int j = 0; j < size; j++)
    {
      MYFLOAT At = 0., Bt = 0.;

      for (int i = 0; i < size; i++)
        {
          /*
           * The twiddle factor is periodic in j*i with period `size`, so the
           * product is reduced modulo size in 64-bit integer arithmetic.
           * This avoids both 32-bit overflow of j*i and the loss of integer
           * precision that a float conversion suffers above 2^24.
           */
          const long long k = ((long long)j * (long long)i) % size;
          const double angle = 2. * PI * (double)k / (double)size;
          const double c = cos(angle);
          const double s = sin(angle);

          At += a[i] * c - b[i] * s;
          Bt += a[i] * s + b[i] * c;
        }

      A[j] = At;
      B[j] = Bt;
    }
}
|
28 |
|
|
29 |
/*
 * Naive O(size^2) discrete Fourier transform with the outer loop offloaded
 * through OpenACC.
 *
 * The input is the complex sequence a[i] + I*b[i]; the output is written to
 * A[j] + I*B[j], where each output term is
 *
 *     sum over i of (a[i] + I*b[i]) * exp(+I * 2*PI * j*i / size)
 *
 * No normalisation by size is applied.
 *
 * A, B : output arrays (real and imaginary parts), at least `size` elements.
 * a, b : input arrays (real and imaginary parts), at least `size` elements.
 * size : number of points; nothing is written when size <= 0.
 *
 * When the file is compiled without OpenACC support the pragmas are ignored
 * and the loops run sequentially on the host.
 */
void MyDFTOpenACC(MYFLOAT *A, MYFLOAT *B, MYFLOAT *a, MYFLOAT *b,int size)
{
  /* Also keeps the array sections in the data clauses well-formed. */
  if (size <= 0)
    return;

  /* Inputs are only read and outputs are fully overwritten, so each array
     needs to cross the host/device boundary in one direction only. */
#pragma acc data copyin(a[0:size],b[0:size]) copyout(A[0:size],B[0:size])
  {
#pragma acc parallel loop
    for (int j = 0; j < size; j++)
      {
        /* Declared inside the parallel loop body: one private pair per
           output index, so the inner accumulation needs no reduction. */
        MYFLOAT At = 0., Bt = 0.;

#pragma acc loop seq
        for (int i = 0; i < size; i++)
          {
            /*
             * The twiddle factor is periodic in j*i with period `size`, so
             * the product is reduced modulo size in 64-bit integer
             * arithmetic. This avoids both 32-bit overflow of j*i and the
             * loss of integer precision of a float conversion above 2^24.
             */
            const long long k = ((long long)j * (long long)i) % size;
            const double angle = 2. * PI * (double)k / (double)size;
            const double c = cos(angle);
            const double s = sin(angle);

            At += a[i] * c - b[i] * s;
            Bt += a[i] * s + b[i] * c;
          }

        A[j] = At;
        B[j] = Bt;
      }
  }
}
|
49 |
|
|
50 |
int main(int argc,char *argv[]) |
|
51 |
{ |
|
52 |
float *a,*b,*A,*B; |
|
53 |
int size=1024; |
|
54 |
struct timeval tv1,tv2; |
|
55 |
|
|
56 |
if (argc > 1) { |
|
57 |
size=(int)atoll(argv[1]); |
|
58 |
} |
|
59 |
else { |
|
60 |
printf("\n\tPi : Estimate DFT\n\n\t\t#1 : size (default 1024)\n\n"); |
|
61 |
} |
|
62 |
|
|
63 |
a=(MYFLOAT*)malloc(size*sizeof(MYFLOAT)); |
|
64 |
b=(MYFLOAT*)malloc(size*sizeof(MYFLOAT)); |
|
65 |
A=(MYFLOAT*)malloc(size*sizeof(MYFLOAT)); |
|
66 |
B=(MYFLOAT*)malloc(size*sizeof(MYFLOAT)); |
|
67 |
|
|
68 |
for (int i=0;i<size;i++) |
|
69 |
{ |
|
70 |
a[i]=1.; |
|
71 |
b[i]=1.; |
|
72 |
A[i]=0.; |
|
73 |
A[i]=0.; |
|
74 |
} |
|
75 |
|
|
76 |
/* gettimeofday(&tv1, NULL); */ |
|
77 |
/* MyDFT(A,B,a,b,size); */ |
|
78 |
/* gettimeofday(&tv2, NULL); */ |
|
79 |
|
|
80 |
MYFLOAT elapsed=(MYFLOAT)((tv2.tv_sec-tv1.tv_sec) * 1000000L + |
|
81 |
(tv2.tv_usec-tv1.tv_usec))/1000000; |
|
82 |
|
|
83 |
gettimeofday(&tv1, NULL); |
|
84 |
MyDFTOpenACC(A,B,a,b,size); |
|
85 |
gettimeofday(&tv2, NULL); |
|
86 |
|
|
87 |
MYFLOAT elapsedOpenACC=(MYFLOAT)((tv2.tv_sec-tv1.tv_sec) * 1000000L + |
|
88 |
(tv2.tv_usec-tv1.tv_usec))/1000000; |
|
89 |
|
|
90 |
/* printf("A=["); */ |
|
91 |
/* for (int i=0;i<size;i++) */ |
|
92 |
/* { */ |
|
93 |
/* printf("%.2f ",A[i]); */ |
|
94 |
/* } */ |
|
95 |
/* printf(" ]\n\n"); */ |
|
96 |
|
|
97 |
/* printf("B=["); */ |
|
98 |
/* for (int i=0;i<size;i++) */ |
|
99 |
/* { */ |
|
100 |
/* printf("%.2f ",B[i]); */ |
|
101 |
/* } */ |
|
102 |
/* printf(" ]\n\n"); */ |
|
103 |
|
|
104 |
printf("\nA[0]=%.3f A[%i]=%.3f\n",A[0],size-1,A[size-1]); |
|
105 |
printf("B[0]=%.3f B[%i]=%.3f\n\n",B[0],size-1,B[size-1]); |
|
106 |
|
|
107 |
printf("Elapsed Time: %.3f\n",elapsed); |
|
108 |
printf("OpenACC Elapsed Time: %.3f\n",elapsedOpenACC); |
|
109 |
|
|
110 |
printf("NaiveRate: %.i\n",(int)((float)size/elapsed)); |
|
111 |
printf("OpenACCRate: %.i\n",(int)((float)size/elapsedOpenACC)); |
|
112 |
|
|
113 |
free(a); |
|
114 |
free(b); |
|
115 |
free(A); |
|
116 |
free(B); |
|
117 |
} |
|
118 |
|
Formats disponibles : Unified diff