Révision 2

FFT/Makefile (revision 2)
1
SOURCE=FFT2D.c
2

  
3
CC=gcc
4
CFLAGS=-Wall -O3
5
LDFLAGS=-lm
6
CUDADIR=/opt/cuda
7
CUDASRC=$(CUDADIR)/src
8
CUDAINC=$(CUDADIR)/include
9
CUDALIB=$(CUDADIR)/lib64
10

  
11
ACML=/opt/acml
12
ACMLINC=$(ACML)/gfortran64_mp/include
13
ACMLLIB=$(ACML)/gfortran64_mp/lib
14

  
15
EXECUTABLE=fftw3 cufft
16

  
17
FORMAT=DOUBLE
18
#FORMAT=FLOAT
19

  
20
#DIRECTIVES=-D$(FORMAT) -DPRINT -DUNIT
21
#DIRECTIVES=-D$(FORMAT) -DUNIT -DRESULTS -DQUIET
22
DIRECTIVES=-DUNIT -DQUIET
23

  
24
all: $(EXECUTABLE)
25

  
26
fftw3: $(SOURCE)
27

  
28
	$(CC) $(CFLAGS) $(DIRECTIVES) -DFLOAT -DFFTW3 $(LDFLAGS) \
29
		$(SOURCE) \
30
		-lm -lfftw3f_threads -lfftw3f -lpthread \
31
		-o $(SOURCE:.c=)_SP_$@
32

  
33
	$(CC) $(CFLAGS) $(DIRECTIVES) -DDOUBLE -DFFTW3 $(LDFLAGS) \
34
		$(SOURCE) \
35
		-lm -lfftw3_threads -lfftw3 -lpthread \
36
		-o $(SOURCE:.c=)_DP_$@
37

  
38
cufft: $(SOURCE)
39

  
40
	$(CC) -I$(CUDAINC) -L$(CUDALIB) $(CFLAGS) -DFLOAT \
41
		-DCUFFT $(LDFLAGS) \
42
		$(DIRECTIVES) $(SOURCE) -lcufft -o $(SOURCE:.c=)_SP_$@
43

  
44
	$(CC) -I$(CUDAINC) -L$(CUDALIB) $(CFLAGS) -DDOUBLE \
45
		-DCUFFT $(LDFLAGS) \
46
		$(DIRECTIVES) $(SOURCE) -lcufft -o $(SOURCE:.c=)_DP_$@
47

  
48
clean: $(SOURCE)
49
	find . -name "$(SOURCE:.c=)_*" -exec rm {} \;
50
	find . -name "*~" -exec rm {} \;
51

  
52
check: $(EXECUTABLE)
53

  
54
	$(SOURCE:.c=)_SP_$(EXECUTABLE) 2 1
55

  
FFT/FFT2D.c (revision 2)
1
/* 
2
   Performs matrix multiply
3

  
4
   Thanks for help from aurel32@debian.org
5
*/
6

  
7
#include <stdio.h>
8
#include <math.h>
9
#include <stdlib.h>
10
#include <sys/time.h>
11
#include <string.h>
12

  
13
#ifdef CUFFT
14
#include <complex.h>
15
#include <cufft.h>
16
#else
17
#include <fftw3.h>
18
#endif
19

  
20
#ifdef DOUBLE
21
#define LENGTH double
22
#else
23
#define LENGTH float
24
#endif
25

  
26
// kind of plan estimations for FFTW3
27
#define ESTIMATE 1
28
#define MEASURE 2
29
#define PATIENT 3
30
#define EXHAUSTIVE 4
31

  
32
// Default values
33
#define NTHREADS 1
34
#define NLOOPS 10
35
#define DIMENSION 1024
36

  
37
// Redefine FFTW function calls 
38
#ifdef FLOAT
39
#define FFTW_COMPLEX fftwf_complex
40
#define FFTW_PLAN fftwf_plan
41
#define FFTW_INIT_THREADS fftwf_init_threads
42
#define FFTW_PLAN_WITH_NTHREADS fftwf_plan_with_nthreads
43
#define FFTW_MALLOC fftwf_malloc
44
#define	FFTW_PLAN_DFT_2D fftwf_plan_dft_2d
45
#define FFTW_EXECUTE fftwf_execute
46
#define FFTW_DESTROY_PLAN fftwf_destroy_plan
47
#define FFTW_FREE fftwf_free
48
#elif DOUBLE
49
#define FFTW_COMPLEX fftw_complex
50
#define FFTW_PLAN fftw_plan
51
#define FFTW_INIT_THREADS fftw_init_threads
52
#define FFTW_PLAN_WITH_NTHREADS fftw_plan_with_nthreads
53
#define FFTW_MALLOC fftw_malloc
54
#define	FFTW_PLAN_DFT_2D fftw_plan_dft_2d
55
#define FFTW_EXECUTE fftw_execute
56
#define FFTW_DESTROY_PLAN fftw_destroy_plan
57
#define FFTW_FREE fftw_free
58
#endif
59

  
60
int main(int argc,char **argv)
61
{
62
  int i;
63
  int nloops=NLOOPS,nthreads=NTHREADS,size;
64
  int type_plan=0;
65
  double dplan=0.,dcompute=0.;
66
  
67
  struct timeval tv1,tv2;
68
  struct timezone tz;
69
    
70
  if ((argc==1)||
71
      (strcmp(argv[1],"-h")==0)||
72
      (strcmp(argv[1],"--help")==0))
73
    {
74
      printf("\n\tCalculate fake FFTW images\n\n"
75
	     "\tParameters to give :\n\n"
76
	     "\t1> Plan ESTIMATE MEASURE PATIENT EXHAUSTIVE\n"
77
	     "\t2> Size of Image (2^n)\n"
78
	     "\t3> Number of Loops (integer)\n"
79
	     "\t4> Number of Threads (integer)\n\n");
80
      
81
      return(0);
82
    }
83

  
84
  // initialization of FFTW threads
85
  
86
  if (atoi(argv[4])<1) {
87
    nthreads=1;
88
  }
89
  else {
90
    nthreads=atoi(argv[4]);
91
  }
92
  
93
  if (atoi(argv[3])<1) {
94
    nloops=1;
95
  }
96
  else {
97
    nloops=atoi(argv[3]);
98
  }
99
  
100
  if (atoi(argv[2])<1) {
101
    size=2;
102
  }
103
  else {
104
    size=atoi(argv[2]);
105
  }
106
    
107
  printf("%i %i\n",size,nthreads);
108
        
109
#ifdef CUFFT
110
  cufftHandle plan;
111
  cufftComplex *in, *devin;
112

  
113
  size_t arraySize = sizeof(cufftComplex)*size*size;
114
  cudaMallocHost((void**) &in, arraySize);
115
  cudaMalloc((void**) &devin, arraySize);
116
    
117
  // initialisation of arrays
118
  for (i = 0; i < size*size; i++) {
119
    in[i].x=1.;
120
    in[i].y=0.;
121
  }
122
  in[0].x=1.;
123
  in[0].y=0.;
124

  
125
  // First timer to start plan
126
  gettimeofday(&tv1, &tz);
127
  
128
  // Plan & copy to device
129
  cufftPlan2d(&plan, size, size, CUFFT_C2C);
130
  cudaMemcpy(devin, in, arraySize, cudaMemcpyHostToDevice);
131

  
132
  // Second timer to end plan      
133
  gettimeofday(&tv2, &tz);	
134
  dplan=(double)((tv2.tv_sec-tv1.tv_sec) * 1000000L +		\
135
		     (tv2.tv_usec-tv1.tv_usec))/1000000.;  
136

  
137
  // First timer to start process
138
  gettimeofday(&tv1, &tz);
139

  
140
  // Process
141
  for (i=0;i<nloops;i++) {
142
    cufftExecC2C(plan, devin, devin, CUFFT_FORWARD);
143
  }
144
    
145
  cudaMemcpy(in, devin, arraySize, cudaMemcpyDeviceToHost);
146

  
147
  printf("%i=%f,%f\n",0,in[0].x,in[0].y);
148

  
149
  // Second timer to end process
150
  gettimeofday(&tv2, &tz);
151

  
152
  cufftDestroy(plan);
153
  cudaFreeHost(in);
154
  cudaFree(devin);
155

  
156
#elif FFTW3      
157

  
158
  FFTW_COMPLEX *in;
159
  FFTW_PLAN p;
160

  
161
  FFTW_INIT_THREADS();
162
  FFTW_PLAN_WITH_NTHREADS(nthreads);
163

  
164
  in = (FFTW_COMPLEX*) FFTW_MALLOC(sizeof(FFTW_COMPLEX)*size*size);
165

  
166
  // First timer to start plan
167
  gettimeofday(&tv1, &tz);
168
  
169
  if (strcmp(argv[1],"MEASURE")==0) {
170
    p = FFTW_PLAN_DFT_2D(size,size,in,in,FFTW_FORWARD,FFTW_MEASURE);
171
    type_plan=MEASURE;
172
  }
173
  
174
  if (strcmp(argv[1],"PATIENT")==0) {
175
    p = FFTW_PLAN_DFT_2D(size,size,in,in,FFTW_FORWARD,FFTW_PATIENT);
176
    type_plan=PATIENT;
177
  }
178
  
179
  if (strcmp(argv[1],"EXHAUSTIVE")==0) {
180
    p = FFTW_PLAN_DFT_2D(size,size,in,in,FFTW_FORWARD,FFTW_EXHAUSTIVE);
181
    type_plan=EXHAUSTIVE;
182
  }
183
  
184
  if ((type_plan==0)||(strcmp(argv[1],"ESTIMATE")==0)) {
185
    p = FFTW_PLAN_DFT_2D(size,size,in,in,FFTW_FORWARD,FFTW_ESTIMATE);
186
    type_plan=ESTIMATE;
187
  }
188
  // Second timer to end plan      
189
  gettimeofday(&tv2, &tz);	
190
  dplan=(double)((tv2.tv_sec-tv1.tv_sec) * 1000000L +		\
191
		 (tv2.tv_usec-tv1.tv_usec))/1000000.;  
192
  
193
  // initialisation of arrays
194
  for (i = 0; i < size*size; i++) {
195
    (in[i])[0]=0.;
196
    (in[i])[1]=0.;
197
  }
198
  (in[0])[0]=1.;
199
  (in[0])[1]=0.;
200
  
201
  // First timer to start process
202
  gettimeofday(&tv1, &tz);
203
  for (i=0;i<nloops;i++) {
204
    FFTW_EXECUTE(p);
205
  }
206
  // Second timer to end process
207
  gettimeofday(&tv2, &tz);
208
  FFTW_DESTROY_PLAN(p);
209
  FFTW_FREE(in); 
210
#endif
211

  
212
  dcompute=(double)((tv2.tv_sec-tv1.tv_sec) * 1000000L +		\
213
		    (tv2.tv_usec-tv1.tv_usec))/1000000./(double)nloops;  
214
  
215
  printf("%s,%i,%i,%i,%i,%lf,%lf\n",argv[1],
216
	 size,size,nthreads,nloops,dplan,dcompute);
217
  
218
  /* if ((file=fopen(argv[7],"a"))==NULL) */
219
  /* 	{ */
220
  /* 	  printf("Impossible to append result in %s file\n", */
221
  /* 		 argv[7]); */
222
  /* 	  return(0); */
223
  /* 	} */
224
  
225
  /* fprintf(file,"%s,%i,%i,%i,%i,%lf,%lf\n", */
226
  /* 	  argv[1],size,size,nthreads,nloops,dplan,dcompute); */
227
  
228
  /* fclose(file); */
229
  
230
  return 0;
231
}

Formats disponibles : Unified diff