Revision 303

ETSN/MyDFT2D.py (revision 303)
#!/usr/bin/env python3

import numpy as np
import pyopencl as cl
from numpy import pi,cos,sin

# Naive Discrete Fourier Transform: quadruple loop, O(N^4) for an NxN input
def MyDFT(x,y):
    size=x.shape[0]
    X=np.zeros(x.shape).astype(np.float32)
    Y=np.zeros(x.shape).astype(np.float32)
    for k in range(size):
        for l in range(size):
            for i in range(size):
                for j in range(size):
                    t=np.float32(2*pi*((i*k)/size+(l*j)/size))
                    X[k,l]+=x[i,j]*cos(t)+y[i,j]*sin(t)
                    Y[k,l]+=-x[i,j]*sin(t)+y[i,j]*cos(t)
    return(X,Y)
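
# Illustrative check (a sketch, not part of the benchmark path below): the two
# outputs of MyDFT must match the real and imaginary parts of NumPy's 2D FFT
# of z = x + 1j*y.
#
#   x=np.random.rand(4,4).astype(np.float32)
#   y=np.random.rand(4,4).astype(np.float32)
#   X,Y=MyDFT(x,y)
#   Z=np.fft.fft2(x+1.j*y)
#   assert np.allclose(X,Z.real,atol=1e-2) and np.allclose(Y,Z.imag,atol=1e-2)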

# Reference implementation using NumPy's built-in 2D FFT
def NumpyFFT(x,y):
    xy=np.csingle(x+1.j*y)
    XY=np.fft.fft2(xy)
    return(XY.real,XY.imag)
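
# np.csingle is single-precision complex (complex64), matching the float32
# arrays used everywhere else in this script.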

def OpenCLFFT(x,y,Device):
    import pyopencl as cl
    import pyopencl.array as cla
    import time
    import gpyfft
    from gpyfft.fft import FFT

    TimeIn=time.time()
    Id=0
    HasXPU=False
    for platform in cl.get_platforms():
        for device in platform.get_devices():
            if Id==Device:
                XPU=device
                print("CPU/GPU selected: ",device.name.lstrip())
                HasXPU=True
            Id+=1

    if HasXPU==False:
        print("No XPU #%i found in all of %i devices, sorry..." % (Device,Id-1))
        sys.exit()
    Elapsed=time.time()-TimeIn
    print("Selection of device : %.3f" % Elapsed)

    TimeIn=time.time()
    try:
        ctx = cl.Context(devices=[XPU])
        queue = cl.CommandQueue(ctx,properties=cl.command_queue_properties.PROFILING_ENABLE)
    except:
        print("Crash during context creation")
        sys.exit()
    Elapsed=time.time()-TimeIn
    print("Context initialisation : %.3f" % Elapsed)

    TimeIn=time.time()
    XY_gpu = cla.to_device(queue, np.csingle(x+1.j*y))
    Elapsed=time.time()-TimeIn
    print("Copy from Host to Device : %.3f" % Elapsed)

    TimeIn=time.time()
    transform = FFT(ctx, queue, XY_gpu, axes=(0,1))
    event, = transform.enqueue()
    event.wait()
    Elapsed=time.time()-TimeIn
    print("Compute FFT : %.3f" % Elapsed)
    TimeIn=time.time()
    XY = XY_gpu.get()
    Elapsed=time.time()-TimeIn
    print("Copy from Device to Host : %.3f" % Elapsed)

    return(XY.real,XY.imag)
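
# Note: gpyfft plans the transform through clFFT; with axes=(0,1) the FFT is
# 2D and, since no output array is given, it runs in place on XY_gpu.
# transform.enqueue() returns a tuple of OpenCL events, hence the "event, ="
# unpacking before event.wait().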

# Numpy Discrete Fourier Transform (vectorised with the DFT matrix W,
# where W[k,i]=exp(-2j*pi*k*i/size) and Z = W @ z @ W for z = x + 1j*y)
# def NumpyDFT(x,y):
#     size=x.shape[0]
#     n=np.arange(size)
#     W=np.exp(-2.j*np.pi*np.outer(n,n)/size).astype(np.csingle)
#     Z=np.matmul(W,np.matmul(x+1.j*y,W))
#     return(Z.real,Z.imag)

  
91
# Numba Discrete Fourier Transform
92
import numba
93
@numba.njit(parallel=True)
94
def NumbaDFT(x,y):
95
    size=x.shape[0]
96
    X=np.zeros(x.shape).astype(np.float32)
97
    Y=np.zeros(y.shape).astype(np.float32)
98
    for k in numba.prange(size):
99
        for l in numba.prange(size):
100
            for i in numba.prange(size):
101
                for j in numba.prange(size):
102
                    t=np.float32(2*pi*((i*k)/size+(l*j)/size))
103
                    X[k,l]+=x[i,j]*cos(t)+y[i,j]*sin(t)
104
                    Y[k,l]+=-x[i,j]*sin(t)+y[i,j]*cos(t)
105
    return(X,Y)
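
# Timing caveat (illustrative sketch): the first NumbaDFT call includes JIT
# compilation, so a warm-up call on a tiny input of the same dtype excludes
# compile time from the measurement.
#
#   NumbaDFT(np.ones([2,2],dtype=np.float32),np.ones([2,2],dtype=np.float32))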

# OpenCL complete operation
def OpenCLDFT(a_np,b_np,Device):

    Id=0
    HasXPU=False
    for platform in cl.get_platforms():
        for device in platform.get_devices():
            if Id==Device:
                XPU=device
                print("CPU/GPU selected: ",device.name.lstrip())
                HasXPU=True
            Id+=1

    if HasXPU==False:
        print("No XPU #%i found in all of %i devices, sorry..." % (Device,Id-1))
        sys.exit()

    try:
        ctx = cl.Context(devices=[XPU])
        queue = cl.CommandQueue(ctx,properties=cl.command_queue_properties.PROFILING_ENABLE)
    except:
        print("Crash during context creation")
        sys.exit()

    TimeIn=time.time()
    # Copy from Host to Device using pointers
    mf = cl.mem_flags
    a_g = cl.Buffer(ctx, mf.READ_ONLY | mf.COPY_HOST_PTR, hostbuf=a_np)
    b_g = cl.Buffer(ctx, mf.READ_ONLY | mf.COPY_HOST_PTR, hostbuf=b_np)
    Elapsed=time.time()-TimeIn
    print("Copy from Host to Device : %.3f" % Elapsed)

    TimeIn=time.time()
    # Definition of kernel under OpenCL
    prg = cl.Program(ctx, """

#define PI 3.141592653589793

__kernel void MyDFT(
    __global const float *a_g, __global const float *b_g, __global float *A_g, __global float *B_g)
{
  int gidx = get_global_id(0);
  int gidy = get_global_id(1);
  uint size = get_global_size(0);
  float A=0.,B=0.;
  for (uint i=0; i<size;i++) for (uint j=0; j<size;j++) 
  {
     float angle=2.*PI*((float)(gidx*i)/(float)size+
                        (float)(gidy*j)/(float)size);
     A+=a_g[i+size*j]*cos(angle)+b_g[i+size*j]*sin(angle);
     B+=-a_g[i+size*j]*sin(angle)+b_g[i+size*j]*cos(angle);
  }
  A_g[gidx+size*gidy]=A;
  B_g[gidx+size*gidy]=B;
}
""").build()
    Elapsed=time.time()-TimeIn
    print("Building kernels : %.3f" % Elapsed)
    
    TimeIn=time.time()
    # Memory allocation on Host for results
    A_ocl = np.empty_like(a_np)
    B_ocl = np.empty_like(a_np)
    Elapsed=time.time()-TimeIn
    print("Allocation on Host for results : %.3f" % Elapsed)

    TimeIn=time.time()
    # Memory allocation on Device for results
    A_g = cl.Buffer(ctx, mf.WRITE_ONLY, A_ocl.nbytes)
    B_g = cl.Buffer(ctx, mf.WRITE_ONLY, B_ocl.nbytes)
    Elapsed=time.time()-TimeIn
    print("Allocation on Device for results : %.3f" % Elapsed)

    TimeIn=time.time()
    # Synthesis of function "MyDFT" inside Kernel Sources
    knl = prg.MyDFT  # Use this Kernel object for repeated calls
    Elapsed=time.time()-TimeIn
    print("Synthesis of kernel : %.3f" % Elapsed)

    TimeIn=time.time()
    # Call of kernel previously defined
    CallCL=knl(queue, a_np.shape, None, a_g, b_g, A_g, B_g)
    CallCL.wait()
    Elapsed=time.time()-TimeIn
    print("Execution of kernel : %.3f" % Elapsed)

    TimeIn=time.time()
    # Copy from Device to Host
    cl.enqueue_copy(queue, A_ocl, A_g)
    cl.enqueue_copy(queue, B_ocl, B_g)
    Elapsed=time.time()-TimeIn
    print("Copy from Device to Host : %.3f" % Elapsed)

    # Liberation of memory
    a_g.release()
    b_g.release()
    A_g.release()
    B_g.release()
    
    return(A_ocl,B_ocl)
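
# Note on the launch above: passing None as the local size lets the OpenCL
# runtime choose the work-group geometry itself. An explicit local size would
# be a tuple such as (16,16), valid only when it divides the global size
# evenly and its product stays under the device limit, which can be queried
# as XPU.max_work_group_size.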

# CUDA complete operation
def CUDADFT(a_np,b_np,Device,Threads):
    try:
        # For PyCUDA import
        import pycuda.driver as drv
        from pycuda.compiler import SourceModule

        drv.init()
        for Id in range(drv.Device.count()):
            if Id==Device:
                XPU=drv.Device(Id)
                print("GPU selected %s" % XPU.name())

    except ImportError:
        print("Platform does not seem to support CUDA")
        sys.exit()

    Context=XPU.make_context()

    TimeIn=time.time()
    mod = SourceModule("""

#define PI 3.141592653589793

__global__ void MyDFT(float *A_g, float *B_g, const float *a_g,const float *b_g)
{
  const int gidx = blockIdx.x*blockDim.x+threadIdx.x;
  const int gidy = blockIdx.y*blockDim.y+threadIdx.y;
  uint sizex = gridDim.x*blockDim.x;
  uint sizey = gridDim.y*blockDim.y;
  float A=0.,B=0.;
  for (uint i=0; i<sizex;i++) for (uint j=0; j<sizey;j++) 
  {
     float angle=2.*PI*((float)(gidx*i)/(float)sizex+
                        (float)(gidy*j)/(float)sizey);
     A+=a_g[i+sizex*j]*cos(angle)+b_g[i+sizex*j]*sin(angle);
     B+=-a_g[i+sizex*j]*sin(angle)+b_g[i+sizex*j]*cos(angle);
  }
  A_g[gidx+sizey*gidy]=A;
  B_g[gidx+sizey*gidy]=B;
}

""")
    Elapsed=time.time()-TimeIn
    print("Definition of kernel : %.3f" % Elapsed)

    TimeIn=time.time()
    MyDFT = mod.get_function("MyDFT")
    Elapsed=time.time()-TimeIn
    print("Synthesis of kernel : %.3f" % Elapsed)

    TimeIn=time.time()
    A_np = np.zeros_like(a_np)
    B_np = np.zeros_like(a_np)
    Elapsed=time.time()-TimeIn
    print("Allocation on Host for results : %.3f" % Elapsed)

    Size=a_np.shape
    if (Size[0] % Threads != 0):
        print("Impossible : %i is not a multiple of %i, falling back to one thread per block..." % (Size[0],Threads))
        TimeIn=time.time()
        MyDFT(drv.Out(A_np), drv.Out(B_np), drv.In(a_np), drv.In(b_np),
              block=(1,1,1), grid=Size)
        Elapsed=time.time()-TimeIn
        print("Execution of kernel : %.3f" % Elapsed)
    else:
        Blocks=(int(Size[0]/Threads),int(Size[1]/Threads))
        TimeIn=time.time()
        MyDFT(drv.Out(A_np), drv.Out(B_np), drv.In(a_np), drv.In(b_np),
              block=(Threads,Threads,1), grid=Blocks)
        Elapsed=time.time()-TimeIn
        print("Execution of kernel : %.3f" % Elapsed)

    Context.pop()
    Context.detach()

    return(A_np,B_np)
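
# Launch geometry example: with SIZE=8 and Threads=4 the kernel runs with
# block=(4,4,1) and grid=(2,2), i.e. one thread per output sample, where
# gidx = blockIdx.x*blockDim.x + threadIdx.x spans 0..7 (and likewise gidy).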

import sys
import time

if __name__=='__main__':

    SIZE=4
    Device=0
    NaiveMethod=False
    NumpyMethod=False
    NumpyFFTMethod=True
    NumbaMethod=False
    OpenCLMethod=False
    OpenCLFFTMethod=True
    CUDAMethod=False
    Threads=1
    Verbose=False
    
    import getopt

    HowToUse='%s -v [Verbose] -n [Naive] -y [numpYFFT] -a [numbA] -o [OpenCL] -g [OpenCLFFT] -c [CUDA] -s <SizeOfVector> -d <DeviceId> -t <threads>'
    
    try:
        opts, args = getopt.getopt(sys.argv[1:],"gvnyaochs:d:t:",["size=","device=","threads="])
    except getopt.GetoptError:
        print(HowToUse % sys.argv[0])
        sys.exit(2)

    # List of Devices
    Devices=[]
    Alu={}
        
    for opt, arg in opts:
        if opt == '-h':
            print(HowToUse % sys.argv[0])

            print("\nInformation about devices detected under OpenCL API:")
            # For PyOpenCL import
            try:
                import pyopencl as cl
                Id=0
                for platform in cl.get_platforms():
                    for device in platform.get_devices():
                        #deviceType=cl.device_type.to_string(device.type)
                        deviceType="xPU"
                        print("Device #%i from %s of type %s : %s" % (Id,platform.vendor.lstrip(),deviceType,device.name.lstrip()))
                        Id=Id+1

            except Exception:
                print("Your platform does not seem to support OpenCL")

            print("\nInformation about devices detected under CUDA API:")
            # For PyCUDA import
            try:
                import pycuda.driver as cuda
                cuda.init()
                for Id in range(cuda.Device.count()):
                    device=cuda.Device(Id)
                    print("Device #%i of type GPU : %s" % (Id,device.name()))
            except Exception:
                print("Your platform does not seem to support CUDA")
        
            sys.exit()
        
        elif opt in ("-d", "--device"):
            Device=int(arg)
        elif opt in ("-s", "--size"):
            SIZE = int(arg)
        elif opt in ("-t", "--threads"):
            Threads = int(arg)
        elif opt == '-n':
            NaiveMethod=True
        elif opt == '-y':
            NumpyFFTMethod=True
        elif opt == '-a':
            NumbaMethod=True
        elif opt == '-o':
            OpenCLMethod=True
        elif opt == '-g':
            OpenCLFFTMethod=True
        elif opt == '-c':
            CUDAMethod=True
        elif opt == '-v':
            Verbose=True

    print("Device Selection : %i" % Device)
    print("Size of 2D complex array : %ix%i" % (SIZE,SIZE))
    print("Verbosity %s " % Verbose )
    print("DFT Naive computation %s " % NaiveMethod )
    print("DFT Numpy computation %s " % NumpyMethod )
    print("FFT Numpy computation %s " % NumpyFFTMethod )
    print("DFT Numba computation %s " % NumbaMethod )
    print("DFT OpenCL computation %s " % OpenCLMethod )
    print("FFT OpenCL computation %s " % OpenCLFFTMethod )
    print("DFT CUDA computation %s " % CUDAMethod )

    if CUDAMethod:
        try:
            # For PyCUDA import
            import pycuda.driver as cuda
            
            cuda.init()
            for Id in range(cuda.Device.count()):
                device=cuda.Device(Id)
                print("Device #%i of type GPU : %s" % (Id,device.name()))
                if Id in Devices:
                    Alu[Id]='GPU'
            
        except ImportError:
            print("Platform does not seem to support CUDA")

    if OpenCLMethod:
        try:
            # For PyOpenCL import
            import pyopencl as cl
            Id=0
            for platform in cl.get_platforms():
                for device in platform.get_devices():
                    #deviceType=cl.device_type.to_string(device.type)
                    deviceType="xPU"
                    print("Device #%i from %s of type %s : %s" % (Id,platform.vendor.strip(),deviceType,device.name.strip()))

                    if Id in Devices:
                        # Set the Alu as detected Device Type
                        Alu[Id]=deviceType
                    Id=Id+1
        except ImportError:
            print("Platform does not seem to support OpenCL")

    a_np = np.ones([SIZE,SIZE]).astype(np.float32)
    b_np = np.ones([SIZE,SIZE]).astype(np.float32)
    # a_np = np.zeros([SIZE,SIZE]).astype(np.float32)
    # b_np = np.zeros([SIZE,SIZE]).astype(np.float32)
    # a_np[0,0]=1

    np.set_printoptions(precision=1,suppress=True)

    # print(a_np+1.j*b_np)
    
    # print(np.fft.fft2(a_np+1.j*b_np))
    
    C_np = np.zeros([SIZE,SIZE]).astype(np.float32)
    D_np = np.zeros([SIZE,SIZE]).astype(np.float32)
    C_np[0,0] = np.float32(SIZE*SIZE)
    D_np[0,0] = np.float32(SIZE*SIZE)
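    # For the all-ones input above, the exact 2D DFT of a_np + 1j*b_np is a
    # single peak of amplitude SIZE*SIZE at [0,0]; C_np and D_np encode that
    # reference so each method below can be checked with a norm.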
    
    # Naive Implementation
    if NaiveMethod:
        print("Performing naive implementation")
        TimeIn=time.time()
        c_np,d_np=MyDFT(a_np,b_np)
        NativeElapsed=time.time()-TimeIn
        NativeRate=int(SIZE*SIZE/NativeElapsed)
        print("NativeElapsed: %.3f" % NativeElapsed)
        print("NativeRate: %i" % NativeRate)
        print("Precision: ",np.linalg.norm(c_np-C_np),
              np.linalg.norm(d_np-D_np))
        if Verbose:
            print(c_np+1.j*d_np)
        
    # Numpy FFT Implementation
    if NumpyFFTMethod:
        print("Performing Numpy FFT implementation")
        TimeIn=time.time()
        e_np,f_np=NumpyFFT(a_np,b_np)
        NumpyFFTElapsed=time.time()-TimeIn
        NumpyFFTRate=int(SIZE*SIZE/NumpyFFTElapsed)
        print("NumpyFFTElapsed: %.3f" % NumpyFFTElapsed)
        print("NumpyFFTRate: %i" % NumpyFFTRate)
        print("Precision: ",np.linalg.norm(e_np-C_np),
              np.linalg.norm(f_np-D_np))
        if Verbose:
            print(e_np+1.j*f_np)
        
    # Numba Implementation
    if NumbaMethod:
        print("Performing Numba implementation")
        TimeIn=time.time()
        g_np,h_np=NumbaDFT(a_np,b_np)
        NumbaElapsed=time.time()-TimeIn
        NumbaRate=int(SIZE*SIZE/NumbaElapsed)
        print("NumbaElapsed: %.3f" % NumbaElapsed)
        print("NumbaRate: %i" % NumbaRate)
        print("Precision: ",np.linalg.norm(g_np-C_np),
              np.linalg.norm(h_np-D_np))
        if Verbose:
            print(g_np+1.j*h_np)
    
    # OpenCL DFT Implementation
    if OpenCLMethod:
        print("Performing OpenCL implementation")
        TimeIn=time.time()
        i_np,j_np=OpenCLDFT(a_np,b_np,Device)
        OpenCLElapsed=time.time()-TimeIn
        OpenCLRate=int(SIZE*SIZE/OpenCLElapsed)
        print("OpenCLElapsed: %.3f" % OpenCLElapsed)
        print("OpenCLRate: %i" % OpenCLRate)
        print("Precision: ",np.linalg.norm(i_np-C_np),
              np.linalg.norm(j_np-D_np))
        if Verbose:
            print(i_np+1.j*j_np)
        
    # CUDA DFT Implementation
    if CUDAMethod:
        print("Performing CUDA implementation")
        TimeIn=time.time()
        k_np,l_np=CUDADFT(a_np,b_np,Device,Threads)
        CUDAElapsed=time.time()-TimeIn
        CUDARate=int(SIZE*SIZE/CUDAElapsed)
        print("CUDAElapsed: %.3f" % CUDAElapsed)
        print("CUDARate: %i" % CUDARate)
        print("Precision: ",np.linalg.norm(k_np-C_np),
              np.linalg.norm(l_np-D_np))
        if Verbose:
            print(k_np+1.j*l_np)

    # OpenCL FFT Implementation
    if OpenCLFFTMethod:
        print("Performing OpenCL FFT implementation")
        TimeIn=time.time()
        m_np,n_np=OpenCLFFT(a_np,b_np,Device)
        OpenCLFFTElapsed=time.time()-TimeIn
        OpenCLFFTRate=int(SIZE*SIZE/OpenCLFFTElapsed)
        print("OpenCLFFTElapsed: %.3f" % OpenCLFFTElapsed)
        print("OpenCLFFTRate: %i" % OpenCLFFTRate)
        print("Precision: ",np.linalg.norm(m_np-C_np),
              np.linalg.norm(n_np-D_np))
        if Verbose:
            print(m_np+1.j*n_np)
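
    # Example invocations (flags as listed in HowToUse above):
    #   ./MyDFT2D.py -s 64 -n -a        naive and Numba DFT on a 64x64 input
    #   ./MyDFT2D.py -s 1024 -o -d 1    OpenCL DFT kernel on device #1
    #   ./MyDFT2D.py -s 1024 -c -t 16   CUDA DFT with 16x16 threads per block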
ETSN/MyDFT_10.py (revision 303)
@@ -8,7 +8,6 @@
 def NumpyFFT(x,y):
     xy=np.csingle(x+1.j*y)
     XY=np.fft.fft(xy)
-    print(XY)
     return(XY.real,XY.imag)
 
 #
@@ -52,7 +51,7 @@
     print("Copy from Host to Device : %.3f" % Elapsed)
 
     TimeIn=time.time()
-    transform = FFT(ctx, queue, XY_gpu)   
+    transform = FFT(ctx, queue, XY_gpu)
     event, = transform.enqueue()
     event.wait()
     Elapsed=time.time()-TimeIn
@@ -61,7 +60,6 @@
     XY = XY_gpu.get()
     Elapsed=time.time()-TimeIn
     print("Copy from Device to Host : %.3f" % Elapsed)
-    print(XY)
     return(XY.real,XY.imag)
 
 # Naive Discrete Fourier Transform
@@ -283,19 +281,20 @@
     Device=0
     NaiveMethod=False
     NumpyFFTMethod=True
-    OpenCLFFTMethod=False
+    OpenCLFFTMethod=True
     NumpyMethod=False
     NumbaMethod=False
     OpenCLMethod=False
-    CUDAMethod=True
+    CUDAMethod=False
     Threads=1
+    Verbose=True
     
     import getopt
 
     HowToUse='%s -n [Naive] -y [numpY] -a [numbA] -o [OpenCL] -c [CUDA] -s <SizeOfVector> -d <DeviceId> -t <threads>'
     
     try:
-        opts, args = getopt.getopt(sys.argv[1:],"nyaochs:d:t:",["size=","device="])
+        opts, args = getopt.getopt(sys.argv[1:],"vnyaochs:d:t:",["size=","device="])
     except getopt.GetoptError:
         print(HowToUse % sys.argv[0])
         sys.exit(2)
@@ -361,6 +360,7 @@
     print("FFT Numpy computation %s " % NumpyFFTMethod )
     print("DFT Numba computation %s " % NumbaMethod )
     print("DFT OpenCL computation %s " % OpenCLMethod )
+    print("FFT OpenCL computation %s " % OpenCLFFTMethod )
     print("DFT CUDA computation %s " % CUDAMethod )
     
     if CUDAMethod:
@@ -476,13 +476,13 @@
     
     # OpenCL Implementation
     if OpenCLFFTMethod:
-        print("Performing OpenCL implementation")
+        print("Performing OpenCLFFT implementation")
         TimeIn=time.time()
         i_np,j_np=OpenCLFFT(a_np,b_np,Device)
         OpenCLFFTElapsed=time.time()-TimeIn
         OpenCLFFTRate=int(SIZE/OpenCLFFTElapsed)
-        print("OpenCLElapsed: %i" % OpenCLFFTElapsed)
-        print("OpenCLRate: %i" % OpenCLFFTRate)
+        print("OpenCLFFTElapsed: %i" % OpenCLFFTElapsed)
+        print("OpenCLFFTRate: %i" % OpenCLFFTRate)
         print("Precision: ",np.linalg.norm(i_np-C_np),
               np.linalg.norm(j_np-D_np)) 
