Statistiques
| Révision :

root / ETSN / MyDFT_10.py @ 295

Historique | Voir | Annoter | Télécharger (14,39 ko)

1 281 equemene
#!/usr/bin/env python3
2 281 equemene
3 281 equemene
import numpy as np
4 281 equemene
import pyopencl as cl
5 281 equemene
from numpy import pi,cos,sin
6 281 equemene
7 281 equemene
#
8 281 equemene
def NumpyFFT(x,y):
    """Reference FFT: transform the complex signal x + i*y with Numpy.

    Returns the (real, imaginary) parts of the spectrum as two arrays.
    """
    signal = x + 1.j*y
    spectrum = np.fft.fft(signal)
    return (spectrum.real, spectrum.imag)
12 281 equemene
13 281 equemene
#
14 281 equemene
def OpenCLFFT(x,y,device):
    """Compute the FFT of the complex signal x + i*y on an OpenCL device.

    Uses gpyfft (clFFT) for the transform itself.

    Parameters
    ----------
    x, y : numpy arrays holding the real and imaginary parts.
    device : integer id of the OpenCL device to select (flat index over
        all platforms/devices, same numbering as printed by the -h option).

    Returns
    -------
    (real, imag) tuple of numpy arrays with the transformed signal.
    """
    import sys
    import pyopencl as cl
    import pyopencl.array as cla
    import gpyfft
    from gpyfft.fft import FFT

    # Walk every platform/device and keep the one whose flat index matches
    # the requested id.
    # BUG FIX: the original tested "Id==Device" (an undefined global) and
    # let the loop variable shadow the 'device' parameter.
    Id=0
    HasXPU=False
    for platform in cl.get_platforms():
        for dev in platform.get_devices():
            if Id==device:
                XPU=dev
                print("CPU/GPU selected: ",dev.name.lstrip())
                HasXPU=True
            Id+=1

    if not HasXPU:
        # After the loop, Id holds the total device count (off-by-one fixed:
        # the original printed Id-1).
        print("No XPU #%i found in all of %i devices, sorry..." % (device,Id))
        sys.exit()

    try:
        ctx = cl.Context(devices=[XPU])
        queue = cl.CommandQueue(ctx,properties=cl.command_queue_properties.PROFILING_ENABLE)
    except Exception:
        # Nothing below can work without a context/queue: report and
        # re-raise instead of continuing into a NameError on 'ctx'.
        print("Crash during context creation")
        raise

    # Upload the complex signal, transform it in place on the device,
    # then fetch the result back to the host.
    XY_gpu = cla.to_device(queue, x+1.j*y)

    transform = FFT(ctx, queue, XY_gpu)

    event, = transform.enqueue()
    event.wait()

    XY = XY_gpu.get()
    return(XY.real,XY.imag)
51 281 equemene
52 281 equemene
# Naive Discrete Fourier Transform
53 281 equemene
def MyDFT(x,y):
    """Textbook O(n^2) Discrete Fourier Transform of x + i*y.

    Accumulates each output bin element by element in float32, exactly
    like the naive double loop it benchmarks.
    """
    n=x.shape[0]
    re_out=np.zeros(n).astype(np.float32)
    im_out=np.zeros(n).astype(np.float32)
    for k in range(n):
        for j in range(n):
            angle=2.*pi*k*j/n
            c=cos(angle)
            s=sin(angle)
            re_out[k]=re_out[k]+x[j]*c-y[j]*s
            im_out[k]=im_out[k]+x[j]*s+y[j]*c
    return (re_out,im_out)
62 281 equemene
63 281 equemene
# Numpy Discrete Fourier Transform
64 281 equemene
def NumpyDFT(x,y):
    """O(n^2) Discrete Fourier Transform of x + i*y, inner loop vectorised.

    One Numpy dot-row per output bin instead of a scalar inner loop.
    """
    n=x.shape[0]
    X=np.zeros(n).astype(np.float32)
    Y=np.zeros(n).astype(np.float32)
    # Base angles 2*pi*j/n for j=0..n-1; row k of the DFT uses k*base.
    base=np.multiply(2.0*np.pi/n,np.arange(n)).astype(np.float32)
    for k in range(n):
        c=np.cos(k*base)
        s=np.sin(k*base)
        X[k]=np.sum(np.multiply(c,x)-np.multiply(s,y))
        Y[k]=np.sum(np.multiply(s,x)+np.multiply(c,y))
    return (X,Y)
73 281 equemene
74 281 equemene
# Numba Discrete Fourier Transform
75 281 equemene
# Numba Discrete Fourier Transform
import numba
@numba.njit(parallel=True)
def NumbaDFT(x,y):
    """O(n^2) DFT of x + i*y, JIT-compiled with rows run in parallel."""
    n=x.shape[0]
    X=np.zeros(n).astype(np.float32)
    Y=np.zeros(n).astype(np.float32)
    # Base angles 2*pi*j/n; each parallel iteration scales them by its row.
    base=np.multiply(2.0*np.pi/n,np.arange(n)).astype(np.float32)
    for k in numba.prange(n):
        c=np.cos(k*base)
        s=np.sin(k*base)
        X[k]=np.sum(np.subtract(np.multiply(c,x),np.multiply(s,y)))
        Y[k]=np.sum(np.add(np.multiply(s,x),np.multiply(c,y)))
    return (X,Y)
86 281 equemene
87 281 equemene
# OpenCL complete operation
88 281 equemene
def OpenCLDFT(a_np,b_np,Device):
    """Naive O(n^2) DFT of a_np + i*b_np executed as an OpenCL kernel.

    One work-item per output bin; each work-item loops over the whole
    input vector. Timings for every stage are printed along the way.

    Parameters
    ----------
    a_np, b_np : float32 numpy arrays (real and imaginary parts).
    Device : integer id of the OpenCL device (flat index over platforms).

    Returns
    -------
    (A, B) float32 numpy arrays with the real and imaginary results.
    """
    # Select the device whose flat index over all platforms matches Device.
    Id=0
    HasXPU=False
    for platform in cl.get_platforms():
        for device in platform.get_devices():
            if Id==Device:
                XPU=device
                print("CPU/GPU selected: ",device.name.lstrip())
                HasXPU=True
            Id+=1

    if not HasXPU:
        # Id now holds the total device count (off-by-one fixed: the
        # original printed Id-1).
        print("No XPU #%i found in all of %i devices, sorry..." % (Device,Id))
        sys.exit()

    try:
        ctx = cl.Context(devices=[XPU])
        queue = cl.CommandQueue(ctx,properties=cl.command_queue_properties.PROFILING_ENABLE)
    except Exception:
        # Nothing below can work without a context/queue: report and
        # re-raise instead of continuing into a NameError on 'ctx'.
        print("Crash during context creation")
        raise

    TimeIn=time.time()
    # Copy from Host to Device using pointers
    mf = cl.mem_flags
    a_g = cl.Buffer(ctx, mf.READ_ONLY | mf.COPY_HOST_PTR, hostbuf=a_np)
    b_g = cl.Buffer(ctx, mf.READ_ONLY | mf.COPY_HOST_PTR, hostbuf=b_np)
    Elapsed=time.time()-TimeIn
    print("Copy from Host 2 Device : %.3f" % Elapsed)

    TimeIn=time.time()
    # Definition of kernel under OpenCL
    prg = cl.Program(ctx, """

#define PI 3.141592653589793

__kernel void MyDFT(
    __global const float *a_g, __global const float *b_g, __global float *A_g, __global float *B_g)
{
  int gid = get_global_id(0);
  uint size = get_global_size(0);
  float A=0.,B=0.;
  for (uint i=0; i<size;i++)
  {
     A+=a_g[i]*cos(2.*PI*(float)(gid*i)/(float)size)-b_g[i]*sin(2.*PI*(float)(gid*i)/(float)size);
     B+=a_g[i]*sin(2.*PI*(float)(gid*i)/(float)size)+b_g[i]*cos(2.*PI*(float)(gid*i)/(float)size);
  }
  A_g[gid]=A;
  B_g[gid]=B;
}
""").build()
    Elapsed=time.time()-TimeIn
    print("Building kernels : %.3f" % Elapsed)

    TimeIn=time.time()
    # Memory allocation on Host for results
    A_ocl = np.empty_like(a_np)
    B_ocl = np.empty_like(a_np)
    Elapsed=time.time()-TimeIn
    print("Allocation on Host for results : %.3f" % Elapsed)

    # FIX: the original forgot to reset TimeIn here, so the "Device"
    # timing also included the host allocation above.
    TimeIn=time.time()
    A_g = cl.Buffer(ctx, mf.WRITE_ONLY, A_ocl.nbytes)
    B_g = cl.Buffer(ctx, mf.WRITE_ONLY, B_ocl.nbytes)
    Elapsed=time.time()-TimeIn
    print("Allocation on Device for results : %.3f" % Elapsed)

    TimeIn=time.time()
    # Synthesis of function "MyDFT" inside Kernel Sources
    knl = prg.MyDFT  # Use this Kernel object for repeated calls
    Elapsed=time.time()-TimeIn
    print("Synthesis of kernel : %.3f" % Elapsed)

    TimeIn=time.time()
    # Call of kernel previously defined: one work-item per output element.
    CallCL=knl(queue, a_np.shape, None, a_g, b_g, A_g, B_g)
    CallCL.wait()
    Elapsed=time.time()-TimeIn
    print("Execution of kernel : %.3f" % Elapsed)

    TimeIn=time.time()
    # Copy from Device to Host
    cl.enqueue_copy(queue, A_ocl, A_g)
    cl.enqueue_copy(queue, B_ocl, B_g)
    Elapsed=time.time()-TimeIn
    print("Copy from Device 2 Host : %.3f" % Elapsed)

    # Liberation of memory
    a_g.release()
    b_g.release()
    A_g.release()
    B_g.release()

    return(A_ocl,B_ocl)
183 281 equemene
184 281 equemene
# CUDA complete operation
185 281 equemene
def CUDADFT(a_np,b_np,Device,Threads):
    """Naive O(n^2) DFT of a_np + i*b_np executed as a CUDA kernel.

    One CUDA thread per output bin. If Threads does not divide the vector
    size the kernel falls back to one-thread blocks.

    Parameters
    ----------
    a_np, b_np : float32 numpy arrays (real and imaginary parts).
    Device : integer id of the CUDA device.
    Threads : requested threads per block.

    Returns
    -------
    (A, B) float32 numpy arrays with the real and imaginary results.
    """
    try:
        # For PyCUDA import (the original imported pycuda twice)
        import pycuda.driver as drv
        from pycuda.compiler import SourceModule
    except ImportError:
        # Re-raise: continuing without pycuda would crash later anyway.
        print("Platform does not seem to support CUDA")
        raise

    drv.init()
    HasXPU=False
    for Id in range(drv.Device.count()):
        if Id==Device:
            XPU=drv.Device(Id)
            print("GPU selected %s" % XPU.name())
            HasXPU=True
    # FIX: bare 'print' was a Python 2 leftover (a no-op in Python 3).
    print()

    if not HasXPU:
        # FIX: the original fell through and raised a NameError on XPU.
        print("No GPU #%i found in all of %i devices, sorry..." % (Device,drv.Device.count()))
        sys.exit()

    Context=XPU.make_context()

    TimeIn=time.time()
    mod = SourceModule("""

#define PI 3.141592653589793

__global__ void MyDFT(float *A_g, float *B_g, const float *a_g,const float *b_g)
{
  const int gid = blockIdx.x*blockDim.x+threadIdx.x;
  uint size = gridDim.x*blockDim.x;
  float A=0.,B=0.;
  for (uint i=0; i<size;i++)
  {
     A+=a_g[i]*cos(2.*PI*(float)(gid*i)/(float)size)-b_g[i]*sin(2.*PI*(float)(gid*i)/(float)size);
     B+=a_g[i]*sin(2.*PI*(float)(gid*i)/(float)size)+b_g[i]*cos(2.*PI*(float)(gid*i)/(float)size);
  }
  A_g[gid]=A;
  B_g[gid]=B;
}

""")
    Elapsed=time.time()-TimeIn
    print("Definition of kernel : %.3f" % Elapsed)

    TimeIn=time.time()
    MyDFT = mod.get_function("MyDFT")
    Elapsed=time.time()-TimeIn
    print("Synthesis of kernel : %.3f" % Elapsed)

    TimeIn=time.time()
    A_np = np.zeros_like(a_np)
    B_np = np.zeros_like(a_np)
    Elapsed=time.time()-TimeIn
    print("Allocation on Host for results : %.3f" % Elapsed)

    Size=a_np.size
    if (Size % Threads != 0):
        # FIX: operands were swapped in the original message
        # (it is Size that is not a multiple of Threads).
        print("Impossible : %i not multiple of %i..." % (Size,Threads) )
        TimeIn=time.time()
        # Fallback: one thread per block, one block per output element.
        MyDFT(drv.Out(A_np), drv.Out(B_np), drv.In(a_np), drv.In(b_np),
              block=(1,1,1), grid=(a_np.size,1))
        Elapsed=time.time()-TimeIn
        print("Execution of kernel : %.3f" % Elapsed)
    else:
        Blocks=Size//Threads
        TimeIn=time.time()
        MyDFT(drv.Out(A_np), drv.Out(B_np), drv.In(a_np), drv.In(b_np),
              block=(Threads,1,1), grid=(Blocks,1))
        Elapsed=time.time()-TimeIn
        print("Execution of kernel : %.3f" % Elapsed)

    # Release the CUDA context; results are already on the host.
    Context.pop()
    Context.detach()

    return(A_np,B_np)
261 281 equemene
262 281 equemene
import sys
263 281 equemene
import time
264 281 equemene
265 281 equemene
if __name__=='__main__':

    # --- Default configuration, overridable from the command line ---
    SIZE=1024
    Device=0
    NaiveMethod=False
    NumpyFFTMethod=True
    OpenCLFFTMethod=True
    NumpyMethod=False
    NumbaMethod=False
    OpenCLMethod=False
    CUDAMethod=False
    Threads=1

    import getopt

    HowToUse='%s -n [Naive] -y [numpY] -a [numbA] -o [OpenCL] -c [CUDA] -s <SizeOfVector> -d <DeviceId> -t <threads>'

    try:
        # FIX: "threads=" was missing from the long-option list, so
        # --threads made getopt raise GetoptError.
        opts, args = getopt.getopt(sys.argv[1:],"nyaochs:d:t:",["size=","device=","threads="])
    except getopt.GetoptError:
        print(HowToUse % sys.argv[0])
        sys.exit(2)

    # List of Devices
    Devices=[]
    Alu={}

    for opt, arg in opts:
        if opt == '-h':
            print(HowToUse % sys.argv[0])

            print("\nInformations about devices detected under OpenCL API:")
            # For PyOpenCL import
            try:
                import pyopencl as cl
                Id=0
                for platform in cl.get_platforms():
                    for device in platform.get_devices():
                        #deviceType=cl.device_type.to_string(device.type)
                        deviceType="xPU"
                        print("Device #%i from %s of type %s : %s" % (Id,platform.vendor.lstrip(),deviceType,device.name.lstrip()))
                        Id=Id+1

            except Exception:
                print("Your platform does not seem to support OpenCL")

            print("\nInformations about devices detected under CUDA API:")
            # For PyCUDA import
            try:
                import pycuda.driver as cuda
                cuda.init()
                for Id in range(cuda.Device.count()):
                    device=cuda.Device(Id)
                    print("Device #%i of type GPU : %s" % (Id,device.name()))
                # FIX: bare 'print' was a Python 2 leftover (no-op in Python 3).
                print()
            except Exception:
                print("Your platform does not seem to support CUDA")

            sys.exit()

        elif opt in ("-d", "--device"):
            Device=int(arg)
        elif opt in ("-s", "--size"):
            SIZE = int(arg)
        elif opt in ("-t", "--threads"):
            Threads = int(arg)
        # FIX: the originals tested membership in a plain string
        # ('opt in ("-n")'), which is a fragile substring test.
        elif opt in ("-n",):
            NaiveMethod=True
        elif opt in ("-y",):
            NumpyMethod=True
        elif opt in ("-a",):
            NumbaMethod=True
        elif opt in ("-o",):
            OpenCLMethod=True
        elif opt in ("-c",):
            CUDAMethod=True

    print("Device Selection : %i" % Device)
    print("Size of complex vector : %i" % SIZE)
    print("DFT Naive computation %s " % NaiveMethod )
    print("DFT Numpy computation %s " % NumpyMethod )
    print("DFT Numba computation %s " % NumbaMethod )
    print("DFT OpenCL computation %s " % OpenCLMethod )
    print("DFT CUDA computation %s " % CUDAMethod )

    if CUDAMethod:
        try:
            # For PyCUDA import
            import pycuda.driver as cuda

            cuda.init()
            for Id in range(cuda.Device.count()):
                device=cuda.Device(Id)
                print("Device #%i of type GPU : %s" % (Id,device.name()))
                if Id in Devices:
                    Alu[Id]='GPU'

        except ImportError:
            print("Platform does not seem to support CUDA")

    if OpenCLMethod:
        try:
            # For PyOpenCL import
            import pyopencl as cl
            Id=0
            for platform in cl.get_platforms():
                for device in platform.get_devices():
                    #deviceType=cl.device_type.to_string(device.type)
                    deviceType="xPU"
                    print("Device #%i from %s of type %s : %s" % (Id,platform.vendor.lstrip().rstrip(),deviceType,device.name.lstrip().rstrip()))

                    if Id in Devices:
                        # Set the Alu as detected Device Type
                        Alu[Id]=deviceType
                    Id=Id+1
        except ImportError:
            print("Platform does not seem to support OpenCL")

    # Input signal: constant 1+1j vector.
    a_np = np.ones(SIZE).astype(np.float32)
    b_np = np.ones(SIZE).astype(np.float32)

    # Analytical reference: the DFT of a constant vector is zero everywhere
    # except bin 0, which holds SIZE for both the real and imaginary parts.
    C_np = np.zeros(SIZE).astype(np.float32)
    D_np = np.zeros(SIZE).astype(np.float32)
    C_np[0] = np.float32(SIZE)
    D_np[0] = np.float32(SIZE)

    # Native & Naive Implementation
    if NaiveMethod:
        print("Performing naive implementation")
        TimeIn=time.time()
        c_np,d_np=MyDFT(a_np,b_np)
        NativeElapsed=time.time()-TimeIn
        NativeRate=int(SIZE/NativeElapsed)
        print("NativeRate: %i" % NativeRate)
        print("Precision: ",np.linalg.norm(c_np-C_np),
              np.linalg.norm(d_np-D_np))

    # Native & Numpy Implementation
    if NumpyMethod:
        print("Performing Numpy implementation")
        TimeIn=time.time()
        e_np,f_np=NumpyDFT(a_np,b_np)
        NumpyElapsed=time.time()-TimeIn
        NumpyRate=int(SIZE/NumpyElapsed)
        print("NumpyRate: %i" % NumpyRate)
        print("Precision: ",np.linalg.norm(e_np-C_np),
              np.linalg.norm(f_np-D_np))

    # Native & Numba Implementation
    if NumbaMethod:
        print("Performing Numba implementation")
        TimeIn=time.time()
        g_np,h_np=NumbaDFT(a_np,b_np)
        NumbaElapsed=time.time()-TimeIn
        NumbaRate=int(SIZE/NumbaElapsed)
        print("NumbaRate: %i" % NumbaRate)
        print("Precision: ",np.linalg.norm(g_np-C_np),
              np.linalg.norm(h_np-D_np))

    # OpenCL Implementation
    if OpenCLMethod:
        print("Performing OpenCL implementation")
        TimeIn=time.time()
        i_np,j_np=OpenCLDFT(a_np,b_np,Device)
        OpenCLElapsed=time.time()-TimeIn
        OpenCLRate=int(SIZE/OpenCLElapsed)
        print("OpenCLRate: %i" % OpenCLRate)
        print("Precision: ",np.linalg.norm(i_np-C_np),
              np.linalg.norm(j_np-D_np))

    # CUDA Implementation
    if CUDAMethod:
        print("Performing CUDA implementation")
        TimeIn=time.time()
        k_np,l_np=CUDADFT(a_np,b_np,Device,Threads)
        CUDAElapsed=time.time()-TimeIn
        CUDARate=int(SIZE/CUDAElapsed)
        print("CUDARate: %i" % CUDARate)
        print("Precision: ",np.linalg.norm(k_np-C_np),
              np.linalg.norm(l_np-D_np))

    # Numpy FFT reference implementation
    if NumpyFFTMethod:
        print("Performing NumpyFFT implementation")
        TimeIn=time.time()
        m_np,n_np=NumpyFFT(a_np,b_np)
        NumpyFFTElapsed=time.time()-TimeIn
        NumpyFFTRate=int(SIZE/NumpyFFTElapsed)
        print("NumpyFFTRate: %i" % NumpyFFTRate)
        print("Precision: ",np.linalg.norm(m_np-C_np),
              np.linalg.norm(n_np-D_np))

    # OpenCL FFT (gpyfft/clFFT) implementation
    if OpenCLFFTMethod:
        print("Performing OpenCL implementation")
        TimeIn=time.time()
        i_np,j_np=OpenCLFFT(a_np,b_np,Device)
        OpenCLFFTElapsed=time.time()-TimeIn
        OpenCLFFTRate=int(SIZE/OpenCLFFTElapsed)
        print("OpenCLRate: %i" % OpenCLFFTRate)
        print("Precision: ",np.linalg.norm(i_np-C_np),
              np.linalg.norm(j_np-D_np))