Statistiques
| Révision :

root / ETSN / MyDFT_10.py

Historique | Voir | Annoter | Télécharger (17,05 ko)

1 281 equemene
#!/usr/bin/env python3
2 281 equemene
3 281 equemene
import numpy as np
4 281 equemene
import pyopencl as cl
5 281 equemene
from numpy import pi,cos,sin
6 281 equemene
7 281 equemene
#
8 281 equemene
def NumpyFFT(x,y):
    """Forward FFT of the complex signal x + i*y via numpy.fft.

    x, y hold the real and imaginary parts; the input is packed into a
    single-precision complex array before transforming.  Returns the
    (real, imag) parts of the spectrum as a tuple.
    """
    spectrum=np.fft.fft(np.csingle(x+1.j*y))
    return(spectrum.real,spectrum.imag)
12 281 equemene
13 281 equemene
#
14 281 equemene
def OpenCLFFT(x,y,device):
    """FFT of the complex signal x + i*y on an OpenCL device via gpyfft/clFFT.

    Parameters
    ----------
    x, y : 1-D numpy arrays holding the real and imaginary parts.
    device : int, flat index of the OpenCL device to use, counted over
        all devices of all platforms.

    Returns
    -------
    (real, imag) parts of the transform as numpy arrays.

    Prints elapsed times for each stage (selection, context, transfers,
    compute) as a side effect.
    """
    import sys
    import time
    import pyopencl as cl
    import pyopencl.array as cla
    from gpyfft.fft import FFT

    TimeIn=time.time()
    # BUG FIX: the original compared Id against the *global* 'Device' and
    # shadowed the 'device' argument with the loop variable, so the
    # parameter was silently ignored.
    Rank=device
    Id=0
    XPU=None
    for platform in cl.get_platforms():
        for dev in platform.get_devices():
            if Id==Rank:
                XPU=dev
                print("CPU/GPU selected: ",dev.name.lstrip())
            Id+=1

    if XPU is None:
        print("No XPU #%i found in all of %i devices, sorry..." % (Rank,Id-1))
        sys.exit()
    Elapsed=time.time()-TimeIn
    print("Selection of device : %.3f" % Elapsed)

    TimeIn=time.time()
    try:
        ctx = cl.Context(devices=[XPU])
        queue = cl.CommandQueue(ctx,properties=cl.command_queue_properties.PROFILING_ENABLE)
    except Exception:
        # Re-raise: continuing without a context would only crash later
        # with a confusing NameError on 'ctx'/'queue'.
        print("Crash during context creation")
        raise
    Elapsed=time.time()-TimeIn
    print("Context initialisation : %.3f" % Elapsed)

    TimeIn=time.time()
    # Pack into single-precision complex and push to the device.
    XY_gpu = cla.to_device(queue, np.csingle(x+1.j*y))
    Elapsed=time.time()-TimeIn
    print("Copy from Host to Device : %.3f" % Elapsed)

    TimeIn=time.time()
    # In-place transform on the device buffer.
    transform = FFT(ctx, queue, XY_gpu)
    event, = transform.enqueue()
    event.wait()
    Elapsed=time.time()-TimeIn
    print("Compute FFT : %.3f" % Elapsed)

    TimeIn=time.time()
    XY = XY_gpu.get()
    Elapsed=time.time()-TimeIn
    print("Copy from Device to Host : %.3f" % Elapsed)
    return(XY.real,XY.imag)
64 281 equemene
65 281 equemene
# Naive Discrete Fourier Transform
66 281 equemene
def MyDFT(x,y):
    """Naive O(n^2) DFT of the complex signal x + i*y, pure Python loops.

    Parameters
    ----------
    x, y : 1-D numpy arrays (real and imaginary parts), same length.

    Returns
    -------
    (X, Y) : float32 arrays holding the real and imaginary parts of the
        forward transform (same convention as numpy.fft.fft).
    """
    size=x.shape[0]
    X=np.zeros(size).astype(np.float32)
    Y=np.zeros(size).astype(np.float32)
    for i in range(size):
        for j in range(size):
            # Hoist the common angle: it was previously recomputed four
            # times per (i, j) pair.  Same expression, so the floating
            # point results are bit-identical.
            angle=2.*pi*i*j/size
            c=cos(angle)
            s=sin(angle)
            X[i]=X[i]+x[j]*c+y[j]*s
            Y[i]=Y[i]-x[j]*s+y[j]*c
    return(X,Y)
75 281 equemene
76 281 equemene
# Numpy Discrete Fourier Transform
77 281 equemene
def NumpyDFT(x,y):
    """O(n^2) DFT of x + i*y computed with vectorized numpy operations.

    Parameters
    ----------
    x, y : 1-D numpy arrays (real and imaginary parts), same length.

    Returns
    -------
    (X, Y) : float32 arrays with the real and imaginary parts of the
        forward transform (same convention as numpy.fft.fft).

    Note: builds a size x size angle matrix (O(n^2) memory) to replace
    the Python-level outer loop with two matrix-vector products.
    """
    size=x.shape[0]
    # nj[j] = 2*pi*j/size, as in the original implementation.
    nj=np.multiply(2.0*np.pi/size,np.arange(size)).astype(np.float32)
    # angles[i, j] = 2*pi*i*j/size
    angles=np.outer(np.arange(size),nj)
    C=np.cos(angles)
    S=np.sin(angles)
    # X[i] = sum_j x[j]*cos + y[j]*sin ;  Y[i] = sum_j y[j]*cos - x[j]*sin
    X=(C @ x + S @ y).astype(np.float32)
    Y=(C @ y - S @ x).astype(np.float32)
    return(X,Y)
86 281 equemene
87 281 equemene
# Numba Discrete Fourier Transform
88 281 equemene
import numba
89 281 equemene
@numba.njit(parallel=True)
def NumbaDFT(x,y):
    # Numba-compiled naive DFT of x + i*y: same math as NumpyDFT, with
    # the outer loop parallelised across threads via prange.
    # x, y: 1-D float32 arrays (real/imag parts); returns (X, Y) float32
    # arrays holding the real/imag parts of the forward transform.
    size=x.shape[0]
    X=np.zeros(size).astype(np.float32)
    Y=np.zeros(size).astype(np.float32)
    # nj[j] = 2*pi*j/size, precomputed once outside the parallel loop.
    nj=np.multiply(2.0*np.pi/size,np.arange(size)).astype(np.float32)
    # Each iteration i writes only X[i]/Y[i], so prange is race-free.
    for i in numba.prange(size):
        X[i]=np.sum(np.add(np.multiply(np.cos(i*nj),x),np.multiply(np.sin(i*nj),y)))
        Y[i]=np.sum(-np.subtract(np.multiply(np.sin(i*nj),x),np.multiply(np.cos(i*nj),y)))
    return(X,Y)
99 281 equemene
100 281 equemene
# OpenCL complete operation
101 281 equemene
def OpenCLDFT(a_np,b_np,Device):
    """Naive O(n^2) DFT of a_np + i*b_np executed as an OpenCL kernel.

    Parameters
    ----------
    a_np, b_np : 1-D float32 numpy arrays (real and imaginary parts).
    Device : int, flat index of the OpenCL device, counted over all
        devices of all platforms.

    Returns
    -------
    (A, B) : float32 arrays with the real and imaginary parts of the
        forward transform.

    Prints per-stage timings as a side effect.
    """
    # Imported locally: at module level these are only imported *below*
    # this definition, which is fragile.
    import sys
    import time

    # Locate the requested device by walking every platform.
    Id=0
    HasXPU=False
    for platform in cl.get_platforms():
        for device in platform.get_devices():
            if Id==Device:
                XPU=device
                print("CPU/GPU selected: ",device.name.lstrip())
                HasXPU=True
            Id+=1

    if HasXPU==False:
        print("No XPU #%i found in all of %i devices, sorry..." % (Device,Id-1))
        sys.exit()

    try:
        ctx = cl.Context(devices=[XPU])
        queue = cl.CommandQueue(ctx,properties=cl.command_queue_properties.PROFILING_ENABLE)
    except Exception:
        # BUG FIX: the original swallowed the exception and then crashed
        # later with a NameError on 'ctx'; re-raise instead.
        print("Crash during context creation")
        raise

    TimeIn=time.time()
    # Copy from Host to Device using pointers
    mf = cl.mem_flags
    a_g = cl.Buffer(ctx, mf.READ_ONLY | mf.COPY_HOST_PTR, hostbuf=a_np)
    b_g = cl.Buffer(ctx, mf.READ_ONLY | mf.COPY_HOST_PTR, hostbuf=b_np)
    Elapsed=time.time()-TimeIn
    print("Copy from Host 2 Device : %.3f" % Elapsed)

    TimeIn=time.time()
    # Definition of kernel under OpenCL: one work-item per output bin,
    # each looping over the full input.
    prg = cl.Program(ctx, """

#define PI 3.141592653589793

__kernel void MyDFT(
    __global const float *a_g, __global const float *b_g, __global float *A_g, __global float *B_g)
{
  int gid = get_global_id(0);
  uint size = get_global_size(0);
  float A=0.,B=0.;
  for (uint i=0; i<size;i++)
  {
     A+=a_g[i]*cos(2.*PI*(float)(gid*i)/(float)size)+b_g[i]*sin(2.*PI*(float)(gid*i)/(float)size);
     B+=-a_g[i]*sin(2.*PI*(float)(gid*i)/(float)size)+b_g[i]*cos(2.*PI*(float)(gid*i)/(float)size);
  }
  A_g[gid]=A;
  B_g[gid]=B;
}
""").build()
    Elapsed=time.time()-TimeIn
    print("Building kernels : %.3f" % Elapsed)

    TimeIn=time.time()
    # Host-side result arrays.
    A_ocl = np.empty_like(a_np)
    B_ocl = np.empty_like(a_np)
    Elapsed=time.time()-TimeIn
    print("Allocation on Host for results : %.3f" % Elapsed)

    # BUG FIX: the timer was not reset here, so the device-allocation
    # time also included the host allocation above.
    TimeIn=time.time()
    A_g = cl.Buffer(ctx, mf.WRITE_ONLY, A_ocl.nbytes)
    B_g = cl.Buffer(ctx, mf.WRITE_ONLY, B_ocl.nbytes)
    Elapsed=time.time()-TimeIn
    print("Allocation on Device for results : %.3f" % Elapsed)

    TimeIn=time.time()
    # Bind the compiled kernel once for the call below.
    knl = prg.MyDFT  # Use this Kernel object for repeated calls
    Elapsed=time.time()-TimeIn
    print("Synthesis of kernel : %.3f" % Elapsed)

    TimeIn=time.time()
    # Launch one work-item per element and wait for completion.
    CallCL=knl(queue, a_np.shape, None, a_g, b_g, A_g, B_g)
    CallCL.wait()
    Elapsed=time.time()-TimeIn
    print("Execution of kernel : %.3f" % Elapsed)

    TimeIn=time.time()
    # Copy from Device to Host
    cl.enqueue_copy(queue, A_ocl, A_g)
    cl.enqueue_copy(queue, B_ocl, B_g)
    Elapsed=time.time()-TimeIn
    print("Copy from Device 2 Host : %.3f" % Elapsed)

    # Liberation of memory
    a_g.release()
    b_g.release()
    A_g.release()
    B_g.release()

    return(A_ocl,B_ocl)
196 281 equemene
197 281 equemene
# CUDA complete operation
198 281 equemene
def CUDADFT(a_np,b_np,Device,Threads):
    """Naive O(n^2) DFT of a_np + i*b_np executed as a CUDA kernel (PyCUDA).

    Parameters
    ----------
    a_np, b_np : 1-D float32 numpy arrays (real and imaginary parts).
    Device : int, CUDA device index.
    Threads : int, threads per block; if it does not divide the vector
        size, the kernel falls back to one thread per block.

    Returns
    -------
    (A, B) : float32 arrays with the real and imaginary parts of the
        forward transform.

    Prints per-stage timings as a side effect.
    """
    # 'time' is imported locally: at module level it is only imported
    # *below* this definition.  The original also imported pycuda.driver
    # twice (as drv and cuda) and wrapped the second import in a dead
    # try/except ImportError (the first, uncaught import would already
    # have raised); a single import is kept.
    import time
    import pycuda.driver as drv
    from pycuda.compiler import SourceModule

    drv.init()
    for Id in range(drv.Device.count()):
        if Id==Device:
            XPU=drv.Device(Id)
            print("GPU selected %s" % XPU.name())

    Context=XPU.make_context()

    TimeIn=time.time()
    # One thread per output bin; each thread loops over the full input.
    mod = SourceModule("""

#define PI 3.141592653589793

__global__ void MyDFT(float *A_g, float *B_g, const float *a_g,const float *b_g)
{
  const int gid = blockIdx.x*blockDim.x+threadIdx.x;
  uint size = gridDim.x*blockDim.x;
  float A=0.,B=0.;
  for (uint i=0; i<size;i++)
  {
     A+=a_g[i]*cos(2.*PI*(float)(gid*i)/(float)size)+b_g[i]*sin(2.*PI*(float)(gid*i)/(float)size);
     B+=-a_g[i]*sin(2.*PI*(float)(gid*i)/(float)size)+b_g[i]*cos(2.*PI*(float)(gid*i)/(float)size);
  }
  A_g[gid]=A;
  B_g[gid]=B;
}

""")
    Elapsed=time.time()-TimeIn
    print("Definition of kernel : %.3f" % Elapsed)

    TimeIn=time.time()
    MyDFT = mod.get_function("MyDFT")
    Elapsed=time.time()-TimeIn
    print("Synthesis of kernel : %.3f" % Elapsed)

    TimeIn=time.time()
    A_np = np.zeros_like(a_np)
    B_np = np.zeros_like(a_np)
    Elapsed=time.time()-TimeIn
    print("Allocation on Host for results : %.3f" % Elapsed)

    Size=a_np.size
    if (Size % Threads != 0):
        # BUG FIX: the format arguments were swapped — the condition
        # tests Size against Threads, not the other way round.
        print("Impossible : %i not multiple of %i..." % (Size,Threads) )
        # Fallback: one thread per block, one block per element.
        TimeIn=time.time()
        MyDFT(drv.Out(A_np), drv.Out(B_np), drv.In(a_np), drv.In(b_np),
              block=(1,1,1), grid=(a_np.size,1))
        Elapsed=time.time()-TimeIn
        print("Execution of kernel : %.3f" % Elapsed)
    else:
        Blocks=int(Size/Threads)
        TimeIn=time.time()
        MyDFT(drv.Out(A_np), drv.Out(B_np), drv.In(a_np), drv.In(b_np),
              block=(Threads,1,1), grid=(Blocks,1))
        Elapsed=time.time()-TimeIn
        print("Execution of kernel : %.3f" % Elapsed)

    Context.pop()
    Context.detach()

    return(A_np,B_np)
274 281 equemene
275 281 equemene
import sys
276 281 equemene
import time
277 281 equemene
278 281 equemene
if __name__=='__main__':

    # --- Default settings, overridable from the command line below ---
    SIZE=1024            # length of the complex input vector
    Device=0             # flat index of the OpenCL/CUDA device to use
    NaiveMethod=False
    NumpyFFTMethod=True
    OpenCLFFTMethod=True
    NumpyMethod=False
    NumbaMethod=False
    OpenCLMethod=False
    CUDAMethod=False
    Threads=1            # CUDA threads per block
    Verbose=True         # NOTE(review): '-v' is accepted by getopt below but never handled or used

    import getopt

    HowToUse='%s -n [Naive] -y [numpY] -a [numbA] -o [OpenCL] -c [CUDA] -s <SizeOfVector> -d <DeviceId> -t <threads>'

    try:
        # NOTE(review): the long option "threads=" is missing from the
        # list, so only the short form -t works for the thread count.
        opts, args = getopt.getopt(sys.argv[1:],"vnyaochs:d:t:",["size=","device="])
    except getopt.GetoptError:
        print(HowToUse % sys.argv[0])
        sys.exit(2)

    # List of Devices
    Devices=[]
    Alu={}

    for opt, arg in opts:
        if opt == '-h':
            # -h: print usage plus an inventory of all OpenCL and CUDA
            # devices visible on this host, then exit.
            print(HowToUse % sys.argv[0])

            print("\nInformations about devices detected under OpenCL API:")
            # For PyOpenCL import
            try:
                import pyopencl as cl
                Id=0
                for platform in cl.get_platforms():
                    for device in platform.get_devices():
                        #deviceType=cl.device_type.to_string(device.type)
                        deviceType="xPU"
                        print("Device #%i from %s of type %s : %s" % (Id,platform.vendor.lstrip(),deviceType,device.name.lstrip()))
                        Id=Id+1

            except:
                print("Your platform does not seem to support OpenCL")

            print("\nInformations about devices detected under CUDA API:")
            # For PyCUDA import
            try:
                import pycuda.driver as cuda
                cuda.init()
                for Id in range(cuda.Device.count()):
                    device=cuda.Device(Id)
                    print("Device #%i of type GPU : %s" % (Id,device.name()))
                # NOTE(review): bare 'print' is a Python-2 leftover; in
                # Python 3 it evaluates the builtin and prints nothing.
                print
            except:
                print("Your platform does not seem to support CUDA")

            sys.exit()

        elif opt in ("-d", "--device"):
            Device=int(arg)
        elif opt in ("-s", "--size"):
            SIZE = int(arg)
        elif opt in ("-t", "--threads"):
            Threads = int(arg)
        elif opt in ("-n"):
            NaiveMethod=True
        elif opt in ("-y"):
            NumpyMethod=True
        elif opt in ("-a"):
            NumbaMethod=True
        elif opt in ("-o"):
            OpenCLMethod=True
        elif opt in ("-c"):
            CUDAMethod=True

    # Echo the effective configuration.
    print("Device Selection : %i" % Device)
    print("Size of complex vector : %i" % SIZE)
    print("DFT Naive computation %s " % NaiveMethod )
    print("DFT Numpy computation %s " % NumpyMethod )
    print("FFT Numpy computation %s " % NumpyFFTMethod )
    print("DFT Numba computation %s " % NumbaMethod )
    print("DFT OpenCL computation %s " % OpenCLMethod )
    print("FFT OpenCL computation %s " % OpenCLFFTMethod )
    print("DFT CUDA computation %s " % CUDAMethod )

    # Informational CUDA device listing.  NOTE(review): 'Devices' is
    # always empty here, so 'Alu' is never actually populated.
    if CUDAMethod:
        try:
            # For PyCUDA import
            import pycuda.driver as cuda

            cuda.init()
            for Id in range(cuda.Device.count()):
                device=cuda.Device(Id)
                print("Device #%i of type GPU : %s" % (Id,device.name()))
                if Id in Devices:
                    Alu[Id]='GPU'

        except ImportError:
            print("Platform does not seem to support CUDA")

    # Informational OpenCL device listing (same remark as above).
    if OpenCLMethod:
        try:
            # For PyOpenCL import
            import pyopencl as cl
            Id=0
            for platform in cl.get_platforms():
                for device in platform.get_devices():
                    #deviceType=cl.device_type.to_string(device.type)
                    deviceType="xPU"
                    print("Device #%i from %s of type %s : %s" % (Id,platform.vendor.lstrip().rstrip(),deviceType,device.name.lstrip().rstrip()))

                    if Id in Devices:
                    # Set the Alu as detected Device Type
                        Alu[Id]=deviceType
                    Id=Id+1
        except ImportError:
            print("Platform does not seem to support OpenCL")


    # Input signal: constant (1+1j) everywhere.  Its DFT is SIZE*(1+1j)
    # in bin 0 and zero elsewhere, which gives an exact reference.
    a_np = np.ones(SIZE).astype(np.float32)
    b_np = np.ones(SIZE).astype(np.float32)
    # a_np = np.random.rand(SIZE).astype(np.float32)
    # b_np = np.random.rand(SIZE).astype(np.float32)

    # Analytical reference spectrum used by the "Precision" reports.
    C_np = np.zeros(SIZE).astype(np.float32)
    D_np = np.zeros(SIZE).astype(np.float32)
    C_np[0] = np.float32(SIZE)
    D_np[0] = np.float32(SIZE)

    # Native & Naive Implementation
    if NaiveMethod:
        print("Performing naive implementation")
        TimeIn=time.time()
        c_np,d_np=MyDFT(a_np,b_np)
        NativeElapsed=time.time()-TimeIn
        NativeRate=int(SIZE/NativeElapsed)  # elements per second
        print("NativeRate: %i" % NativeRate)
        print("Precision: ",np.linalg.norm(c_np-C_np),
              np.linalg.norm(d_np-D_np))

    # Native & Numpy Implementation
    if NumpyMethod:
        print("Performing Numpy implementation")
        TimeIn=time.time()
        e_np,f_np=NumpyDFT(a_np,b_np)
        NumpyElapsed=time.time()-TimeIn
        NumpyRate=int(SIZE/NumpyElapsed)
        print("NumpyRate: %i" % NumpyRate)
        print("Precision: ",np.linalg.norm(e_np-C_np),
              np.linalg.norm(f_np-D_np))

    # Native & Numba Implementation
    if NumbaMethod:
        print("Performing Numba implementation")
        TimeIn=time.time()
        g_np,h_np=NumbaDFT(a_np,b_np)
        # NOTE(review): first call includes JIT compilation time.
        NumbaElapsed=time.time()-TimeIn
        NumbaRate=int(SIZE/NumbaElapsed)
        print("NumbaRate: %i" % NumbaRate)
        print("Precision: ",np.linalg.norm(g_np-C_np),
              np.linalg.norm(h_np-D_np))

    # OpenCL Implementation
    if OpenCLMethod:
        print("Performing OpenCL implementation")
        TimeIn=time.time()
        i_np,j_np=OpenCLDFT(a_np,b_np,Device)
        OpenCLElapsed=time.time()-TimeIn
        OpenCLRate=int(SIZE/OpenCLElapsed)
        print("OpenCLRate: %i" % OpenCLRate)
        print("Precision: ",np.linalg.norm(i_np-C_np),
              np.linalg.norm(j_np-D_np))

    # CUDA Implementation
    if CUDAMethod:
        print("Performing CUDA implementation")
        TimeIn=time.time()
        k_np,l_np=CUDADFT(a_np,b_np,Device,Threads)
        CUDAElapsed=time.time()-TimeIn
        CUDARate=int(SIZE/CUDAElapsed)
        print("CUDARate: %i" % CUDARate)
        print("Precision: ",np.linalg.norm(k_np-C_np),
              np.linalg.norm(l_np-D_np))

    if NumpyFFTMethod:
        print("Performing NumpyFFT implementation")
        TimeIn=time.time()
        m_np,n_np=NumpyFFT(a_np,b_np)
        NumpyFFTElapsed=time.time()-TimeIn
        NumpyFFTRate=int(SIZE/NumpyFFTElapsed)
        # NOTE(review): %i truncates the elapsed seconds, typically
        # printing 0 for a fast FFT.
        print("NumpyFFTElapsed: %i" % NumpyFFTElapsed)
        print("NumpyFFTRate: %i" % NumpyFFTRate)
        print("Precision: ",np.linalg.norm(m_np-C_np),
              np.linalg.norm(n_np-D_np))

    # OpenCL FFT Implementation
    if OpenCLFFTMethod:
        print("Performing OpenCLFFT implementation")
        TimeIn=time.time()
        # NOTE(review): reuses i_np/j_np, overwriting the OpenCLDFT
        # results above; the OpenCL-vs-FFT comparisons below therefore
        # compare the FFT against itself when both methods ran.
        i_np,j_np=OpenCLFFT(a_np,b_np,Device)
        OpenCLFFTElapsed=time.time()-TimeIn
        OpenCLFFTRate=int(SIZE/OpenCLFFTElapsed)
        print("OpenCLFFTElapsed: %i" % OpenCLFFTElapsed)
        print("OpenCLFFTRate: %i" % OpenCLFFTRate)
        print("Precision: ",np.linalg.norm(i_np-C_np),
              np.linalg.norm(j_np-D_np))

    # --- Cross-comparisons between the methods that actually ran ---
    if OpenCLMethod and NumpyFFTMethod:
        print(OpenCLMethod,NumpyFFTMethod)
        print("Precision: ",np.linalg.norm(m_np-i_np),
              np.linalg.norm(n_np-j_np))
        print((m_np-i_np),(n_np-j_np))
        print(i_np,j_np)
        print(m_np,n_np)
        print((i_np-m_np),(j_np-n_np))

    if CUDAMethod and NumpyFFTMethod:
        print(CUDAMethod,NumpyFFTMethod)
        print("Precision: ",np.linalg.norm(m_np-k_np),
              np.linalg.norm(n_np-l_np))
        print((m_np-k_np),(n_np-l_np))
        print(k_np,l_np)
        print(m_np,n_np)
        print((k_np-m_np),(l_np-n_np))

    if OpenCLMethod and NumpyMethod:
        print(OpenCLMethod,NumpyMethod)
        print("Precision: ",np.linalg.norm(e_np-i_np),
              np.linalg.norm(f_np-j_np))
        print((e_np-i_np),(f_np-j_np))

    if NumpyFFTMethod and NumpyMethod:
        print(NumpyFFTMethod,NumpyMethod)
        print("Precision: ",np.linalg.norm(e_np-m_np),
              np.linalg.norm(f_np-n_np))
        print(e_np,f_np)
        print(m_np,n_np)
        print((e_np-m_np),(f_np-n_np))

    if NumpyFFTMethod and NaiveMethod:
        print(NumpyFFTMethod,NaiveMethod)
        print("Precision: ",np.linalg.norm(c_np-m_np),
              np.linalg.norm(d_np-n_np))
        print(c_np,d_np)
        print(m_np,n_np)
        print((c_np-m_np),(d_np-n_np))

    if NumpyFFTMethod and NumbaMethod:
        print(NumpyFFTMethod,NumbaMethod)
        print("Precision: ",np.linalg.norm(g_np-m_np),
              np.linalg.norm(h_np-n_np))
        print(g_np,h_np)
        print(m_np,n_np)
        print((g_np-m_np),(h_np-n_np))

    # Speed-up of the OpenCL FFT relative to the Numpy FFT.
    if OpenCLFFTMethod and NumpyFFTMethod:
        print("NumpyOpenCLRatio: %f" % (OpenCLFFTRate/NumpyFFTRate))