Statistiques
| Révision :

root / ETSN / MyDFT_10.py @ 301

Historique | Voir | Annoter | Télécharger (17,04 ko)

1 281 equemene
#!/usr/bin/env python3
2 281 equemene
3 281 equemene
import numpy as np
4 281 equemene
import pyopencl as cl
5 281 equemene
from numpy import pi,cos,sin
6 281 equemene
7 281 equemene
#
8 281 equemene
def NumpyFFT(x,y):
    """Compute the DFT of the complex vector x+1j*y with numpy.fft.fft.

    Parameters:
        x: real parts of the input (1-D array-like).
        y: imaginary parts of the input (same length as x).

    Returns:
        Tuple (real, imag) with the real and imaginary parts of the
        transform.
    """
    # Coerce to arrays so that plain Python lists are accepted as well
    x=np.asarray(x)
    y=np.asarray(y)
    # Pack both halves into one single-precision complex vector
    xy=(x+1.j*y).astype(np.csingle)
    XY=np.fft.fft(xy)
    # Debug output kept from the original: dumps the raw spectrum
    print(XY)
    return(XY.real,XY.imag)
13 281 equemene
14 281 equemene
#
15 281 equemene
def OpenCLFFT(x,y,device):
    """Compute the FFT of x+1j*y on an OpenCL device through gpyfft.

    Each stage (device selection, context creation, transfers, transform)
    is timed with time.time() and the elapsed time is printed.

    Parameters:
        x: real parts of the input (numpy array).
        y: imaginary parts of the input (numpy array, same shape as x).
        device: index of the OpenCL device, counted across all platforms
            in enumeration order.

    Returns:
        Tuple (real, imag) with the real and imaginary parts of the
        transform.

    Raises:
        SystemExit: if no device has the requested index.
        pyopencl.Error: if the context or the queue cannot be created.
    """
    import sys
    import time
    import pyopencl as cl
    import pyopencl.array as cla
    from gpyfft.fft import FFT

    TimeIn=time.time()
    # Keep the requested index under its own name: the enumeration loop
    # below needs a separate iteration variable.
    Wanted=device
    Id=0
    HasXPU=False
    for platform in cl.get_platforms():
        for candidate in platform.get_devices():
            if Id==Wanted:
                XPU=candidate
                print("CPU/GPU selected: ",candidate.name.lstrip())
                HasXPU=True
            Id+=1

    if not HasXPU:
        # After the loops, Id is the total number of devices enumerated
        print("No XPU #%i found in all of %i devices, sorry..." % (Wanted,Id))
        sys.exit(1)
    Elapsed=time.time()-TimeIn
    print("Selection of device : %.3f" % Elapsed)

    TimeIn=time.time()
    try:
        ctx = cl.Context(devices=[XPU])
        queue = cl.CommandQueue(ctx,properties=cl.command_queue_properties.PROFILING_ENABLE)
    except cl.Error:
        # Nothing below can run without a context: report and propagate
        print("Crash during context creation")
        raise
    Elapsed=time.time()-TimeIn
    print("Context initialisation : %.3f" % Elapsed)

    TimeIn=time.time()
    XY_gpu = cla.to_device(queue, np.csingle(x+1.j*y))
    Elapsed=time.time()-TimeIn
    print("Copy from Host to Device : %.3f" % Elapsed)

    TimeIn=time.time()
    transform = FFT(ctx, queue, XY_gpu)
    event, = transform.enqueue()
    event.wait()
    Elapsed=time.time()-TimeIn
    print("Compute FFT : %.3f" % Elapsed)

    TimeIn=time.time()
    # The result is read back from the same device array that was uploaded
    XY = XY_gpu.get()
    Elapsed=time.time()-TimeIn
    print("Copy from Device to Host : %.3f" % Elapsed)
    # Debug output kept from the original: dumps the raw spectrum
    print(XY)
    return(XY.real,XY.imag)
66 281 equemene
67 281 equemene
# Naive Discrete Fourier Transform
68 281 equemene
def MyDFT(x,y):
    """Naive O(size**2) discrete Fourier transform of x+1j*y.

    Parameters:
        x: real parts of the input (1-D array-like).
        y: imaginary parts of the input (same length as x).

    Returns:
        Tuple (X, Y) of float32 arrays with the real and imaginary parts
        of the transform.
    """
    x=np.asarray(x)
    y=np.asarray(y)
    size=x.shape[0]
    X=np.zeros(size,dtype=np.float32)
    Y=np.zeros(size,dtype=np.float32)
    for i in range(size):
        for j in range(size):
            # Evaluate each trigonometric term once per (i,j) pair
            angle=2.*pi*i*j/size
            c=cos(angle)
            s=sin(angle)
            # Accumulate directly in the float32 outputs, term by term
            X[i]=X[i]+x[j]*c+y[j]*s
            Y[i]=Y[i]-x[j]*s+y[j]*c
    return(X,Y)
77 281 equemene
78 281 equemene
# Numpy Discrete Fourier Transform
79 281 equemene
def NumpyDFT(x,y):
    """Discrete Fourier transform of x+1j*y, one numpy reduction per bin.

    Parameters:
        x: real parts of the input (1-D array-like).
        y: imaginary parts of the input (same length as x).

    Returns:
        Tuple (X, Y) of float32 arrays with the real and imaginary parts
        of the transform.
    """
    x=np.asarray(x)
    y=np.asarray(y)
    size=x.shape[0]
    X=np.zeros(size,dtype=np.float32)
    Y=np.zeros(size,dtype=np.float32)
    if size==0:
        # Nothing to transform; also avoids dividing by zero below
        return(X,Y)
    k=np.arange(size)
    step=2.0*np.pi/size
    for i in range(size):
        # Reduce i*k modulo size with integers before scaling: the float32
        # angle then stays below 2*pi and keeps its accuracy for large size
        theta=np.multiply(step,(i*k)%size).astype(np.float32)
        # Each trigonometric vector is computed once and used for X and Y
        c=np.cos(theta)
        s=np.sin(theta)
        X[i]=np.sum(c*x+s*y)
        Y[i]=np.sum(c*y-s*x)
    return(X,Y)
88 281 equemene
89 281 equemene
# Numba Discrete Fourier Transform
90 281 equemene
import numba
91 281 equemene
@numba.njit(parallel=True)
def NumbaDFT(x,y):
    """Discrete Fourier transform of x+1j*y, bins spread over numba.prange.

    Parameters:
        x: real parts of the input (1-D numpy array).
        y: imaginary parts of the input (same length as x).

    Returns:
        Tuple (X, Y) of float32 arrays with the real and imaginary parts
        of the transform.
    """
    size=x.shape[0]
    X=np.zeros(size).astype(np.float32)
    Y=np.zeros(size).astype(np.float32)
    k=np.arange(size)
    step=2.0*np.pi/size
    for i in numba.prange(size):
        # Reduce i*k modulo size before scaling so that the float32 angle
        # stays below 2*pi and keeps its accuracy for large size
        theta=(step*((i*k)%size)).astype(np.float32)
        # Each trigonometric vector is computed once and used for X and Y
        c=np.cos(theta)
        s=np.sin(theta)
        X[i]=np.sum(c*x+s*y)
        Y[i]=np.sum(c*y-s*x)
    return(X,Y)
101 281 equemene
102 281 equemene
# OpenCL complete operation
103 281 equemene
def OpenCLDFT(a_np,b_np,Device):
    """Compute the DFT of a_np+1j*b_np with a naive OpenCL kernel.

    One work-item computes one output bin. Each stage is timed with
    time.time() and the elapsed time is printed.

    Parameters:
        a_np: real parts of the input (numpy array; the kernel reads the
            buffer as float).
        b_np: imaginary parts of the input (same shape and type as a_np).
        Device: index of the OpenCL device, counted across all platforms
            in enumeration order.

    Returns:
        Tuple (A_ocl, B_ocl) of arrays shaped like a_np with the real and
        imaginary parts of the transform.

    Raises:
        SystemExit: if no device has the requested index.
        pyopencl.Error: if the context or the queue cannot be created.
    """
    import sys
    import time

    Id=0
    HasXPU=False
    for platform in cl.get_platforms():
        for device in platform.get_devices():
            if Id==Device:
                XPU=device
                print("CPU/GPU selected: ",device.name.lstrip())
                HasXPU=True
            Id+=1

    if not HasXPU:
        # After the loops, Id is the total number of devices enumerated
        print("No XPU #%i found in all of %i devices, sorry..." % (Device,Id))
        sys.exit(1)

    try:
        ctx = cl.Context(devices=[XPU])
        queue = cl.CommandQueue(ctx,properties=cl.command_queue_properties.PROFILING_ENABLE)
    except cl.Error:
        # Nothing below can run without a context: report and propagate
        print("Crash during context creation")
        raise

    TimeIn=time.time()
    # Copy from Host to Device using pointers
    mf = cl.mem_flags
    a_g = cl.Buffer(ctx, mf.READ_ONLY | mf.COPY_HOST_PTR, hostbuf=a_np)
    b_g = cl.Buffer(ctx, mf.READ_ONLY | mf.COPY_HOST_PTR, hostbuf=b_np)
    Elapsed=time.time()-TimeIn
    print("Copy from Host 2 Device : %.3f" % Elapsed)

    TimeIn=time.time()
    # Definition of kernel under OpenCL
    prg = cl.Program(ctx, """

#define PI 3.141592653589793

__kernel void MyDFT(
    __global const float *a_g, __global const float *b_g, __global float *A_g, __global float *B_g)
{
  int gid = get_global_id(0);
  uint size = get_global_size(0);
  float A=0.,B=0.;
  for (uint i=0; i<size;i++)
  {
     // Reduce gid*i modulo size in 64-bit integers: the product neither
     // overflows nor loses its low bits when converted to float
     uint k=(uint)(((ulong)gid*(ulong)i)%(ulong)size);
     A+=a_g[i]*cos(2.*PI*(float)k/(float)size)+b_g[i]*sin(2.*PI*(float)k/(float)size);
     B+=-a_g[i]*sin(2.*PI*(float)k/(float)size)+b_g[i]*cos(2.*PI*(float)k/(float)size);
  }
  A_g[gid]=A;
  B_g[gid]=B;
}
""").build()
    Elapsed=time.time()-TimeIn
    print("Building kernels : %.3f" % Elapsed)

    TimeIn=time.time()
    # Memory allocation on Host for result
    A_ocl = np.empty_like(a_np)
    B_ocl = np.empty_like(a_np)
    Elapsed=time.time()-TimeIn
    print("Allocation on Host for results : %.3f" % Elapsed)

    # Restart the clock so that this figure covers the device buffers only
    TimeIn=time.time()
    A_g = cl.Buffer(ctx, mf.WRITE_ONLY, A_ocl.nbytes)
    B_g = cl.Buffer(ctx, mf.WRITE_ONLY, B_ocl.nbytes)
    Elapsed=time.time()-TimeIn
    print("Allocation on Device for results : %.3f" % Elapsed)

    TimeIn=time.time()
    # Retrieve the kernel object "MyDFT" from the built program
    knl = prg.MyDFT  # Use this Kernel object for repeated calls
    Elapsed=time.time()-TimeIn
    print("Synthesis of kernel : %.3f" % Elapsed)

    TimeIn=time.time()
    # Call of kernel previously defined: one work-item per element of a_np
    CallCL=knl(queue, a_np.shape, None, a_g, b_g, A_g, B_g)
    CallCL.wait()
    Elapsed=time.time()-TimeIn
    print("Execution of kernel : %.3f" % Elapsed)

    TimeIn=time.time()
    # Copy from Device to Host
    cl.enqueue_copy(queue, A_ocl, A_g)
    cl.enqueue_copy(queue, B_ocl, B_g)
    Elapsed=time.time()-TimeIn
    print("Copy from Device 2 Host : %.3f" % Elapsed)

    # Liberation of memory
    a_g.release()
    b_g.release()
    A_g.release()
    B_g.release()

    return(A_ocl,B_ocl)
198 281 equemene
199 281 equemene
# CUDA complete operation
200 281 equemene
def CUDADFT(a_np,b_np,Device,Threads):
    """Compute the DFT of a_np+1j*b_np with a naive CUDA kernel.

    One CUDA thread computes one output bin. Each stage is timed with
    time.time() and the elapsed time is printed.

    Parameters:
        a_np: real parts of the input (numpy array; the kernel reads the
            buffer as float).
        b_np: imaginary parts of the input (same shape and type as a_np).
        Device: index of the CUDA device to use.
        Threads: threads per block. When the input size is not a multiple
            of it, the kernel is launched with one thread per block.

    Returns:
        Tuple (A_np, B_np) of arrays shaped like a_np with the real and
        imaginary parts of the transform.

    Raises:
        ImportError: if PyCUDA is not installed.
        SystemExit: if no CUDA device has the requested index.
    """
    import sys
    import time

    try:
        # For PyCUDA import
        import pycuda.driver as drv
        from pycuda.compiler import SourceModule
    except ImportError:
        print("Platform does not seem to support CUDA")
        raise

    drv.init()
    XPU=None
    for Id in range(drv.Device.count()):
        if Id==Device:
            XPU=drv.Device(Id)
            print("GPU selected %s" % XPU.name())
    print()

    if XPU is None:
        print("No GPU #%i found in all of %i devices, sorry..." % (Device,drv.Device.count()))
        sys.exit(1)

    Context=XPU.make_context()

    try:
        TimeIn=time.time()
        mod = SourceModule("""

#define PI 3.141592653589793

__global__ void MyDFT(float *A_g, float *B_g, const float *a_g,const float *b_g)
{
  const int gid = blockIdx.x*blockDim.x+threadIdx.x;
  uint size = gridDim.x*blockDim.x;
  float A=0.,B=0.;
  for (uint i=0; i<size;i++)
  {
     // Reduce gid*i modulo size in 64-bit integers: the product neither
     // overflows nor loses its low bits when converted to float
     uint k=(uint)(((unsigned long long)gid*i)%size);
     A+=a_g[i]*cos(2.*PI*(float)k/(float)size)+b_g[i]*sin(2.*PI*(float)k/(float)size);
     B+=-a_g[i]*sin(2.*PI*(float)k/(float)size)+b_g[i]*cos(2.*PI*(float)k/(float)size);
  }
  A_g[gid]=A;
  B_g[gid]=B;
}

""")
        Elapsed=time.time()-TimeIn
        print("Definition of kernel : %.3f" % Elapsed)

        TimeIn=time.time()
        MyDFT = mod.get_function("MyDFT")
        Elapsed=time.time()-TimeIn
        print("Synthesis of kernel : %.3f" % Elapsed)

        TimeIn=time.time()
        A_np = np.zeros_like(a_np)
        B_np = np.zeros_like(a_np)
        Elapsed=time.time()-TimeIn
        print("Allocation on Host for results : %.3f" % Elapsed)

        Size=a_np.size
        if (Size % Threads != 0):
            # Size cannot be split into full blocks of Threads threads:
            # fall back to Size blocks of a single thread each
            print("Impossible : %i not multiple of %i..." % (Size,Threads) )
            TimeIn=time.time()
            MyDFT(drv.Out(A_np), drv.Out(B_np), drv.In(a_np), drv.In(b_np),
                  block=(1,1,1), grid=(a_np.size,1))
            Elapsed=time.time()-TimeIn
            print("Execution of kernel : %.3f" % Elapsed)
        else:
            Blocks=int(Size/Threads)
            TimeIn=time.time()
            MyDFT(drv.Out(A_np), drv.Out(B_np), drv.In(a_np), drv.In(b_np),
                  block=(Threads,1,1), grid=(Blocks,1))
            Elapsed=time.time()-TimeIn
            print("Execution of kernel : %.3f" % Elapsed)
    finally:
        # Release the context even when compilation or the launch fails
        Context.pop()
        Context.detach()

    return(A_np,B_np)
276 281 equemene
277 281 equemene
import sys
278 281 equemene
import time
279 281 equemene
280 281 equemene
if __name__=='__main__':
    # Command-line driver: runs the selected DFT/FFT implementations on the
    # same random complex vector, prints a rate (elements per second) for
    # each of them, then prints the distance between pairs of results.

    # Default settings, overridable from the command line
    SIZE=1024
    Device=0
    NaiveMethod=False
    NumpyFFTMethod=True
    OpenCLFFTMethod=False
    NumpyMethod=False
    NumbaMethod=False
    OpenCLMethod=False
    CUDAMethod=True
    Threads=1

    import getopt

    HowToUse='%s -n [Naive] -y [numpY] -a [numbA] -o [OpenCL] -c [CUDA] -s <SizeOfVector> -d <DeviceId> -t <threads>'

    try:
        # "threads=" is declared so that the --threads branch below is reachable
        opts, args = getopt.getopt(sys.argv[1:],"nyaochs:d:t:",["size=","device=","threads="])
    except getopt.GetoptError:
        print(HowToUse % sys.argv[0])
        sys.exit(2)

    # List of Devices (never filled in this script, so Alu stays empty)
    Devices=[]
    Alu={}

    for opt, arg in opts:
        if opt == '-h':
            print(HowToUse % sys.argv[0])

            print("\nInformations about devices detected under OpenCL API:")
            # For PyOpenCL import
            try:
                import pyopencl as cl
                Id=0
                for platform in cl.get_platforms():
                    for device in platform.get_devices():
                        #deviceType=cl.device_type.to_string(device.type)
                        deviceType="xPU"
                        print("Device #%i from %s of type %s : %s" % (Id,platform.vendor.lstrip(),deviceType,device.name.lstrip()))
                        Id=Id+1

            except Exception:
                print("Your platform does not seem to support OpenCL")

            print("\nInformations about devices detected under CUDA API:")
            # For PyCUDA import
            try:
                import pycuda.driver as cuda
                cuda.init()
                for Id in range(cuda.Device.count()):
                    device=cuda.Device(Id)
                    print("Device #%i of type GPU : %s" % (Id,device.name()))
                print()
            except Exception:
                print("Your platform does not seem to support CUDA")

            sys.exit()

        elif opt in ("-d", "--device"):
            Device=int(arg)
        elif opt in ("-s", "--size"):
            SIZE = int(arg)
        elif opt in ("-t", "--threads"):
            Threads = int(arg)
        elif opt == "-n":
            NaiveMethod=True
        elif opt == "-y":
            NumpyMethod=True
        elif opt == "-a":
            NumbaMethod=True
        elif opt == "-o":
            OpenCLMethod=True
        elif opt == "-c":
            CUDAMethod=True

    print("Device Selection : %i" % Device)
    print("Size of complex vector : %i" % SIZE)
    print("DFT Naive computation %s " % NaiveMethod )
    print("DFT Numpy computation %s " % NumpyMethod )
    print("FFT Numpy computation %s " % NumpyFFTMethod )
    print("DFT Numba computation %s " % NumbaMethod )
    print("DFT OpenCL computation %s " % OpenCLMethod )
    print("DFT CUDA computation %s " % CUDAMethod )

    if CUDAMethod:
        try:
            # For PyCUDA import
            import pycuda.driver as cuda

            cuda.init()
            for Id in range(cuda.Device.count()):
                device=cuda.Device(Id)
                print("Device #%i of type GPU : %s" % (Id,device.name()))
                if Id in Devices:
                    Alu[Id]='GPU'

        except ImportError:
            print("Platform does not seem to support CUDA")
            # Skip the CUDA run instead of crashing later on the same import
            CUDAMethod=False

    if OpenCLMethod:
        try:
            # For PyOpenCL import
            import pyopencl as cl
            Id=0
            for platform in cl.get_platforms():
                for device in platform.get_devices():
                    #deviceType=cl.device_type.to_string(device.type)
                    deviceType="xPU"
                    print("Device #%i from %s of type %s : %s" % (Id,platform.vendor.lstrip().rstrip(),deviceType,device.name.lstrip().rstrip()))

                    if Id in Devices:
                        # Set the Alu as detected Device Type
                        Alu[Id]=deviceType
                    Id=Id+1
        except ImportError:
            print("Platform does not seem to support OpenCL")
            # Skip the OpenCL run instead of crashing later
            OpenCLMethod=False

    # a_np = np.ones(SIZE).astype(np.float32)
    # b_np = np.ones(SIZE).astype(np.float32)
    a_np = np.random.rand(SIZE).astype(np.float32)
    b_np = np.random.rand(SIZE).astype(np.float32)

    # Reference vectors: SIZE in bin 0, zero elsewhere. The "Precision"
    # figures computed against them below are distances to this vector.
    C_np = np.zeros(SIZE).astype(np.float32)
    D_np = np.zeros(SIZE).astype(np.float32)
    C_np[0] = np.float32(SIZE)
    D_np[0] = np.float32(SIZE)

    # Native & Naive Implementation
    if NaiveMethod:
        print("Performing naive implementation")
        TimeIn=time.time()
        c_np,d_np=MyDFT(a_np,b_np)
        NativeElapsed=time.time()-TimeIn
        NativeRate=int(SIZE/NativeElapsed)
        print("NativeRate: %i" % NativeRate)
        print("Precision: ",np.linalg.norm(c_np-C_np),
              np.linalg.norm(d_np-D_np))

    # Native & Numpy Implementation
    if NumpyMethod:
        print("Performing Numpy implementation")
        TimeIn=time.time()
        e_np,f_np=NumpyDFT(a_np,b_np)
        NumpyElapsed=time.time()-TimeIn
        NumpyRate=int(SIZE/NumpyElapsed)
        print("NumpyRate: %i" % NumpyRate)
        print("Precision: ",np.linalg.norm(e_np-C_np),
              np.linalg.norm(f_np-D_np))

    # Native & Numba Implementation
    if NumbaMethod:
        print("Performing Numba implementation")
        TimeIn=time.time()
        g_np,h_np=NumbaDFT(a_np,b_np)
        NumbaElapsed=time.time()-TimeIn
        NumbaRate=int(SIZE/NumbaElapsed)
        print("NumbaRate: %i" % NumbaRate)
        print("Precision: ",np.linalg.norm(g_np-C_np),
              np.linalg.norm(h_np-D_np))

    # OpenCL Implementation
    if OpenCLMethod:
        print("Performing OpenCL implementation")
        TimeIn=time.time()
        i_np,j_np=OpenCLDFT(a_np,b_np,Device)
        OpenCLElapsed=time.time()-TimeIn
        OpenCLRate=int(SIZE/OpenCLElapsed)
        print("OpenCLRate: %i" % OpenCLRate)
        print("Precision: ",np.linalg.norm(i_np-C_np),
              np.linalg.norm(j_np-D_np))

    # CUDA Implementation
    if CUDAMethod:
        print("Performing CUDA implementation")
        TimeIn=time.time()
        k_np,l_np=CUDADFT(a_np,b_np,Device,Threads)
        CUDAElapsed=time.time()-TimeIn
        CUDARate=int(SIZE/CUDAElapsed)
        print("CUDARate: %i" % CUDARate)
        print("Precision: ",np.linalg.norm(k_np-C_np),
              np.linalg.norm(l_np-D_np))

    # Numpy FFT Implementation
    if NumpyFFTMethod:
        print("Performing NumpyFFT implementation")
        TimeIn=time.time()
        m_np,n_np=NumpyFFT(a_np,b_np)
        NumpyFFTElapsed=time.time()-TimeIn
        NumpyFFTRate=int(SIZE/NumpyFFTElapsed)
        # Elapsed is a fraction of a second: %i would always print 0
        print("NumpyFFTElapsed: %.3f" % NumpyFFTElapsed)
        print("NumpyFFTRate: %i" % NumpyFFTRate)
        print("Precision: ",np.linalg.norm(m_np-C_np),
              np.linalg.norm(n_np-D_np))

    # OpenCL FFT Implementation
    if OpenCLFFTMethod:
        print("Performing OpenCLFFT implementation")
        TimeIn=time.time()
        # Separate names: i_np/j_np hold the OpenCL DFT results compared below
        o_np,p_np=OpenCLFFT(a_np,b_np,Device)
        OpenCLFFTElapsed=time.time()-TimeIn
        OpenCLFFTRate=int(SIZE/OpenCLFFTElapsed)
        print("OpenCLFFTElapsed: %.3f" % OpenCLFFTElapsed)
        print("OpenCLFFTRate: %i" % OpenCLFFTRate)
        print("Precision: ",np.linalg.norm(o_np-C_np),
              np.linalg.norm(p_np-D_np))

    # Cross-comparisons between implementations (both sides of the former
    # merge conflict are kept)
    if OpenCLMethod and NumpyFFTMethod:
        print(OpenCLMethod,NumpyFFTMethod)
        print("Precision: ",np.linalg.norm(m_np-i_np),
              np.linalg.norm(n_np-j_np))
        print((m_np-i_np),(n_np-j_np))
        print(i_np,j_np)
        print(m_np,n_np)
        print((i_np-m_np),(j_np-n_np))

    if CUDAMethod and NumpyFFTMethod:
        print(CUDAMethod,NumpyFFTMethod)
        print("Precision: ",np.linalg.norm(m_np-k_np),
              np.linalg.norm(n_np-l_np))
        print((m_np-k_np),(n_np-l_np))
        print(k_np,l_np)
        print(m_np,n_np)
        print((k_np-m_np),(l_np-n_np))

    if OpenCLMethod and NumpyMethod:
        print(OpenCLMethod,NumpyMethod)
        print("Precision: ",np.linalg.norm(e_np-i_np),
              np.linalg.norm(f_np-j_np))
        print((e_np-i_np),(f_np-j_np))

    if NumpyFFTMethod and NumpyMethod:
        print(NumpyFFTMethod,NumpyMethod)
        print("Precision: ",np.linalg.norm(e_np-m_np),
              np.linalg.norm(f_np-n_np))
        print(e_np,f_np)
        print(m_np,n_np)
        print((e_np-m_np),(f_np-n_np))

    if NumpyFFTMethod and NaiveMethod:
        print(NumpyFFTMethod,NaiveMethod)
        print("Precision: ",np.linalg.norm(c_np-m_np),
              np.linalg.norm(d_np-n_np))
        print(c_np,d_np)
        print(m_np,n_np)
        print((c_np-m_np),(d_np-n_np))

    if NumpyFFTMethod and NumbaMethod:
        print(NumpyFFTMethod,NumbaMethod)
        print("Precision: ",np.linalg.norm(g_np-m_np),
              np.linalg.norm(h_np-n_np))
        print(g_np,h_np)
        print(m_np,n_np)
        print((g_np-m_np),(h_np-n_np))

    if OpenCLFFTMethod and NumpyFFTMethod:
        print("NumpyOpenCLRatio: %f" % (OpenCLFFTRate/NumpyFFTRate))