Statistiques
| Révision :

root / ETSN / MyDFT_4.py @ 271

Historique | Voir | Annoter | Télécharger (5,46 ko)

1 271 equemene
#!/usr/bin/env python3
2 271 equemene
3 271 equemene
import numpy as np
4 271 equemene
import pyopencl as cl
5 271 equemene
from numpy import pi,cos,sin
6 271 equemene
7 271 equemene
# Naive Discrete Fourier Transform
8 271 equemene
def MyDFT(x,y):
9 271 equemene
    size=x.shape[0]
10 271 equemene
    X=np.zeros(size).astype(np.float32)
11 271 equemene
    Y=np.zeros(size).astype(np.float32)
12 271 equemene
    for i in range(size):
13 271 equemene
        for j in range(size):
14 271 equemene
            X[i]=X[i]+x[j]*cos(2.*pi*i*j/size)-y[j]*sin(2.*pi*i*j/size)
15 271 equemene
            Y[i]=Y[i]+x[j]*sin(2.*pi*i*j/size)+y[j]*cos(2.*pi*i*j/size)
16 271 equemene
    return(X,Y)
17 271 equemene
18 271 equemene
# Numpy Discrete Fourier Transform
19 271 equemene
def NumpyDFT(x,y):
20 271 equemene
    size=x.shape[0]
21 271 equemene
    X=np.zeros(size).astype(np.float32)
22 271 equemene
    Y=np.zeros(size).astype(np.float32)
23 271 equemene
    nj=np.multiply(2.0*np.pi/size,np.arange(size)).astype(np.float32)
24 271 equemene
    for i in range(size):
25 271 equemene
        X[i]=np.sum(np.subtract(np.multiply(np.cos(i*nj),x),np.multiply(np.sin(i*nj),y)))
26 271 equemene
        Y[i]=np.sum(np.add(np.multiply(np.sin(i*nj),x),np.multiply(np.cos(i*nj),y)))
27 271 equemene
    return(X,Y)
28 271 equemene
29 271 equemene
# Numba Discrete Fourier Transform
30 271 equemene
import numba
31 271 equemene
@numba.njit(parallel=True)
32 271 equemene
def NumbaDFT(x,y):
33 271 equemene
    size=x.shape[0]
34 271 equemene
    X=np.zeros(size).astype(np.float32)
35 271 equemene
    Y=np.zeros(size).astype(np.float32)
36 271 equemene
    nj=np.multiply(2.0*np.pi/size,np.arange(size)).astype(np.float32)
37 271 equemene
    for i in numba.prange(size):
38 271 equemene
        X[i]=np.sum(np.subtract(np.multiply(np.cos(i*nj),x),np.multiply(np.sin(i*nj),y)))
39 271 equemene
        Y[i]=np.sum(np.add(np.multiply(np.sin(i*nj),x),np.multiply(np.cos(i*nj),y)))
40 271 equemene
    return(X,Y)
41 271 equemene
42 271 equemene
# OpenCL complete operation
43 271 equemene
def OpenCLDFT(a_np,b_np):
44 271 equemene
45 271 equemene
    # Context creation
46 271 equemene
    ctx = cl.create_some_context()
47 271 equemene
    # Every process is stored in a queue
48 271 equemene
    queue = cl.CommandQueue(ctx)
49 271 equemene
50 271 equemene
    TimeIn=time.time()
51 271 equemene
    # Copy from Host to Device using pointers
52 271 equemene
    mf = cl.mem_flags
53 271 equemene
    a_g = cl.Buffer(ctx, mf.READ_ONLY | mf.COPY_HOST_PTR, hostbuf=a_np)
54 271 equemene
    b_g = cl.Buffer(ctx, mf.READ_ONLY | mf.COPY_HOST_PTR, hostbuf=b_np)
55 271 equemene
    Elapsed=time.time()-TimeIn
56 271 equemene
    print("Copy from Host 2 Device : %.3f" % Elapsed)
57 271 equemene
58 271 equemene
    TimeIn=time.time()
59 271 equemene
    # Definition of kernel under OpenCL
60 271 equemene
    prg = cl.Program(ctx, """
61 271 equemene

62 271 equemene
#define PI 3.141592653589793
63 271 equemene

64 271 equemene
__kernel void MyDFT(
65 271 equemene
    __global const float *a_g, __global const float *b_g, __global float *A_g, __global float *B_g)
66 271 equemene
{
67 271 equemene
  int gid = get_global_id(0);
68 271 equemene
  uint size = get_global_size(0);
69 271 equemene
  float A=0.,B=0.;
70 271 equemene
  for (uint i=0; i<size;i++)
71 271 equemene
  {
72 271 equemene
     A+=a_g[i]*cos(2.*PI*(float)(gid*i)/(float)size)-b_g[i]*sin(2.*PI*(float)(gid*i)/(float)size);
73 271 equemene
     B+=a_g[i]*sin(2.*PI*(float)(gid*i)/(float)size)+b_g[i]*cos(2.*PI*(float)(gid*i)/(float)size);
74 271 equemene
  }
75 271 equemene
  A_g[gid]=A;
76 271 equemene
  B_g[gid]=B;
77 271 equemene
}
78 271 equemene
""").build()
79 271 equemene
    Elapsed=time.time()-TimeIn
80 271 equemene
    print("Building kernels : %.3f" % Elapsed)
81 271 equemene
82 271 equemene
    TimeIn=time.time()
83 271 equemene
    # Memory allocation on Device for result
84 271 equemene
    A_ocl = np.empty_like(a_np)
85 271 equemene
    B_ocl = np.empty_like(a_np)
86 271 equemene
    Elapsed=time.time()-TimeIn
87 271 equemene
    print("Allocation on Host for results : %.3f" % Elapsed)
88 271 equemene
89 271 equemene
    A_g = cl.Buffer(ctx, mf.WRITE_ONLY, A_ocl.nbytes)
90 271 equemene
    B_g = cl.Buffer(ctx, mf.WRITE_ONLY, B_ocl.nbytes)
91 271 equemene
    Elapsed=time.time()-TimeIn
92 271 equemene
    print("Allocation on Device for results : %.3f" % Elapsed)
93 271 equemene
94 271 equemene
    TimeIn=time.time()
95 271 equemene
    # Synthesis of function "sillysum" inside Kernel Sources
96 271 equemene
    knl = prg.MyDFT  # Use this Kernel object for repeated calls
97 271 equemene
    Elapsed=time.time()-TimeIn
98 271 equemene
    print("Synthesis of kernel : %.3f" % Elapsed)
99 271 equemene
100 271 equemene
    TimeIn=time.time()
101 271 equemene
    # Call of kernel previously defined
102 271 equemene
    CallCL=knl(queue, a_np.shape, None, a_g, b_g, A_g, B_g)
103 271 equemene
    #
104 271 equemene
    CallCL.wait()
105 271 equemene
    Elapsed=time.time()-TimeIn
106 271 equemene
    print("Execution of kernel : %.3f" % Elapsed)
107 271 equemene
108 271 equemene
    TimeIn=time.time()
109 271 equemene
    # Copy from Device to Host
110 271 equemene
    cl.enqueue_copy(queue, A_ocl, A_g)
111 271 equemene
    cl.enqueue_copy(queue, B_ocl, B_g)
112 271 equemene
    Elapsed=time.time()-TimeIn
113 271 equemene
    print("Copy from Device 2 Host : %.3f" % Elapsed)
114 271 equemene
115 271 equemene
    return(A_ocl,B_ocl)
116 271 equemene
117 271 equemene
import sys
118 271 equemene
import time
119 271 equemene
120 271 equemene
if __name__=='__main__':
121 271 equemene
122 271 equemene
    # Size of input vectors definition based on stdin
123 271 equemene
    import sys
124 271 equemene
    try:
125 271 equemene
        SIZE=int(sys.argv[1])
126 271 equemene
        print("Size of vectors set to %i" % SIZE)
127 271 equemene
    except:
128 271 equemene
        SIZE=256
129 271 equemene
        print("Size of vectors set to default size %i" % SIZE)
130 271 equemene
131 271 equemene
    a_np = np.ones(SIZE).astype(np.float32)
132 271 equemene
    b_np = np.ones(SIZE).astype(np.float32)
133 271 equemene
134 271 equemene
    C_np = np.zeros(SIZE).astype(np.float32)
135 271 equemene
    D_np = np.zeros(SIZE).astype(np.float32)
136 271 equemene
    C_np[0] = np.float32(SIZE)
137 271 equemene
    D_np[0] = np.float32(SIZE)
138 271 equemene
139 271 equemene
    # # Native & Naive Implementation
140 271 equemene
    # print("Performing naive implementation")
141 271 equemene
    # TimeIn=time.time()
142 271 equemene
    # c_np,d_np=MyDFT(a_np,b_np)
143 271 equemene
    # NativeElapsed=time.time()-TimeIn
144 271 equemene
    # NativeRate=int(SIZE/NativeElapsed)
145 271 equemene
    # print("NativeRate: %i" % NativeRate)
146 271 equemene
    # print("Precision: ",np.linalg.norm(c_np-C_np),np.linalg.norm(d_np-D_np))
147 271 equemene
148 271 equemene
    # Native & Numpy Implementation
149 271 equemene
    print("Performing Numpy implementation")
150 271 equemene
    TimeIn=time.time()
151 271 equemene
    e_np,f_np=NumpyDFT(a_np,b_np)
152 271 equemene
    NumpyElapsed=time.time()-TimeIn
153 271 equemene
    NumpyRate=int(SIZE/NumpyElapsed)
154 271 equemene
    print("NumpyRate: %i" % NumpyRate)
155 271 equemene
    print("Precision: ",np.linalg.norm(e_np-C_np),np.linalg.norm(f_np-D_np))
156 271 equemene
157 271 equemene
    # Native & Numba Implementation
158 271 equemene
    print("Performing Numba implementation")
159 271 equemene
    TimeIn=time.time()
160 271 equemene
    g_np,h_np=NumbaDFT(a_np,b_np)
161 271 equemene
    NumbaElapsed=time.time()-TimeIn
162 271 equemene
    NumbaRate=int(SIZE/NumbaElapsed)
163 271 equemene
    print("NumbaRate: %i" % NumbaRate)
164 271 equemene
    print("Precision: ",np.linalg.norm(g_np-C_np),np.linalg.norm(h_np-D_np))
165 271 equemene
166 271 equemene
    # OpenCL Implementation
167 271 equemene
    TimeIn=time.time()
168 271 equemene
    i_np,j_np=OpenCLDFT(a_np,b_np)
169 271 equemene
    OpenCLElapsed=time.time()-TimeIn
170 271 equemene
    OpenCLRate=int(SIZE/OpenCLElapsed)
171 271 equemene
    print("OpenCLRate: %i" % OpenCLRate)
172 271 equemene
    print("Precision: ",np.linalg.norm(i_np-C_np),np.linalg.norm(j_np-D_np))