Statistiques
| Révision :

root / ETSN / MyDFT_5.py @ 274

Historique | Voir | Annoter | Télécharger (7,12 ko)

1 271 equemene
#!/usr/bin/env python3
2 271 equemene
3 271 equemene
import numpy as np
4 271 equemene
import pyopencl as cl
5 271 equemene
from numpy import pi,cos,sin
6 271 equemene
7 271 equemene
# Naive Discrete Fourier Transform
8 271 equemene
def MyDFT(x,y):
9 271 equemene
    size=x.shape[0]
10 271 equemene
    X=np.zeros(size).astype(np.float32)
11 271 equemene
    Y=np.zeros(size).astype(np.float32)
12 271 equemene
    for i in range(size):
13 271 equemene
        for j in range(size):
14 271 equemene
            X[i]=X[i]+x[j]*cos(2.*pi*i*j/size)-y[j]*sin(2.*pi*i*j/size)
15 271 equemene
            Y[i]=Y[i]+x[j]*sin(2.*pi*i*j/size)+y[j]*cos(2.*pi*i*j/size)
16 271 equemene
    return(X,Y)
17 271 equemene
18 271 equemene
# Numpy Discrete Fourier Transform
19 271 equemene
def NumpyDFT(x,y):
20 271 equemene
    size=x.shape[0]
21 271 equemene
    X=np.zeros(size).astype(np.float32)
22 271 equemene
    Y=np.zeros(size).astype(np.float32)
23 271 equemene
    nj=np.multiply(2.0*np.pi/size,np.arange(size)).astype(np.float32)
24 271 equemene
    for i in range(size):
25 271 equemene
        X[i]=np.sum(np.subtract(np.multiply(np.cos(i*nj),x),np.multiply(np.sin(i*nj),y)))
26 271 equemene
        Y[i]=np.sum(np.add(np.multiply(np.sin(i*nj),x),np.multiply(np.cos(i*nj),y)))
27 271 equemene
    return(X,Y)
28 271 equemene
29 271 equemene
# Numba Discrete Fourier Transform
30 271 equemene
import numba
31 271 equemene
@numba.njit(parallel=True)
32 271 equemene
def NumbaDFT(x,y):
33 271 equemene
    size=x.shape[0]
34 271 equemene
    X=np.zeros(size).astype(np.float32)
35 271 equemene
    Y=np.zeros(size).astype(np.float32)
36 271 equemene
    nj=np.multiply(2.0*np.pi/size,np.arange(size)).astype(np.float32)
37 271 equemene
    for i in numba.prange(size):
38 271 equemene
        X[i]=np.sum(np.subtract(np.multiply(np.cos(i*nj),x),np.multiply(np.sin(i*nj),y)))
39 271 equemene
        Y[i]=np.sum(np.add(np.multiply(np.sin(i*nj),x),np.multiply(np.cos(i*nj),y)))
40 271 equemene
    return(X,Y)
41 271 equemene
42 271 equemene
# OpenCL complete operation
43 271 equemene
def OpenCLDFT(a_np,b_np):
44 271 equemene
45 271 equemene
    # Context creation
46 271 equemene
    ctx = cl.create_some_context()
47 271 equemene
    # Every process is stored in a queue
48 271 equemene
    queue = cl.CommandQueue(ctx)
49 271 equemene
50 271 equemene
    TimeIn=time.time()
51 271 equemene
    # Copy from Host to Device using pointers
52 271 equemene
    mf = cl.mem_flags
53 271 equemene
    a_g = cl.Buffer(ctx, mf.READ_ONLY | mf.COPY_HOST_PTR, hostbuf=a_np)
54 271 equemene
    b_g = cl.Buffer(ctx, mf.READ_ONLY | mf.COPY_HOST_PTR, hostbuf=b_np)
55 271 equemene
    Elapsed=time.time()-TimeIn
56 271 equemene
    print("Copy from Host 2 Device : %.3f" % Elapsed)
57 271 equemene
58 271 equemene
    TimeIn=time.time()
59 271 equemene
    # Definition of kernel under OpenCL
60 271 equemene
    prg = cl.Program(ctx, """
61 271 equemene

62 271 equemene
#define PI 3.141592653589793
63 271 equemene

64 271 equemene
__kernel void MyDFT(
65 271 equemene
    __global const float *a_g, __global const float *b_g, __global float *A_g, __global float *B_g)
66 271 equemene
{
67 271 equemene
  int gid = get_global_id(0);
68 271 equemene
  uint size = get_global_size(0);
69 271 equemene
  float A=0.,B=0.;
70 271 equemene
  for (uint i=0; i<size;i++)
71 271 equemene
  {
72 271 equemene
     A+=a_g[i]*cos(2.*PI*(float)(gid*i)/(float)size)-b_g[i]*sin(2.*PI*(float)(gid*i)/(float)size);
73 271 equemene
     B+=a_g[i]*sin(2.*PI*(float)(gid*i)/(float)size)+b_g[i]*cos(2.*PI*(float)(gid*i)/(float)size);
74 271 equemene
  }
75 271 equemene
  A_g[gid]=A;
76 271 equemene
  B_g[gid]=B;
77 271 equemene
}
78 271 equemene
""").build()
79 271 equemene
    Elapsed=time.time()-TimeIn
80 271 equemene
    print("Building kernels : %.3f" % Elapsed)
81 271 equemene
82 271 equemene
    TimeIn=time.time()
83 271 equemene
    # Memory allocation on Device for result
84 271 equemene
    A_ocl = np.empty_like(a_np)
85 271 equemene
    B_ocl = np.empty_like(a_np)
86 271 equemene
    Elapsed=time.time()-TimeIn
87 271 equemene
    print("Allocation on Host for results : %.3f" % Elapsed)
88 271 equemene
89 271 equemene
    A_g = cl.Buffer(ctx, mf.WRITE_ONLY, A_ocl.nbytes)
90 271 equemene
    B_g = cl.Buffer(ctx, mf.WRITE_ONLY, B_ocl.nbytes)
91 271 equemene
    Elapsed=time.time()-TimeIn
92 271 equemene
    print("Allocation on Device for results : %.3f" % Elapsed)
93 271 equemene
94 271 equemene
    TimeIn=time.time()
95 271 equemene
    # Synthesis of function "sillysum" inside Kernel Sources
96 271 equemene
    knl = prg.MyDFT  # Use this Kernel object for repeated calls
97 271 equemene
    Elapsed=time.time()-TimeIn
98 271 equemene
    print("Synthesis of kernel : %.3f" % Elapsed)
99 271 equemene
100 271 equemene
    TimeIn=time.time()
101 271 equemene
    # Call of kernel previously defined
102 271 equemene
    CallCL=knl(queue, a_np.shape, None, a_g, b_g, A_g, B_g)
103 271 equemene
    #
104 271 equemene
    CallCL.wait()
105 271 equemene
    Elapsed=time.time()-TimeIn
106 271 equemene
    print("Execution of kernel : %.3f" % Elapsed)
107 271 equemene
108 271 equemene
    TimeIn=time.time()
109 271 equemene
    # Copy from Device to Host
110 271 equemene
    cl.enqueue_copy(queue, A_ocl, A_g)
111 271 equemene
    cl.enqueue_copy(queue, B_ocl, B_g)
112 271 equemene
    Elapsed=time.time()-TimeIn
113 271 equemene
    print("Copy from Device 2 Host : %.3f" % Elapsed)
114 271 equemene
115 271 equemene
    return(A_ocl,B_ocl)
116 271 equemene
117 271 equemene
# CUDA Silly complete operation
118 271 equemene
def CUDADFT(a_np,b_np):
119 271 equemene
    import pycuda.autoinit
120 271 equemene
    import pycuda.driver as drv
121 271 equemene
    import numpy
122 271 equemene
123 271 equemene
    from pycuda.compiler import SourceModule
124 271 equemene
    TimeIn=time.time()
125 271 equemene
    mod = SourceModule("""
126 271 equemene

127 271 equemene
#define PI 3.141592653589793
128 271 equemene

129 271 equemene
__global__ void MyDFT(float *A_g, float *B_g, const float *a_g,const float *b_g)
130 271 equemene
{
131 271 equemene
  const int gid = blockIdx.x;
132 271 equemene
  uint size = gridDim.x;
133 271 equemene
  float A=0.,B=0.;
134 271 equemene
  for (uint i=0; i<size;i++)
135 271 equemene
  {
136 271 equemene
     A+=a_g[i]*cos(2.*PI*(float)(gid*i)/(float)size)-b_g[i]*sin(2.*PI*(float)(gid*i)/(float)size);
137 271 equemene
     B+=a_g[i]*sin(2.*PI*(float)(gid*i)/(float)size)+b_g[i]*cos(2.*PI*(float)(gid*i)/(float)size);
138 271 equemene
  }
139 271 equemene
  A_g[gid]=A;
140 271 equemene
  B_g[gid]=B;
141 271 equemene
}
142 271 equemene

143 271 equemene
""")
144 271 equemene
    Elapsed=time.time()-TimeIn
145 271 equemene
    print("Definition of kernel : %.3f" % Elapsed)
146 271 equemene
147 271 equemene
    TimeIn=time.time()
148 271 equemene
    MyDFT = mod.get_function("MyDFT")
149 271 equemene
    Elapsed=time.time()-TimeIn
150 271 equemene
    print("Synthesis of kernel : %.3f" % Elapsed)
151 271 equemene
152 271 equemene
    TimeIn=time.time()
153 271 equemene
    A_np = numpy.zeros_like(a_np)
154 271 equemene
    B_np = numpy.zeros_like(a_np)
155 271 equemene
    Elapsed=time.time()-TimeIn
156 271 equemene
    print("Allocation on Host for results : %.3f" % Elapsed)
157 271 equemene
158 271 equemene
    TimeIn=time.time()
159 271 equemene
    MyDFT(drv.Out(A_np), drv.Out(B_np), drv.In(a_np), drv.In(b_np),
160 271 equemene
          block=(1,1,1), grid=(a_np.size,1))
161 271 equemene
    Elapsed=time.time()-TimeIn
162 271 equemene
    print("Execution of kernel : %.3f" % Elapsed)
163 271 equemene
    return(A_np,B_np)
164 271 equemene
165 271 equemene
import sys
166 271 equemene
import time
167 271 equemene
168 271 equemene
if __name__=='__main__':
169 271 equemene
170 271 equemene
    # Size of input vectors definition based on stdin
171 271 equemene
    import sys
172 271 equemene
    try:
173 271 equemene
        SIZE=int(sys.argv[1])
174 271 equemene
        print("Size of vectors set to %i" % SIZE)
175 271 equemene
    except:
176 271 equemene
        SIZE=256
177 271 equemene
        print("Size of vectors set to default size %i" % SIZE)
178 271 equemene
179 271 equemene
    a_np = np.ones(SIZE).astype(np.float32)
180 271 equemene
    b_np = np.ones(SIZE).astype(np.float32)
181 271 equemene
182 271 equemene
    C_np = np.zeros(SIZE).astype(np.float32)
183 271 equemene
    D_np = np.zeros(SIZE).astype(np.float32)
184 271 equemene
    C_np[0] = np.float32(SIZE)
185 271 equemene
    D_np[0] = np.float32(SIZE)
186 271 equemene
187 272 equemene
    # # Native & Naive Implementation
188 272 equemene
    # print("Performing naive implementation")
189 272 equemene
    # TimeIn=time.time()
190 272 equemene
    # c_np,d_np=MyDFT(a_np,b_np)
191 272 equemene
    # NativeElapsed=time.time()-TimeIn
192 272 equemene
    # NativeRate=int(SIZE/NativeElapsed)
193 272 equemene
    # print("NativeRate: %i" % NativeRate)
194 272 equemene
    # print("Precision: ",np.linalg.norm(c_np-C_np),np.linalg.norm(d_np-D_np))
195 271 equemene
196 271 equemene
    # Native & Numpy Implementation
197 271 equemene
    print("Performing Numpy implementation")
198 271 equemene
    TimeIn=time.time()
199 271 equemene
    e_np,f_np=NumpyDFT(a_np,b_np)
200 271 equemene
    NumpyElapsed=time.time()-TimeIn
201 271 equemene
    NumpyRate=int(SIZE/NumpyElapsed)
202 271 equemene
    print("NumpyRate: %i" % NumpyRate)
203 271 equemene
    print("Precision: ",np.linalg.norm(e_np-C_np),np.linalg.norm(f_np-D_np))
204 271 equemene
205 271 equemene
    # Native & Numba Implementation
206 271 equemene
    print("Performing Numba implementation")
207 271 equemene
    TimeIn=time.time()
208 271 equemene
    g_np,h_np=NumbaDFT(a_np,b_np)
209 271 equemene
    NumbaElapsed=time.time()-TimeIn
210 271 equemene
    NumbaRate=int(SIZE/NumbaElapsed)
211 271 equemene
    print("NumbaRate: %i" % NumbaRate)
212 271 equemene
    print("Precision: ",np.linalg.norm(g_np-C_np),np.linalg.norm(h_np-D_np))
213 271 equemene
214 271 equemene
    # OpenCL Implementation
215 273 equemene
    print("Performing OpenCL implementation")
216 271 equemene
    TimeIn=time.time()
217 271 equemene
    i_np,j_np=OpenCLDFT(a_np,b_np)
218 271 equemene
    OpenCLElapsed=time.time()-TimeIn
219 271 equemene
    OpenCLRate=int(SIZE/OpenCLElapsed)
220 271 equemene
    print("OpenCLRate: %i" % OpenCLRate)
221 271 equemene
    print("Precision: ",np.linalg.norm(i_np-C_np),np.linalg.norm(j_np-D_np))
222 271 equemene
223 271 equemene
    # CUDA Implementation
224 273 equemene
    print("Performing CUDA implementation")
225 271 equemene
    TimeIn=time.time()
226 271 equemene
    k_np,l_np=CUDADFT(a_np,b_np)
227 271 equemene
    CUDAElapsed=time.time()-TimeIn
228 271 equemene
    CUDARate=int(SIZE/CUDAElapsed)
229 271 equemene
    print("CUDARate: %i" % CUDARate)
230 271 equemene
    print("Precision: ",np.linalg.norm(k_np-C_np),np.linalg.norm(l_np-D_np))