#!/usr/bin/env python3

import numpy as np
import pyopencl as cl
from numpy import pi,cos,sin
import sys
import time

# Naive Discrete Fourier Transform
def MyDFT(x,y):
    size=x.shape[0]
    X=np.zeros(size).astype(np.float32)
    Y=np.zeros(size).astype(np.float32)
    for i in range(size):
        for j in range(size):
            X[i]=X[i]+x[j]*cos(2.*pi*i*j/size)-y[j]*sin(2.*pi*i*j/size)
            Y[i]=Y[i]+x[j]*sin(2.*pi*i*j/size)+y[j]*cos(2.*pi*i*j/size)
    return(X,Y)
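
# Added sketch (not in the original revision): the loops above use the
# e^{+2*pi*i*j*k/N} convention, i.e. N times numpy's inverse FFT. A minimal
# cross-check helper, assuming float32 inputs as used below:
def CheckDFT(x,y,dft=MyDFT):
    z=(x+1j*y).astype(np.complex64)
    X,Y=dft(x,y)
    ref=np.fft.ifft(z)*z.size
    # Loose tolerance: float32 accumulation over z.size terms
    return np.allclose(X+1j*Y,ref,atol=1e-3*z.size)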

# Numpy Discrete Fourier Transform
def NumpyDFT(x,y):
    size=x.shape[0]
    X=np.zeros(size).astype(np.float32)
    Y=np.zeros(size).astype(np.float32)
    nj=np.multiply(2.0*np.pi/size,np.arange(size)).astype(np.float32)
    for i in range(size):
        X[i]=np.sum(np.subtract(np.multiply(np.cos(i*nj),x),np.multiply(np.sin(i*nj),y)))
        Y[i]=np.sum(np.add(np.multiply(np.sin(i*nj),x),np.multiply(np.cos(i*nj),y)))
    return(X,Y)
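
# Added sketch (assumption, not in the original revision): the remaining
# Python loop can also be removed with an N x N angle matrix and two
# matrix-vector products, trading memory for speed on moderate sizes:
def NumpyDFTMatrix(x,y):
    size=x.shape[0]
    angles=np.outer(np.arange(size),np.arange(size))*(2.0*np.pi/size)
    c=np.cos(angles).astype(np.float32)
    s=np.sin(angles).astype(np.float32)
    return(np.dot(c,x)-np.dot(s,y),np.dot(s,x)+np.dot(c,y))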

# Numba Discrete Fourier Transform
import numba
@numba.njit(parallel=True)
def NumbaDFT(x,y):
    size=x.shape[0]
    X=np.zeros(size).astype(np.float32)
    Y=np.zeros(size).astype(np.float32)
    nj=np.multiply(2.0*np.pi/size,np.arange(size)).astype(np.float32)
    for i in numba.prange(size):
        X[i]=np.sum(np.subtract(np.multiply(np.cos(i*nj),x),np.multiply(np.sin(i*nj),y)))
        Y[i]=np.sum(np.add(np.multiply(np.sin(i*nj),x),np.multiply(np.cos(i*nj),y)))
    return(X,Y)

# OpenCL complete operation
def OpenCLDFT(a_np,b_np):

    # Context creation
    ctx = cl.create_some_context()
    # Every process is stored in a queue
    queue = cl.CommandQueue(ctx)

    TimeIn=time.time()
    # Copy from Host to Device using pointers
    mf = cl.mem_flags
    a_g = cl.Buffer(ctx, mf.READ_ONLY | mf.COPY_HOST_PTR, hostbuf=a_np)
    b_g = cl.Buffer(ctx, mf.READ_ONLY | mf.COPY_HOST_PTR, hostbuf=b_np)
    Elapsed=time.time()-TimeIn
    print("Copy from Host 2 Device : %.3f" % Elapsed)

    TimeIn=time.time()
    # Definition of kernel under OpenCL
    prg = cl.Program(ctx, """

#define PI 3.141592653589793

__kernel void MyDFT(
    __global const float *a_g, __global const float *b_g, __global float *A_g, __global float *B_g)
{
  int gid = get_global_id(0);
  uint size = get_global_size(0);
  float A=0.,B=0.;
  for (uint i=0; i<size; i++)
  {
     A+=a_g[i]*cos(2.*PI*(float)(gid*i)/(float)size)-b_g[i]*sin(2.*PI*(float)(gid*i)/(float)size);
     B+=a_g[i]*sin(2.*PI*(float)(gid*i)/(float)size)+b_g[i]*cos(2.*PI*(float)(gid*i)/(float)size);
  }
  A_g[gid]=A;
  B_g[gid]=B;
}
""").build()
    Elapsed=time.time()-TimeIn
    print("Building kernels : %.3f" % Elapsed)

    TimeIn=time.time()
    # Memory allocation on Host for results
    A_ocl = np.empty_like(a_np)
    B_ocl = np.empty_like(a_np)
    Elapsed=time.time()-TimeIn
    print("Allocation on Host for results : %.3f" % Elapsed)

    TimeIn=time.time()
    # Memory allocation on Device for results
    A_g = cl.Buffer(ctx, mf.WRITE_ONLY, A_ocl.nbytes)
    B_g = cl.Buffer(ctx, mf.WRITE_ONLY, B_ocl.nbytes)
    Elapsed=time.time()-TimeIn
    print("Allocation on Device for results : %.3f" % Elapsed)

    TimeIn=time.time()
    # Synthesis of function "MyDFT" inside Kernel Sources
    knl = prg.MyDFT  # Use this Kernel object for repeated calls
    Elapsed=time.time()-TimeIn
    print("Synthesis of kernel : %.3f" % Elapsed)

    TimeIn=time.time()
    # Call of kernel previously defined
    CallCL=knl(queue, a_np.shape, None, a_g, b_g, A_g, B_g)
    # Wait for kernel completion before stopping the timer
    CallCL.wait()
    Elapsed=time.time()-TimeIn
    print("Execution of kernel : %.3f" % Elapsed)

    TimeIn=time.time()
    # Copy from Device to Host
    cl.enqueue_copy(queue, A_ocl, A_g)
    cl.enqueue_copy(queue, B_ocl, B_g)
    Elapsed=time.time()-TimeIn
    print("Copy from Device 2 Host : %.3f" % Elapsed)

    # Release device buffers
    a_g.release()
    b_g.release()
    A_g.release()
    B_g.release()

    return(A_ocl,B_ocl)
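
# Added sketch (assumption, not in the original revision): OpenCLDFT ignores
# the -d/--device option because cl.create_some_context() chooses a device
# interactively (or via the PYOPENCL_CTX environment variable). An explicit
# context matching the flat device numbering printed by this script could be:
def OpenCLContextForDevice(Device):
    Id=0
    for platform in cl.get_platforms():
        for device in platform.get_devices():
            if Id==Device:
                return cl.Context([device])
            Id=Id+1
    raise ValueError("Device #%i not found" % Device)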

# CUDA complete operation
def CUDADFT(a_np,b_np):
    import pycuda.autoinit
    import pycuda.driver as drv

    from pycuda.compiler import SourceModule
    TimeIn=time.time()
    mod = SourceModule("""

#define PI 3.141592653589793

__global__ void MyDFT(float *A_g, float *B_g, const float *a_g,const float *b_g)
{
  const int gid = blockIdx.x;
  uint size = gridDim.x;
  float A=0.,B=0.;
  for (uint i=0; i<size; i++)
  {
     A+=a_g[i]*cos(2.*PI*(float)(gid*i)/(float)size)-b_g[i]*sin(2.*PI*(float)(gid*i)/(float)size);
     B+=a_g[i]*sin(2.*PI*(float)(gid*i)/(float)size)+b_g[i]*cos(2.*PI*(float)(gid*i)/(float)size);
  }
  A_g[gid]=A;
  B_g[gid]=B;
}

""")
    Elapsed=time.time()-TimeIn
    print("Definition of kernel : %.3f" % Elapsed)

    TimeIn=time.time()
    MyDFT = mod.get_function("MyDFT")
    Elapsed=time.time()-TimeIn
    print("Synthesis of kernel : %.3f" % Elapsed)

    TimeIn=time.time()
    A_np = np.zeros_like(a_np)
    B_np = np.zeros_like(a_np)
    Elapsed=time.time()-TimeIn
    print("Allocation on Host for results : %.3f" % Elapsed)

    TimeIn=time.time()
    MyDFT(drv.Out(A_np), drv.Out(B_np), drv.In(a_np), drv.In(b_np),
          block=(1,1,1), grid=(a_np.size,1))
    Elapsed=time.time()-TimeIn
    print("Execution of kernel : %.3f" % Elapsed)
    return(A_np,B_np)
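
# Added note (assumption, untested here): the launch above runs one thread
# per block (block=(1,1,1), grid=(N,1)), which is simple but leaves most of
# the GPU idle. A common variant for sizes that are multiples of 32 would be
#     MyDFT(drv.Out(A_np), drv.Out(B_np), drv.In(a_np), drv.In(b_np),
#           block=(32,1,1), grid=(a_np.size//32,1))
# with the kernel indexing changed accordingly:
#     gid = blockIdx.x*blockDim.x + threadIdx.x;
#     size = gridDim.x*blockDim.x;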

if __name__=='__main__':

    GpuStyle='OpenCL'
    SIZE=1024
    Device=0

    import getopt

    HowToUse='%s -g <CUDA/OpenCL> -s <SizeOfVector> -d <DeviceId>'

    try:
        opts, args = getopt.getopt(sys.argv[1:],"hg:s:d:",["gpustyle=","size=","device="])
    except getopt.GetoptError:
        print(HowToUse % sys.argv[0])
        sys.exit(2)

    # List of Devices
    Devices=[]
    Alu={}

    for opt, arg in opts:
        if opt == '-h':
            print(HowToUse % sys.argv[0])

            print("\nInformation about devices detected under OpenCL API:")
            # For PyOpenCL import
            try:
                import pyopencl as cl
                Id=0
                for platform in cl.get_platforms():
                    for device in platform.get_devices():
                        #deviceType=cl.device_type.to_string(device.type)
                        deviceType="xPU"
                        print("Device #%i from %s of type %s : %s" % (Id,platform.vendor.lstrip(),deviceType,device.name.lstrip()))
                        Id=Id+1

            except:
                print("Your platform does not seem to support OpenCL")

            print("\nInformation about devices detected under CUDA API:")
            # For PyCUDA import
            try:
                import pycuda.driver as cuda
                cuda.init()
                for Id in range(cuda.Device.count()):
                    device=cuda.Device(Id)
                    print("Device #%i of type GPU : %s" % (Id,device.name()))
                print()
            except:
                print("Your platform does not seem to support CUDA")

            sys.exit()

        elif opt in ("-d", "--device"):
            Device=int(arg)
            # Remember the selection so the device listing below can tag it
            Devices.append(Device)
        elif opt in ("-g", "--gpustyle"):
            GpuStyle = arg
        elif opt in ("-s", "--size"):
            SIZE = int(arg)

    print("Device Selection : %i" % Device)
    print("GpuStyle used : %s" % GpuStyle)
    print("Size of complex vector : %i" % SIZE)

    if GpuStyle=='CUDA':
        try:
            # For PyCUDA import
            import pycuda.driver as cuda

            cuda.init()
            for Id in range(cuda.Device.count()):
                device=cuda.Device(Id)
                print("Device #%i of type GPU : %s" % (Id,device.name()))
                if Id in Devices:
                    Alu[Id]='GPU'

        except ImportError:
            print("Platform does not seem to support CUDA")

    if GpuStyle=='OpenCL':
        try:
            # For PyOpenCL import
            import pyopencl as cl
            Id=0
            for platform in cl.get_platforms():
                for device in platform.get_devices():
                    #deviceType=cl.device_type.to_string(device.type)
                    deviceType="xPU"
                    print("Device #%i from %s of type %s : %s" % (Id,platform.vendor.lstrip().rstrip(),deviceType,device.name.lstrip().rstrip()))

                    if Id in Devices:
                        # Set the Alu as detected Device Type
                        Alu[Id]=deviceType
                    Id=Id+1
        except ImportError:
            print("Platform does not seem to support OpenCL")

    a_np = np.ones(SIZE).astype(np.float32)
    b_np = np.ones(SIZE).astype(np.float32)

    # Analytic reference: for a constant 1+1j input, the DFT is SIZE*(1+1j)
    # in bin 0 and zero everywhere else
    C_np = np.zeros(SIZE).astype(np.float32)
    D_np = np.zeros(SIZE).astype(np.float32)
    C_np[0] = np.float32(SIZE)
    D_np[0] = np.float32(SIZE)

    # # Native & Naive Implementation
    # print("Performing naive implementation")
    # TimeIn=time.time()
    # c_np,d_np=MyDFT(a_np,b_np)
    # NativeElapsed=time.time()-TimeIn
    # NativeRate=int(SIZE/NativeElapsed)
    # print("NativeRate: %i" % NativeRate)
    # print("Precision: ",np.linalg.norm(c_np-C_np),np.linalg.norm(d_np-D_np))

    # # Native & Numpy Implementation
    # print("Performing Numpy implementation")
    # TimeIn=time.time()
    # e_np,f_np=NumpyDFT(a_np,b_np)
    # NumpyElapsed=time.time()-TimeIn
    # NumpyRate=int(SIZE/NumpyElapsed)
    # print("NumpyRate: %i" % NumpyRate)
    # print("Precision: ",np.linalg.norm(e_np-C_np),np.linalg.norm(f_np-D_np))

    # # Native & Numba Implementation
    # print("Performing Numba implementation")
    # TimeIn=time.time()
    # g_np,h_np=NumbaDFT(a_np,b_np)
    # NumbaElapsed=time.time()-TimeIn
    # NumbaRate=int(SIZE/NumbaElapsed)
    # print("NumbaRate: %i" % NumbaRate)
    # print("Precision: ",np.linalg.norm(g_np-C_np),np.linalg.norm(h_np-D_np))

    # # OpenCL Implementation
    # print("Performing OpenCL implementation")
    # TimeIn=time.time()
    # i_np,j_np=OpenCLDFT(a_np,b_np)
    # OpenCLElapsed=time.time()-TimeIn
    # OpenCLRate=int(SIZE/OpenCLElapsed)
    # print("OpenCLRate: %i" % OpenCLRate)
    # print("Precision: ",np.linalg.norm(i_np-C_np),np.linalg.norm(j_np-D_np))

    # # CUDA Implementation
    # print("Performing CUDA implementation")
    # TimeIn=time.time()
    # k_np,l_np=CUDADFT(a_np,b_np)
    # CUDAElapsed=time.time()-TimeIn
    # CUDARate=int(SIZE/CUDAElapsed)
    # print("CUDARate: %i" % CUDARate)
    # print("Precision: ",np.linalg.norm(k_np-C_np),np.linalg.norm(l_np-D_np))
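
    # Added sketch (assumption, not in the original revision): the benchmarks
    # above are all commented out in this revision, so nothing runs after the
    # setup. A minimal driver for the backend selected with -g, checked
    # against the analytic reference C_np/D_np:
    print("Performing %s implementation" % GpuStyle)
    TimeIn=time.time()
    if GpuStyle=='CUDA':
        m_np,n_np=CUDADFT(a_np,b_np)
    else:
        m_np,n_np=OpenCLDFT(a_np,b_np)
    GpuElapsed=time.time()-TimeIn
    GpuRate=int(SIZE/GpuElapsed)
    print("%sRate: %i" % (GpuStyle,GpuRate))
    print("Precision: ",np.linalg.norm(m_np-C_np),np.linalg.norm(n_np-D_np))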