Statistiques
| Révision :

root / ETSN / MySteps_5.py @ 280

Historique | Voir | Annoter | Télécharger (8,37 ko)

1
#!/usr/bin/env python3
2

    
3
import numpy as np
4
import pyopencl as cl
5

    
6
# Stack of 16 chained arithmetical functions: every forward function is
# followed by its inverse, so the whole pipeline is numerically (almost)
# the identity on suitable inputs, with accumulated rounding error.
def MySillyFunction(x):
    # Same operation order as the original nested expression:
    # cos -> arccos -> sin -> ... -> log -> sqrt, then square the result.
    steps = (np.arccos, np.sin, np.arcsin, np.tan, np.arctan,
             np.cosh, np.arccosh, np.sinh, np.arcsinh, np.tanh,
             np.arctanh, np.exp, np.log, np.sqrt)
    y = np.cos(x)
    for op in steps:
        y = op(y)
    return np.power(y, 2)
9

    
10
# Native operation under NumPy (for prototyping & tests)
def NativeAddition(a_np, b_np):
    """Return the element-wise sum of the two input arrays."""
    return np.add(a_np, b_np)
13

    
14
# Native operation with MySillyFunction under NumPy (for prototyping & tests)
def NativeSillyAddition(a_np, b_np):
    """Apply MySillyFunction to each operand, then add element-wise."""
    silly_a = MySillyFunction(a_np)
    silly_b = MySillyFunction(b_np)
    return silly_a + silly_b
17

    
18
# CUDA complete operation
def CUDAAddition(a_np, b_np):
    """Add two float32 vectors element-wise with a PyCUDA kernel.

    Launches one block per element (block=(1,1,1), grid=(size,1)) and
    returns the sum as a NumPy array shaped like ``a_np``.

    NOTE(review): importing pycuda.autoinit creates the CUDA context as an
    import side effect, so pycuda and a CUDA device are required at call time.
    """
    import pycuda.autoinit
    import pycuda.driver as drv
    from pycuda.compiler import SourceModule

    mod = SourceModule("""
    __global__ void sum(float *dest, float *a, float *b)
{
  // const int i = threadIdx.x;
  const int i = blockIdx.x;
  dest[i] = a[i] + b[i];
}
""")

    # Bind the kernel under a distinct name: do not shadow the builtin `sum`.
    sum_kernel = mod.get_function("sum")

    # Host-side output buffer; drv.Out copies it back after the launch.
    res_np = np.zeros_like(a_np)
    sum_kernel(drv.Out(res_np), drv.In(a_np), drv.In(b_np),
               block=(1, 1, 1), grid=(a_np.size, 1))
    return res_np
41

    
42
# CUDA Silly complete operation: apply MySillyFunction to both operands on
# the GPU and add the results, printing the duration of every stage.
def CUDASillyAddition(a_np, b_np):
    import pycuda.autoinit
    import pycuda.driver as drv
    import numpy

    from pycuda.compiler import SourceModule

    t0 = time.time()
    # Compile the device-side MySillyFunction plus the sillysum kernel.
    mod = SourceModule("""
__device__ float MySillyFunction(float x)
{
    return(pow(sqrt(log(exp(atanh(tanh(asinh(sinh(acosh(cosh(atan(tan(asin(sin(acos(cos(x))))))))))))))),2)); 
}

__global__ void sillysum(float *dest, float *a, float *b)
{
  const int i = blockIdx.x;
  dest[i] = MySillyFunction(a[i]) + MySillyFunction(b[i]);
}
""")
    print("Definition of kernel : %.3f" % (time.time() - t0))

    t0 = time.time()
    # Fetch the compiled kernel object.
    sillysum = mod.get_function("sillysum")
    print("Synthesis of kernel : %.3f" % (time.time() - t0))

    t0 = time.time()
    # Host-side output buffer; drv.Out copies it back after the launch.
    res_np = numpy.zeros_like(a_np)
    print("Allocation on Host for results : %.3f" % (time.time() - t0))

    t0 = time.time()
    # One block per element, a single thread per block.
    sillysum(drv.Out(res_np), drv.In(a_np), drv.In(b_np),
             block=(1, 1, 1), grid=(a_np.size, 1))
    print("Execution of kernel : %.3f" % (time.time() - t0))
    return res_np
82

    
83
# OpenCL complete operation
def OpenCLAddition(a_np, b_np):
    """Add two float32 vectors element-wise with a PyOpenCL kernel.

    Prints the wall-clock duration of each stage (host->device copy, build,
    device allocation, kernel execution, device->host copy) and returns the
    sum as a NumPy array shaped like ``a_np``.
    """
    # Context creation
    ctx = cl.create_some_context()
    # Every process is stored in a queue
    queue = cl.CommandQueue(ctx)

    TimeIn = time.time()
    # Copy from Host to Device using pointers
    mf = cl.mem_flags
    a_g = cl.Buffer(ctx, mf.READ_ONLY | mf.COPY_HOST_PTR, hostbuf=a_np)
    b_g = cl.Buffer(ctx, mf.READ_ONLY | mf.COPY_HOST_PTR, hostbuf=b_np)
    Elapsed = time.time() - TimeIn
    print("Copy from Host 2 Device : %.3f" % Elapsed)

    TimeIn = time.time()
    # Definition of kernel under OpenCL
    prg = cl.Program(ctx, """
__kernel void sum(
    __global const float *a_g, __global const float *b_g, __global float *res_g)
{
  int gid = get_global_id(0);
  res_g[gid] = a_g[gid] + b_g[gid];
}
""").build()
    Elapsed = time.time() - TimeIn
    print("Building kernels : %.3f" % Elapsed)

    TimeIn = time.time()
    # Memory allocation on Device for result
    res_g = cl.Buffer(ctx, mf.WRITE_ONLY, a_np.nbytes)
    Elapsed = time.time() - TimeIn
    print("Allocation on Device for results : %.3f" % Elapsed)

    TimeIn = time.time()
    # Synthesis of function "sum" inside Kernel Sources
    knl = prg.sum  # Use this Kernel object for repeated calls
    Elapsed = time.time() - TimeIn
    print("Synthesis of kernel : %.3f" % Elapsed)

    TimeIn = time.time()
    # Call of kernel previously defined; wait on the returned event so the
    # measured time covers execution, not just enqueueing (consistent with
    # OpenCLSillyAddition).
    CallCL = knl(queue, a_np.shape, None, a_g, b_g, res_g)
    CallCL.wait()
    Elapsed = time.time() - TimeIn
    print("Execution of kernel : %.3f" % Elapsed)

    TimeIn = time.time()
    # Creation of vector for result with same size as input vectors
    res_np = np.empty_like(a_np)
    Elapsed = time.time() - TimeIn
    print("Allocation on Host for results: %.3f" % Elapsed)

    TimeIn = time.time()
    # Copy from Device to Host
    cl.enqueue_copy(queue, res_np, res_g)
    Elapsed = time.time() - TimeIn
    print("Copy from Device 2 Host : %.3f" % Elapsed)

    # Liberation of memory
    a_g.release()
    b_g.release()
    res_g.release()

    return res_np
148

    
149
# OpenCL Silly complete operation: apply MySillyFunction to both input
# vectors on the device and add the results, timing every stage.
def OpenCLSillyAddition(a_np, b_np):
    # OpenCL context and its command queue (every operation goes through it).
    ctx = cl.create_some_context()
    queue = cl.CommandQueue(ctx)

    t0 = time.time()
    # Host -> Device transfer of both operands.
    mf = cl.mem_flags
    a_g = cl.Buffer(ctx, mf.READ_ONLY | mf.COPY_HOST_PTR, hostbuf=a_np)
    b_g = cl.Buffer(ctx, mf.READ_ONLY | mf.COPY_HOST_PTR, hostbuf=b_np)
    print("Copy from Host 2 Device : %.3f" % (time.time() - t0))

    t0 = time.time()
    # Build the program: device-side MySillyFunction plus two kernels
    # (only "sillysum" is launched below; "sum" is compiled but unused here).
    prg = cl.Program(ctx, """

float MySillyFunction(float x)
{
    return(pow(sqrt(log(exp(atanh(tanh(asinh(sinh(acosh(cosh(atan(tan(asin(sin(acos(cos(x))))))))))))))),2)); 
}

__kernel void sillysum(
    __global const float *a_g, __global const float *b_g, __global float *res_g)
{
  int gid = get_global_id(0);
  res_g[gid] = MySillyFunction(a_g[gid]) + MySillyFunction(b_g[gid]);
}

__kernel void sum(
    __global const float *a_g, __global const float *b_g, __global float *res_g)
{
  int gid = get_global_id(0);
  res_g[gid] = a_g[gid] + b_g[gid];
}
""").build()
    print("Building kernels : %.3f" % (time.time() - t0))

    t0 = time.time()
    # Device-side buffer receiving the result.
    res_g = cl.Buffer(ctx, mf.WRITE_ONLY, a_np.nbytes)
    print("Allocation on Device for results : %.3f" % (time.time() - t0))

    t0 = time.time()
    # Fetch the "sillysum" kernel object (reusable across calls).
    silly_knl = prg.sillysum
    print("Synthesis of kernel : %.3f" % (time.time() - t0))

    t0 = time.time()
    # Launch the kernel, then block on its event until execution finishes.
    evt = silly_knl(queue, a_np.shape, None, a_g, b_g, res_g)
    evt.wait()
    print("Execution of kernel : %.3f" % (time.time() - t0))

    t0 = time.time()
    # Host-side array that will receive the result.
    res_np = np.empty_like(a_np)
    print("Allocation on Host for results: %.3f" % (time.time() - t0))

    t0 = time.time()
    # Device -> Host transfer of the result.
    cl.enqueue_copy(queue, res_np, res_g)
    print("Copy from Device 2 Host : %.3f" % (time.time() - t0))

    # Release device buffers.
    a_g.release()
    b_g.release()
    res_g.release()

    return res_np
229

    
230
import sys
231
import time
232

    
233
if __name__ == '__main__':

    # Size of input vectors definition based on stdin
    # Narrow exception handling: IndexError when no argument is given,
    # ValueError when it is not an integer (a bare except would also have
    # swallowed KeyboardInterrupt and real bugs).
    try:
        SIZE = int(sys.argv[1])
        print("Size of vectors set to %i" % SIZE)
    except (IndexError, ValueError):
        SIZE = 50000
        print("Size of vectors set to default size %i" % SIZE)

    a_np = np.random.rand(SIZE).astype(np.float32)
    b_np = np.random.rand(SIZE).astype(np.float32)

    # Native Implementation
    TimeIn = time.time()
    res_np = NativeSillyAddition(a_np, b_np)
    NativeElapsed = time.time() - TimeIn
    NativeRate = int(SIZE / NativeElapsed)
    print("NativeRate: %i" % NativeRate)

    # OpenCL Implementation
    TimeIn = time.time()
    res_cl = OpenCLSillyAddition(a_np, b_np)
    OpenCLElapsed = time.time() - TimeIn
    OpenCLRate = int(SIZE / OpenCLElapsed)
    print("OpenCLRate: %i" % OpenCLRate)

    # CUDA Implementation
    TimeIn = time.time()
    res_cuda = CUDASillyAddition(a_np, b_np)
    CUDAElapsed = time.time() - TimeIn
    CUDARate = int(SIZE / CUDAElapsed)
    print("CUDARate: %i" % CUDARate)

    print("OpenCLvsNative ratio: %f" % (OpenCLRate / NativeRate))
    print("CUDAvsNative ratio: %f" % (CUDARate / NativeRate))

    # Check on OpenCL with Numpy:
    print(res_cl - res_np)
    print(np.linalg.norm(res_cl - res_np))
    try:
        assert np.allclose(res_np, res_cl)
    except AssertionError:
        print("Results between Native & OpenCL seem to be too different!")

    # Check on CUDA with Numpy:
    print(res_cuda - res_np)
    print(np.linalg.norm(res_cuda - res_np))
    try:
        assert np.allclose(res_np, res_cuda)
    except AssertionError:
        print("Results between Native & CUDA seem to be too different!")
289

    
290