Révision 269

ETSN/MySteps_4.py (revision 269)
1
#!/usr/bin/env python3
2

  
3
import numpy as np
4
import pyopencl as cl
5

  
6
# piling 16 arithmetical functions
def MySillyFunction(x):
    """Chain 16 numpy operations that cancel pairwise; acts as a costly
    identity for inputs in (0, 1), up to floating-point error."""
    stages = (np.cos, np.arccos, np.sin, np.arcsin, np.tan, np.arctan,
              np.cosh, np.arccosh, np.sinh, np.arcsinh, np.tanh, np.arctanh,
              np.exp, np.log, np.sqrt)
    result = x
    for stage in stages:
        result = stage(result)
    return np.power(result, 2)
9

  
10
# Native Operation under Numpy (for prototyping & tests)
def NativeAddition(a_np, b_np):
    """Reference element-wise sum of the two numpy inputs."""
    return np.add(a_np, b_np)
13

  
14
# Native Operation with MySillyFunction under Numpy (for prototyping & tests)
def NativeSillyAddition(a_np, b_np):
    """Sum of the two inputs after each has gone through MySillyFunction."""
    silly_a = MySillyFunction(a_np)
    silly_b = MySillyFunction(b_np)
    return silly_a + silly_b
17

  
18
# CUDA complete operation
def CUDAAddition(a_np, b_np):
    """Element-wise a+b on the GPU through PyCUDA (one block per element).

    CUDA imports stay local so the module can be used without a CUDA stack.
    Returns a new array shaped like a_np.
    """
    import pycuda.autoinit
    import pycuda.driver as drv

    from pycuda.compiler import SourceModule
    mod = SourceModule("""
    __global__ void sum(float *dest, float *a, float *b)
{
  // const int i = threadIdx.x;
  const int i = blockIdx.x;
  dest[i] = a[i] + b[i];
}
""")

    # Fetch the compiled kernel under a name that does not shadow the
    # Python builtin `sum`.
    sum_knl = mod.get_function("sum")

    res_np = np.zeros_like(a_np)
    sum_knl(drv.Out(res_np), drv.In(a_np), drv.In(b_np),
            block=(1,1,1), grid=(a_np.size,1))
    return(res_np)
40

  
41
# OpenCL complete operation
def OpenCLAddition(a_np, b_np):
    """Element-wise addition of two float32 vectors with PyOpenCL.

    Prints the wall-clock time of each stage and returns the sum as a new
    numpy array.  Fix: the kernel launch is asynchronous, so the event is
    now waited on before the execution time is measured (the sibling
    OpenCLSillyAddition already did this).
    """
    # Context creation
    ctx = cl.create_some_context()
    # Every process is stored in a queue
    queue = cl.CommandQueue(ctx)

    TimeIn=time.time()
    # Copy from Host to Device using pointers
    mf = cl.mem_flags
    a_g = cl.Buffer(ctx, mf.READ_ONLY | mf.COPY_HOST_PTR, hostbuf=a_np)
    b_g = cl.Buffer(ctx, mf.READ_ONLY | mf.COPY_HOST_PTR, hostbuf=b_np)
    Elapsed=time.time()-TimeIn
    print("Copy from Host 2 Device : %.3f" % Elapsed)

    TimeIn=time.time()
    # Definition of kernel under OpenCL
    prg = cl.Program(ctx, """
__kernel void sum(
    __global const float *a_g, __global const float *b_g, __global float *res_g)
{
  int gid = get_global_id(0);
  res_g[gid] = a_g[gid] + b_g[gid];
}
""").build()
    Elapsed=time.time()-TimeIn
    print("Building kernels : %.3f" % Elapsed)

    TimeIn=time.time()
    # Memory allocation on Device for result
    res_g = cl.Buffer(ctx, mf.WRITE_ONLY, a_np.nbytes)
    Elapsed=time.time()-TimeIn
    print("Allocation on Device for results : %.3f" % Elapsed)

    TimeIn=time.time()
    # Synthesis of function "sum" inside Kernel Sources
    knl = prg.sum  # Use this Kernel object for repeated calls
    Elapsed=time.time()-TimeIn
    print("Synthesis of kernel : %.3f" % Elapsed)

    TimeIn=time.time()
    # Call of kernel previously defined; wait on the returned event so the
    # measured time actually covers the (asynchronous) execution.
    CallCL = knl(queue, a_np.shape, None, a_g, b_g, res_g)
    CallCL.wait()
    Elapsed=time.time()-TimeIn
    print("Execution of kernel : %.3f" % Elapsed)

    TimeIn=time.time()
    # Creation of vector for result with same size as input vectors
    res_np = np.empty_like(a_np)
    Elapsed=time.time()-TimeIn
    print("Allocation on Host for results: %.3f" % Elapsed)

    TimeIn=time.time()
    # Copy from Device to Host
    cl.enqueue_copy(queue, res_np, res_g)
    Elapsed=time.time()-TimeIn
    print("Copy from Device 2 Host : %.3f" % Elapsed)

    return(res_np)
101

  
102
# OpenCL complete operation
def OpenCLSillyAddition(a_np, b_np):
    """MySillyFunction(a) + MySillyFunction(b) computed on an OpenCL device.

    Each stage's wall-clock time is printed; the result comes back as a
    numpy array shaped like a_np.
    """
    ctx = cl.create_some_context()
    queue = cl.CommandQueue(ctx)

    t0 = time.time()
    # Host -> Device transfers
    mf = cl.mem_flags
    ro_copy = mf.READ_ONLY | mf.COPY_HOST_PTR
    a_g = cl.Buffer(ctx, ro_copy, hostbuf=a_np)
    b_g = cl.Buffer(ctx, ro_copy, hostbuf=b_np)
    print("Copy from Host 2 Device : %.3f" % (time.time() - t0))

    t0 = time.time()
    # Build the kernel sources (device-side MySillyFunction + two kernels)
    prg = cl.Program(ctx, """

float MySillyFunction(float x)
{
    return(pow(sqrt(log(exp(atanh(tanh(asinh(sinh(acosh(cosh(atan(tan(asin(sin(acos(cos(x))))))))))))))),2)); 
}

__kernel void sillysum(
    __global const float *a_g, __global const float *b_g, __global float *res_g)
{
  int gid = get_global_id(0);
  res_g[gid] = MySillyFunction(a_g[gid]) + MySillyFunction(b_g[gid]);
}

__kernel void sum(
    __global const float *a_g, __global const float *b_g, __global float *res_g)
{
  int gid = get_global_id(0);
  res_g[gid] = a_g[gid] + b_g[gid];
}
""").build()
    print("Building kernels : %.3f" % (time.time() - t0))

    t0 = time.time()
    # Device-side buffer for the result
    res_g = cl.Buffer(ctx, mf.WRITE_ONLY, a_np.nbytes)
    print("Allocation on Device for results : %.3f" % (time.time() - t0))

    t0 = time.time()
    knl = prg.sillysum  # reusable Kernel object
    print("Synthesis of kernel : %.3f" % (time.time() - t0))

    t0 = time.time()
    # Launch and wait for completion so the timing is meaningful
    evt = knl(queue, a_np.shape, None, a_g, b_g, res_g)
    evt.wait()
    print("Execution of kernel : %.3f" % (time.time() - t0))

    t0 = time.time()
    res_np = np.empty_like(a_np)
    print("Allocation on Host for results: %.3f" % (time.time() - t0))

    t0 = time.time()
    cl.enqueue_copy(queue, res_np, res_g)
    print("Copy from Device 2 Host : %.3f" % (time.time() - t0))

    return res_np
177

  
178
import sys
import time

if __name__=='__main__':

    # Size of input vectors definition based on argv[1]
    try:
        SIZE=int(sys.argv[1])
        print("Size of vectors set to %i" % SIZE)
    except (IndexError, ValueError):
        # Missing or non-integer argument: keep the default size.
        SIZE=50000
        print("Size of vectors set to default size %i" % SIZE)

    a_np = np.random.rand(SIZE).astype(np.float32)
    b_np = np.random.rand(SIZE).astype(np.float32)

    # Native Implementation
    TimeIn=time.time()
    # res_np=NativeSillyAddition(a_np,b_np)
    res_np=NativeAddition(a_np,b_np)
    NativeElapsed=time.time()-TimeIn
    NativeRate=int(SIZE/NativeElapsed)
    print("NativeRate: %i" % NativeRate)

    # OpenCL Implementation
    TimeIn=time.time()
    # res_cl=OpenCLSillyAddition(a_np,b_np)
    res_cl=OpenCLAddition(a_np,b_np)
    OpenCLElapsed=time.time()-TimeIn
    OpenCLRate=int(SIZE/OpenCLElapsed)
    print("OpenCLRate: %i" % OpenCLRate)

    # CUDA Implementation
    TimeIn=time.time()
    res_cuda=CUDAAddition(a_np,b_np)
    CUDAElapsed=time.time()-TimeIn
    CUDARate=int(SIZE/CUDAElapsed)
    print("CUDARate: %i" % CUDARate)

    print("OpenCLvsNative ratio: %f" % (OpenCLRate/NativeRate))
    print("CUDAvsNative ratio: %f" % (CUDARate/NativeRate))

    # Check on OpenCL with Numpy.  Explicit np.allclose test instead of a
    # try/assert pair: asserts disappear under "python -O" and the bare
    # except also swallowed unrelated errors.
    print(res_cl - res_np)
    print(np.linalg.norm(res_cl - res_np))
    if not np.allclose(res_np, res_cl):
        print("Results between Native & OpenCL seem to be too different!")

    # Check on CUDA with Numpy:
    print(res_cuda - res_np)
    print(np.linalg.norm(res_cuda - res_np))
    if not np.allclose(res_np, res_cuda):
        print("Results between Native & CUDA seem to be too different!")
236

  
237

  
0 238

  
ETSN/MySteps_5.py (revision 269)
1
#!/usr/bin/env python3
2

  
3
import numpy as np
4
import pyopencl as cl
5

  
6
# piling 16 arithmetical functions
def MySillyFunction(x):
    """Chain 16 numpy operations that cancel pairwise; acts as a costly
    identity for inputs in (0, 1), up to floating-point error."""
    stages = (np.cos, np.arccos, np.sin, np.arcsin, np.tan, np.arctan,
              np.cosh, np.arccosh, np.sinh, np.arcsinh, np.tanh, np.arctanh,
              np.exp, np.log, np.sqrt)
    result = x
    for stage in stages:
        result = stage(result)
    return np.power(result, 2)
9

  
10
# Native Operation under Numpy (for prototyping & tests)
def NativeAddition(a_np, b_np):
    """Reference element-wise sum of the two numpy inputs."""
    return np.add(a_np, b_np)
13

  
14
# Native Operation with MySillyFunction under Numpy (for prototyping & tests)
def NativeSillyAddition(a_np, b_np):
    """Sum of the two inputs after each has gone through MySillyFunction."""
    silly_a = MySillyFunction(a_np)
    silly_b = MySillyFunction(b_np)
    return silly_a + silly_b
17

  
18
# CUDA complete operation
def CUDAAddition(a_np, b_np):
    """Element-wise a+b on the GPU through PyCUDA (one block per element).

    CUDA imports stay local so the module can be used without a CUDA stack.
    Returns a new array shaped like a_np.
    """
    import pycuda.autoinit
    import pycuda.driver as drv

    from pycuda.compiler import SourceModule
    mod = SourceModule("""
    __global__ void sum(float *dest, float *a, float *b)
{
  // const int i = threadIdx.x;
  const int i = blockIdx.x;
  dest[i] = a[i] + b[i];
}
""")

    # Fetch the compiled kernel under a name that does not shadow the
    # Python builtin `sum`.
    sum_knl = mod.get_function("sum")

    res_np = np.zeros_like(a_np)
    sum_knl(drv.Out(res_np), drv.In(a_np), drv.In(b_np),
            block=(1,1,1), grid=(a_np.size,1))
    return(res_np)
41

  
42
# CUDA Silly complete operation
def CUDASillyAddition(a_np, b_np):
    """MySillyFunction(a) + MySillyFunction(b) on the GPU with PyCUDA.

    One block per element; each stage's wall-clock time is printed and the
    result is returned as a numpy array.
    """
    import pycuda.autoinit
    import pycuda.driver as drv
    import numpy

    from pycuda.compiler import SourceModule

    t0 = time.time()
    mod = SourceModule("""
__device__ float MySillyFunction(float x)
{
    return(pow(sqrt(log(exp(atanh(tanh(asinh(sinh(acosh(cosh(atan(tan(asin(sin(acos(cos(x))))))))))))))),2)); 
}

__global__ void sillysum(float *dest, float *a, float *b)
{
  const int i = blockIdx.x;
  dest[i] = MySillyFunction(a[i]) + MySillyFunction(b[i]);
}
""")
    print("Definition of kernel : %.3f" % (time.time() - t0))

    t0 = time.time()
    sillysum = mod.get_function("sillysum")
    print("Synthesis of kernel : %.3f" % (time.time() - t0))

    t0 = time.time()
    res_np = numpy.zeros_like(a_np)
    print("Allocation on Host for results : %.3f" % (time.time() - t0))

    t0 = time.time()
    sillysum(drv.Out(res_np), drv.In(a_np), drv.In(b_np),
             block=(1,1,1), grid=(a_np.size,1))
    print("Execution of kernel : %.3f" % (time.time() - t0))
    return res_np
82

  
83
# OpenCL complete operation
def OpenCLAddition(a_np, b_np):
    """Element-wise addition of two float32 vectors with PyOpenCL.

    Prints the wall-clock time of each stage and returns the sum as a new
    numpy array.  Fix: the kernel launch is asynchronous, so the event is
    now waited on before the execution time is measured (the sibling
    OpenCLSillyAddition already did this).
    """
    # Context creation
    ctx = cl.create_some_context()
    # Every process is stored in a queue
    queue = cl.CommandQueue(ctx)

    TimeIn=time.time()
    # Copy from Host to Device using pointers
    mf = cl.mem_flags
    a_g = cl.Buffer(ctx, mf.READ_ONLY | mf.COPY_HOST_PTR, hostbuf=a_np)
    b_g = cl.Buffer(ctx, mf.READ_ONLY | mf.COPY_HOST_PTR, hostbuf=b_np)
    Elapsed=time.time()-TimeIn
    print("Copy from Host 2 Device : %.3f" % Elapsed)

    TimeIn=time.time()
    # Definition of kernel under OpenCL
    prg = cl.Program(ctx, """
__kernel void sum(
    __global const float *a_g, __global const float *b_g, __global float *res_g)
{
  int gid = get_global_id(0);
  res_g[gid] = a_g[gid] + b_g[gid];
}
""").build()
    Elapsed=time.time()-TimeIn
    print("Building kernels : %.3f" % Elapsed)

    TimeIn=time.time()
    # Memory allocation on Device for result
    res_g = cl.Buffer(ctx, mf.WRITE_ONLY, a_np.nbytes)
    Elapsed=time.time()-TimeIn
    print("Allocation on Device for results : %.3f" % Elapsed)

    TimeIn=time.time()
    # Synthesis of function "sum" inside Kernel Sources
    knl = prg.sum  # Use this Kernel object for repeated calls
    Elapsed=time.time()-TimeIn
    print("Synthesis of kernel : %.3f" % Elapsed)

    TimeIn=time.time()
    # Call of kernel previously defined; wait on the returned event so the
    # measured time actually covers the (asynchronous) execution.
    CallCL = knl(queue, a_np.shape, None, a_g, b_g, res_g)
    CallCL.wait()
    Elapsed=time.time()-TimeIn
    print("Execution of kernel : %.3f" % Elapsed)

    TimeIn=time.time()
    # Creation of vector for result with same size as input vectors
    res_np = np.empty_like(a_np)
    Elapsed=time.time()-TimeIn
    print("Allocation on Host for results: %.3f" % Elapsed)

    TimeIn=time.time()
    # Copy from Device to Host
    cl.enqueue_copy(queue, res_np, res_g)
    Elapsed=time.time()-TimeIn
    print("Copy from Device 2 Host : %.3f" % Elapsed)

    return(res_np)
143

  
144
# OpenCL complete operation
def OpenCLSillyAddition(a_np, b_np):
    """MySillyFunction(a) + MySillyFunction(b) computed on an OpenCL device.

    Each stage's wall-clock time is printed; the result comes back as a
    numpy array shaped like a_np.
    """
    ctx = cl.create_some_context()
    queue = cl.CommandQueue(ctx)

    t0 = time.time()
    # Host -> Device transfers
    mf = cl.mem_flags
    ro_copy = mf.READ_ONLY | mf.COPY_HOST_PTR
    a_g = cl.Buffer(ctx, ro_copy, hostbuf=a_np)
    b_g = cl.Buffer(ctx, ro_copy, hostbuf=b_np)
    print("Copy from Host 2 Device : %.3f" % (time.time() - t0))

    t0 = time.time()
    # Build the kernel sources (device-side MySillyFunction + two kernels)
    prg = cl.Program(ctx, """

float MySillyFunction(float x)
{
    return(pow(sqrt(log(exp(atanh(tanh(asinh(sinh(acosh(cosh(atan(tan(asin(sin(acos(cos(x))))))))))))))),2)); 
}

__kernel void sillysum(
    __global const float *a_g, __global const float *b_g, __global float *res_g)
{
  int gid = get_global_id(0);
  res_g[gid] = MySillyFunction(a_g[gid]) + MySillyFunction(b_g[gid]);
}

__kernel void sum(
    __global const float *a_g, __global const float *b_g, __global float *res_g)
{
  int gid = get_global_id(0);
  res_g[gid] = a_g[gid] + b_g[gid];
}
""").build()
    print("Building kernels : %.3f" % (time.time() - t0))

    t0 = time.time()
    # Device-side buffer for the result
    res_g = cl.Buffer(ctx, mf.WRITE_ONLY, a_np.nbytes)
    print("Allocation on Device for results : %.3f" % (time.time() - t0))

    t0 = time.time()
    knl = prg.sillysum  # reusable Kernel object
    print("Synthesis of kernel : %.3f" % (time.time() - t0))

    t0 = time.time()
    # Launch and wait for completion so the timing is meaningful
    evt = knl(queue, a_np.shape, None, a_g, b_g, res_g)
    evt.wait()
    print("Execution of kernel : %.3f" % (time.time() - t0))

    t0 = time.time()
    res_np = np.empty_like(a_np)
    print("Allocation on Host for results: %.3f" % (time.time() - t0))

    t0 = time.time()
    cl.enqueue_copy(queue, res_np, res_g)
    print("Copy from Device 2 Host : %.3f" % (time.time() - t0))

    return res_np
219

  
220
import sys
import time

if __name__=='__main__':

    # Size of input vectors definition based on argv[1]
    try:
        SIZE=int(sys.argv[1])
        print("Size of vectors set to %i" % SIZE)
    except (IndexError, ValueError):
        # Missing or non-integer argument: keep the default size.
        SIZE=50000
        print("Size of vectors set to default size %i" % SIZE)

    a_np = np.random.rand(SIZE).astype(np.float32)
    b_np = np.random.rand(SIZE).astype(np.float32)

    # Native Implementation
    TimeIn=time.time()
    # res_np=NativeAddition(a_np,b_np)
    res_np=NativeSillyAddition(a_np,b_np)
    NativeElapsed=time.time()-TimeIn
    NativeRate=int(SIZE/NativeElapsed)
    print("NativeRate: %i" % NativeRate)

    # OpenCL Implementation
    TimeIn=time.time()
    # res_cl=OpenCLAddition(a_np,b_np)
    res_cl=OpenCLSillyAddition(a_np,b_np)
    OpenCLElapsed=time.time()-TimeIn
    OpenCLRate=int(SIZE/OpenCLElapsed)
    print("OpenCLRate: %i" % OpenCLRate)

    # CUDA Implementation
    TimeIn=time.time()
    # res_cuda=CUDAAddition(a_np,b_np)
    res_cuda=CUDASillyAddition(a_np,b_np)
    CUDAElapsed=time.time()-TimeIn
    CUDARate=int(SIZE/CUDAElapsed)
    print("CUDARate: %i" % CUDARate)

    print("OpenCLvsNative ratio: %f" % (OpenCLRate/NativeRate))
    print("CUDAvsNative ratio: %f" % (CUDARate/NativeRate))

    # Check on OpenCL with Numpy.  Explicit np.allclose test instead of a
    # try/assert pair: asserts disappear under "python -O" and the bare
    # except also swallowed unrelated errors.
    print(res_cl - res_np)
    print(np.linalg.norm(res_cl - res_np))
    if not np.allclose(res_np, res_cl):
        print("Results between Native & OpenCL seem to be too different!")

    # Check on CUDA with Numpy:
    print(res_cuda - res_np)
    print(np.linalg.norm(res_cuda - res_np))
    if not np.allclose(res_np, res_cuda):
        print("Results between Native & CUDA seem to be too different!")
279

  
280

  
0 281

  
ETSN/MySteps_5b.py (revision 269)
1
#!/usr/bin/env python3
2

  
3
import numpy as np
4
import pyopencl as cl
5

  
6
# piling 16 arithmetical functions
def MySillyFunction(x):
    """A 16-deep pile of mutually inverse numpy functions; approximately the
    identity for inputs in (0, 1)."""
    y = np.arccos(np.cos(x))
    y = np.arcsin(np.sin(y))
    y = np.arctan(np.tan(y))
    y = np.arccosh(np.cosh(y))
    y = np.arcsinh(np.sinh(y))
    y = np.arctanh(np.tanh(y))
    y = np.log(np.exp(y))
    return np.power(np.sqrt(y), 2)
9

  
10
# Native Operation under Numpy (for prototyping & tests)
def NativeAddition(a_np, b_np):
    """Reference element-wise sum of the two numpy inputs."""
    total = a_np + b_np
    return total
13

  
14
# Native Operation with MySillyFunction under Numpy (for prototyping & tests)
def NativeSillyAddition(a_np, b_np):
    """Apply MySillyFunction 16 times to each operand, then add the results.

    Replaces the hand-written 16-deep nested call chain with a loop; the
    composition order is unchanged, so the result is identical.
    """
    def _pile16(x):
        # 16 successive applications == the original nested expression
        for _ in range(16):
            x = MySillyFunction(x)
        return x
    return _pile16(a_np) + _pile16(b_np)
17

  
18
# CUDA complete operation
def CUDAAddition(a_np, b_np):
    """Element-wise a+b on the GPU through PyCUDA (one block per element).

    CUDA imports stay local so the module can be used without a CUDA stack.
    Returns a new array shaped like a_np.
    """
    import pycuda.autoinit
    import pycuda.driver as drv

    from pycuda.compiler import SourceModule
    mod = SourceModule("""
    __global__ void sum(float *dest, float *a, float *b)
{
  // const int i = threadIdx.x;
  const int i = blockIdx.x;
  dest[i] = a[i] + b[i];
}
""")

    # Fetch the compiled kernel under a name that does not shadow the
    # Python builtin `sum`.
    sum_knl = mod.get_function("sum")

    res_np = np.zeros_like(a_np)
    sum_knl(drv.Out(res_np), drv.In(a_np), drv.In(b_np),
            block=(1,1,1), grid=(a_np.size,1))
    return(res_np)
41

  
42
# CUDA Silly complete operation
def CUDASillyAddition(a_np, b_np):
    """16-fold silly pipeline applied to each operand on the GPU, then added.

    One block per element; returns the result as a numpy array.
    """
    import pycuda.autoinit
    import pycuda.driver as drv
    import numpy

    from pycuda.compiler import SourceModule
    mod = SourceModule("""
__device__ float MySillyFunction(float x)
{
    return(pow(sqrt(log(exp(atanh(tanh(asinh(sinh(acosh(cosh(atan(tan(asin(sin(acos(cos(x))))))))))))))),2)); 
}

__global__ void sillysum(float *dest, float *a, float *b)
{
  const int i = blockIdx.x;
  dest[i] = MySillyFunction(MySillyFunction(MySillyFunction(MySillyFunction(MySillyFunction(MySillyFunction(MySillyFunction(MySillyFunction(MySillyFunction(MySillyFunction(MySillyFunction(MySillyFunction(MySillyFunction(MySillyFunction(MySillyFunction(MySillyFunction(a[i])))))))))))))))) + MySillyFunction(MySillyFunction(MySillyFunction(MySillyFunction(MySillyFunction(MySillyFunction(MySillyFunction(MySillyFunction(MySillyFunction(MySillyFunction(MySillyFunction(MySillyFunction(MySillyFunction(MySillyFunction(MySillyFunction(MySillyFunction(b[i]))))))))))))))));
}
""")

    silly_knl = mod.get_function("sillysum")

    res_np = numpy.zeros_like(a_np)
    silly_knl(drv.Out(res_np), drv.In(a_np), drv.In(b_np),
              block=(1,1,1), grid=(a_np.size,1))
    return res_np
69

  
70
# OpenCL complete operation
def OpenCLAddition(a_np, b_np):
    """Element-wise addition of two float32 vectors with PyOpenCL.

    Prints the wall-clock time of each stage and returns the sum as a new
    numpy array.  Fix: the kernel launch is asynchronous, so the event is
    now waited on before the execution time is measured (the sibling
    OpenCLSillyAddition already did this).
    """
    # Context creation
    ctx = cl.create_some_context()
    # Every process is stored in a queue
    queue = cl.CommandQueue(ctx)

    TimeIn=time.time()
    # Copy from Host to Device using pointers
    mf = cl.mem_flags
    a_g = cl.Buffer(ctx, mf.READ_ONLY | mf.COPY_HOST_PTR, hostbuf=a_np)
    b_g = cl.Buffer(ctx, mf.READ_ONLY | mf.COPY_HOST_PTR, hostbuf=b_np)
    Elapsed=time.time()-TimeIn
    print("Copy from Host 2 Device : %.3f" % Elapsed)

    TimeIn=time.time()
    # Definition of kernel under OpenCL
    prg = cl.Program(ctx, """
__kernel void sum(
    __global const float *a_g, __global const float *b_g, __global float *res_g)
{
  int gid = get_global_id(0);
  res_g[gid] = a_g[gid] + b_g[gid];
}
""").build()
    Elapsed=time.time()-TimeIn
    print("Building kernels : %.3f" % Elapsed)

    TimeIn=time.time()
    # Memory allocation on Device for result
    res_g = cl.Buffer(ctx, mf.WRITE_ONLY, a_np.nbytes)
    Elapsed=time.time()-TimeIn
    print("Allocation on Device for results : %.3f" % Elapsed)

    TimeIn=time.time()
    # Synthesis of function "sum" inside Kernel Sources
    knl = prg.sum  # Use this Kernel object for repeated calls
    Elapsed=time.time()-TimeIn
    print("Synthesis of kernel : %.3f" % Elapsed)

    TimeIn=time.time()
    # Call of kernel previously defined; wait on the returned event so the
    # measured time actually covers the (asynchronous) execution.
    CallCL = knl(queue, a_np.shape, None, a_g, b_g, res_g)
    CallCL.wait()
    Elapsed=time.time()-TimeIn
    print("Execution of kernel : %.3f" % Elapsed)

    TimeIn=time.time()
    # Creation of vector for result with same size as input vectors
    res_np = np.empty_like(a_np)
    Elapsed=time.time()-TimeIn
    print("Allocation on Host for results: %.3f" % Elapsed)

    TimeIn=time.time()
    # Copy from Device to Host
    cl.enqueue_copy(queue, res_np, res_g)
    Elapsed=time.time()-TimeIn
    print("Copy from Device 2 Host : %.3f" % Elapsed)

    return(res_np)
130

  
131
# OpenCL complete operation
def OpenCLSillyAddition(a_np, b_np):
    """16-fold silly pipeline applied to each operand on an OpenCL device,
    then added.  Each stage's wall-clock time is printed and the result is
    returned as a numpy array shaped like a_np."""
    ctx = cl.create_some_context()
    queue = cl.CommandQueue(ctx)

    t0 = time.time()
    # Host -> Device transfers
    mf = cl.mem_flags
    ro_copy = mf.READ_ONLY | mf.COPY_HOST_PTR
    a_g = cl.Buffer(ctx, ro_copy, hostbuf=a_np)
    b_g = cl.Buffer(ctx, ro_copy, hostbuf=b_np)
    print("Copy from Host 2 Device : %.3f" % (time.time() - t0))

    t0 = time.time()
    # Build the kernel sources (device-side MySillyFunction + two kernels)
    prg = cl.Program(ctx, """

float MySillyFunction(float x)
{
    return(pow(sqrt(log(exp(atanh(tanh(asinh(sinh(acosh(cosh(atan(tan(asin(sin(acos(cos(x))))))))))))))),2)); 
}

__kernel void sillysum(
    __global const float *a_g, __global const float *b_g, __global float *res_g)
{
  int gid = get_global_id(0);
  res_g[gid] = MySillyFunction(MySillyFunction(MySillyFunction(MySillyFunction(MySillyFunction(MySillyFunction(MySillyFunction(MySillyFunction(MySillyFunction(MySillyFunction(MySillyFunction(MySillyFunction(MySillyFunction(MySillyFunction(MySillyFunction(MySillyFunction(a_g[gid])))))))))))))))) + MySillyFunction(MySillyFunction(MySillyFunction(MySillyFunction(MySillyFunction(MySillyFunction(MySillyFunction(MySillyFunction(MySillyFunction(MySillyFunction(MySillyFunction(MySillyFunction(MySillyFunction(MySillyFunction(MySillyFunction(MySillyFunction(b_g[gid]))))))))))))))));
}

__kernel void sum(
    __global const float *a_g, __global const float *b_g, __global float *res_g)
{
  int gid = get_global_id(0);
  res_g[gid] = a_g[gid] + b_g[gid];
}
""").build()
    print("Building kernels : %.3f" % (time.time() - t0))

    t0 = time.time()
    # Device-side buffer for the result
    res_g = cl.Buffer(ctx, mf.WRITE_ONLY, a_np.nbytes)
    print("Allocation on Device for results : %.3f" % (time.time() - t0))

    t0 = time.time()
    knl = prg.sillysum  # reusable Kernel object
    print("Synthesis of kernel : %.3f" % (time.time() - t0))

    t0 = time.time()
    # Launch and wait for completion so the timing is meaningful
    evt = knl(queue, a_np.shape, None, a_g, b_g, res_g)
    evt.wait()
    print("Execution of kernel : %.3f" % (time.time() - t0))

    t0 = time.time()
    res_np = np.empty_like(a_np)
    print("Allocation on Host for results: %.3f" % (time.time() - t0))

    t0 = time.time()
    cl.enqueue_copy(queue, res_np, res_g)
    print("Copy from Device 2 Host : %.3f" % (time.time() - t0))

    return res_np
206

  
207
import sys
import time

if __name__=='__main__':

    # Size of input vectors definition based on argv[1]
    try:
        SIZE=int(sys.argv[1])
        print("Size of vectors set to %i" % SIZE)
    except (IndexError, ValueError):
        # Missing or non-integer argument: keep the default size.
        SIZE=50000
        print("Size of vectors set to default size %i" % SIZE)

    a_np = np.random.rand(SIZE).astype(np.float32)
    b_np = np.random.rand(SIZE).astype(np.float32)

    # Native Implementation
    TimeIn=time.time()
    # res_np=NativeAddition(a_np,b_np)
    res_np=NativeSillyAddition(a_np,b_np)
    NativeElapsed=time.time()-TimeIn
    NativeRate=int(SIZE/NativeElapsed)
    print("NativeRate: %i" % NativeRate)

    # OpenCL Implementation
    TimeIn=time.time()
    # res_cl=OpenCLAddition(a_np,b_np)
    res_cl=OpenCLSillyAddition(a_np,b_np)
    OpenCLElapsed=time.time()-TimeIn
    OpenCLRate=int(SIZE/OpenCLElapsed)
    print("OpenCLRate: %i" % OpenCLRate)

    # CUDA Implementation
    TimeIn=time.time()
    # res_cuda=CUDAAddition(a_np,b_np)
    res_cuda=CUDASillyAddition(a_np,b_np)
    CUDAElapsed=time.time()-TimeIn
    CUDARate=int(SIZE/CUDAElapsed)
    print("CUDARate: %i" % CUDARate)

    print("OpenCLvsNative ratio: %f" % (OpenCLRate/NativeRate))
    print("CUDAvsNative ratio: %f" % (CUDARate/NativeRate))

    # Check OpenCL result against Numpy.  Explicit np.allclose test instead
    # of a try/assert pair: asserts disappear under "python -O" and the bare
    # except also swallowed unrelated errors.
    print(res_cl - res_np)
    print(np.linalg.norm(res_cl - res_np))
    if not np.allclose(res_np, res_cl):
        print("Results between Native & OpenCL seem to be too different!")

    # Check CUDA result against Numpy:
    print(res_cuda - res_np)
    print(np.linalg.norm(res_cuda - res_np))
    if not np.allclose(res_np, res_cuda):
        print("Results between Native & CUDA seem to be too different!")
266

  
0 267

  
ETSN/MySteps_5c.py (revision 269)
1
#!/usr/bin/env python3
2

  
3
import numpy as np
4
import pyopencl as cl
5

  
6
# piling 16 arithmetical functions
def MySillyFunction(x):
    """A 16-deep pile of mutually inverse numpy functions; approximately the
    identity for inputs in (0, 1)."""
    y = np.arccos(np.cos(x))
    y = np.arcsin(np.sin(y))
    y = np.arctan(np.tan(y))
    y = np.arccosh(np.cosh(y))
    y = np.arcsinh(np.sinh(y))
    y = np.arctanh(np.tanh(y))
    y = np.log(np.exp(y))
    return np.power(np.sqrt(y), 2)
9

  
10
# Native Operation under Numpy (for prototyping & tests)
def NativeAddition(a_np, b_np):
    """Reference element-wise sum of the two numpy inputs."""
    return np.add(a_np, b_np)
13

  
14
# Native Operation with MySillyFunction under Numpy (for prototyping & tests)
def NativeSillyAddition(a_np, b_np):
    """Apply MySillyFunction 16 times to each operand, then add the results.

    Replaces the hand-written 16-deep nested call chain with a loop; the
    composition order is unchanged, so the result is identical.
    """
    def _pile16(x):
        # 16 successive applications == the original nested expression
        for _ in range(16):
            x = MySillyFunction(x)
        return x
    return _pile16(a_np) + _pile16(b_np)
17

  
18
# CUDA complete operation
def CUDAAddition(a_np, b_np):
    """Element-wise a+b on the GPU through PyCUDA (one block per element).

    CUDA imports stay local so the module can be used without a CUDA stack.
    Returns a new array shaped like a_np.
    """
    import pycuda.autoinit
    import pycuda.driver as drv

    from pycuda.compiler import SourceModule
    mod = SourceModule("""
    __global__ void sum(float *dest, float *a, float *b)
{
  // const int i = threadIdx.x;
  const int i = blockIdx.x;
  dest[i] = a[i] + b[i];
}
""")

    # Fetch the compiled kernel under a name that does not shadow the
    # Python builtin `sum`.
    sum_knl = mod.get_function("sum")

    res_np = np.zeros_like(a_np)
    sum_knl(drv.Out(res_np), drv.In(a_np), drv.In(b_np),
            block=(1,1,1), grid=(a_np.size,1))
    return(res_np)
41

  
42
# CUDA Silly complete operation
def CUDASillyAddition(a_np, b_np):
    """16-fold silly pipeline applied to each operand on the GPU, then added.

    Uses the hybrid kernel (1024 threads per block).  Bug fixed: the block
    count was computed as int(size/threads), which silently skipped the tail
    elements when size was not a multiple of 1024 and launched ZERO blocks
    for size < 1024.  The grid is now rounded up and the kernel takes the
    vector length so out-of-range threads do nothing.
    """
    import pycuda.autoinit
    import pycuda.driver as drv
    import numpy

    from pycuda.compiler import SourceModule
    mod = SourceModule("""
__device__ float MySillyFunction(float x)
{
    return(pow(sqrt(log(exp(atanh(tanh(asinh(sinh(acosh(cosh(atan(tan(asin(sin(acos(cos(x))))))))))))))),2)); 
}

__global__ void sillysum(float *dest, float *a, float *b)
{
  const int i = blockIdx.x;
  dest[i] = MySillyFunction(MySillyFunction(MySillyFunction(MySillyFunction(MySillyFunction(MySillyFunction(MySillyFunction(MySillyFunction(MySillyFunction(MySillyFunction(MySillyFunction(MySillyFunction(MySillyFunction(MySillyFunction(MySillyFunction(MySillyFunction(a[i])))))))))))))))) + MySillyFunction(MySillyFunction(MySillyFunction(MySillyFunction(MySillyFunction(MySillyFunction(MySillyFunction(MySillyFunction(MySillyFunction(MySillyFunction(MySillyFunction(MySillyFunction(MySillyFunction(MySillyFunction(MySillyFunction(MySillyFunction(b[i]))))))))))))))));
}

__global__ void hybridsillysum(float *dest, float *a, float *b, const int n)
{
  const int i = threadIdx.x+blockDim.x*blockIdx.x;
  if (i < n)
    dest[i] = MySillyFunction(MySillyFunction(MySillyFunction(MySillyFunction(MySillyFunction(MySillyFunction(MySillyFunction(MySillyFunction(MySillyFunction(MySillyFunction(MySillyFunction(MySillyFunction(MySillyFunction(MySillyFunction(MySillyFunction(MySillyFunction(a[i])))))))))))))))) + MySillyFunction(MySillyFunction(MySillyFunction(MySillyFunction(MySillyFunction(MySillyFunction(MySillyFunction(MySillyFunction(MySillyFunction(MySillyFunction(MySillyFunction(MySillyFunction(MySillyFunction(MySillyFunction(MySillyFunction(MySillyFunction(b[i]))))))))))))))));
}
""")

    hybridsillysum = mod.get_function("hybridsillysum")

    res_np = numpy.zeros_like(a_np)
    threads = 1024
    # Round up so the last partial block is not dropped; the kernel guards
    # against i >= n.
    blocks = (a_np.size + threads - 1) // threads
    hybridsillysum(drv.Out(res_np), drv.In(a_np), drv.In(b_np),
                   numpy.int32(a_np.size),
                   block=(threads,1,1), grid=(blocks,1))
    return(res_np)
80

  
81
# OpenCL complete operation
82
def OpenCLAddition(a_np, b_np):
    """Add two float32 vectors element-wise on an OpenCL device.

    Builds a trivial `sum` kernel, runs it over `a_np.shape` work-items and
    copies the result back, printing the elapsed time of every stage.

    Parameters
    ----------
    a_np, b_np : numpy.ndarray of float32
        Input vectors; must have the same shape.

    Returns
    -------
    numpy.ndarray
        Vector with a_np + b_np, same shape/dtype as a_np.
    """
    # Context creation
    ctx = cl.create_some_context()
    # Every process is stored in a queue
    queue = cl.CommandQueue(ctx)

    TimeIn = time.time()
    # Copy from Host to Device using pointers
    mf = cl.mem_flags
    a_g = cl.Buffer(ctx, mf.READ_ONLY | mf.COPY_HOST_PTR, hostbuf=a_np)
    b_g = cl.Buffer(ctx, mf.READ_ONLY | mf.COPY_HOST_PTR, hostbuf=b_np)
    Elapsed = time.time() - TimeIn
    print("Copy from Host 2 Device : %.3f" % Elapsed)

    TimeIn = time.time()
    # Definition of kernel under OpenCL
    prg = cl.Program(ctx, """
__kernel void sum(
    __global const float *a_g, __global const float *b_g, __global float *res_g)
{
  int gid = get_global_id(0);
  res_g[gid] = a_g[gid] + b_g[gid];
}
""").build()
    Elapsed = time.time() - TimeIn
    print("Building kernels : %.3f" % Elapsed)

    TimeIn = time.time()
    # Memory allocation on Device for result
    res_g = cl.Buffer(ctx, mf.WRITE_ONLY, a_np.nbytes)
    Elapsed = time.time() - TimeIn
    print("Allocation on Device for results : %.3f" % Elapsed)

    TimeIn = time.time()
    # Synthesis of function "sum" inside Kernel Sources
    knl = prg.sum  # Use this Kernel object for repeated calls
    Elapsed = time.time() - TimeIn
    print("Synthesis of kernel : %.3f" % Elapsed)

    TimeIn = time.time()
    # Call of kernel previously defined
    CallCL = knl(queue, a_np.shape, None, a_g, b_g, res_g)
    # FIX: wait for the kernel event so the elapsed time below measures the
    # actual execution, not just the enqueue (consistent with
    # OpenCLSillyAddition, which already does this).
    CallCL.wait()
    Elapsed = time.time() - TimeIn
    print("Execution of kernel : %.3f" % Elapsed)

    TimeIn = time.time()
    # Creation of vector for result with same size as input vectors
    res_np = np.empty_like(a_np)
    Elapsed = time.time() - TimeIn
    print("Allocation on Host for results: %.3f" % Elapsed)

    TimeIn = time.time()
    # Copy from Device to Host
    cl.enqueue_copy(queue, res_np, res_g)
    Elapsed = time.time() - TimeIn
    print("Copy from Device 2 Host : %.3f" % Elapsed)

    return res_np
141

  
142
# OpenCL complete operation
143
def OpenCLSillyAddition(a_np, b_np):
    """Compute MySillyFunction(a)^16-fold + MySillyFunction(b)^16-fold on an
    OpenCL device.

    The `sillysum` kernel applies a chain of sixteen MySillyFunction calls
    (a numerically heavy identity) to each element of both inputs before
    adding them, so the result should approximate a_np + b_np.  Timings of
    each stage are printed along the way.

    Parameters
    ----------
    a_np, b_np : numpy.ndarray of float32
        Input vectors of identical shape.

    Returns
    -------
    numpy.ndarray
        The element-wise silly sum, same shape/dtype as a_np.
    """
    # One context and one in-order command queue for the whole call.
    ctx = cl.create_some_context()
    queue = cl.CommandQueue(ctx)

    # --- Host -> Device transfer -------------------------------------
    t0 = time.time()
    mf = cl.mem_flags
    a_g = cl.Buffer(ctx, mf.READ_ONLY | mf.COPY_HOST_PTR, hostbuf=a_np)
    b_g = cl.Buffer(ctx, mf.READ_ONLY | mf.COPY_HOST_PTR, hostbuf=b_np)
    Elapsed = time.time() - t0
    print("Copy from Host 2 Device : %.3f" % Elapsed)

    # --- Kernel compilation ------------------------------------------
    t0 = time.time()
    prg = cl.Program(ctx, """

float MySillyFunction(float x)
{
    return(pow(sqrt(log(exp(atanh(tanh(asinh(sinh(acosh(cosh(atan(tan(asin(sin(acos(cos(x))))))))))))))),2)); 
}

__kernel void sillysum(
    __global const float *a_g, __global const float *b_g, __global float *res_g)
{
  int gid = get_global_id(0);
  res_g[gid] = MySillyFunction(MySillyFunction(MySillyFunction(MySillyFunction(MySillyFunction(MySillyFunction(MySillyFunction(MySillyFunction(MySillyFunction(MySillyFunction(MySillyFunction(MySillyFunction(MySillyFunction(MySillyFunction(MySillyFunction(MySillyFunction(a_g[gid])))))))))))))))) + MySillyFunction(MySillyFunction(MySillyFunction(MySillyFunction(MySillyFunction(MySillyFunction(MySillyFunction(MySillyFunction(MySillyFunction(MySillyFunction(MySillyFunction(MySillyFunction(MySillyFunction(MySillyFunction(MySillyFunction(MySillyFunction(b_g[gid]))))))))))))))));
}

__kernel void sum(
    __global const float *a_g, __global const float *b_g, __global float *res_g)
{
  int gid = get_global_id(0);
  res_g[gid] = a_g[gid] + b_g[gid];
}
""").build()
    Elapsed = time.time() - t0
    print("Building kernels : %.3f" % Elapsed)

    # --- Device-side result buffer -----------------------------------
    t0 = time.time()
    res_g = cl.Buffer(ctx, mf.WRITE_ONLY, a_np.nbytes)
    Elapsed = time.time() - t0
    print("Allocation on Device for results : %.3f" % Elapsed)

    # --- Kernel object lookup (reusable across calls) -----------------
    t0 = time.time()
    silly_kernel = prg.sillysum
    Elapsed = time.time() - t0
    print("Synthesis of kernel : %.3f" % Elapsed)

    # --- Launch and block until completion ----------------------------
    t0 = time.time()
    event = silly_kernel(queue, a_np.shape, None, a_g, b_g, res_g)
    event.wait()
    Elapsed = time.time() - t0
    print("Execution of kernel : %.3f" % Elapsed)

    # --- Host-side result buffer --------------------------------------
    t0 = time.time()
    res_np = np.empty_like(a_np)
    Elapsed = time.time() - t0
    print("Allocation on Host for results: %.3f" % Elapsed)

    # --- Device -> Host transfer --------------------------------------
    t0 = time.time()
    cl.enqueue_copy(queue, res_np, res_g)
    Elapsed = time.time() - t0
    print("Copy from Device 2 Host : %.3f" % Elapsed)

    return res_np
217

  
218
import sys
import time

if __name__ == '__main__':

    # Size of input vectors definition based on command-line argument.
    # FIX: narrowed the bare `except:` — it swallowed SystemExit and
    # KeyboardInterrupt; only a missing or non-integer argument should
    # fall back to the default.  Also removed the duplicate `import sys`
    # (already imported at module level above).
    try:
        SIZE = int(sys.argv[1])
        print("Size of vectors set to %i" % SIZE)
    except (IndexError, ValueError):
        SIZE = 50000
        print("Size of vectors set to default size %i" % SIZE)

    a_np = np.random.rand(SIZE).astype(np.float32)
    b_np = np.random.rand(SIZE).astype(np.float32)

    # Native Implementation
    TimeIn = time.time()
    res_np = NativeSillyAddition(a_np, b_np)
    NativeElapsed = time.time() - TimeIn
    NativeRate = int(SIZE / NativeElapsed)
    print("NativeRate: %i" % NativeRate)

    # OpenCL Implementation
    TimeIn = time.time()
    res_cl = OpenCLSillyAddition(a_np, b_np)
    OpenCLElapsed = time.time() - TimeIn
    OpenCLRate = int(SIZE / OpenCLElapsed)
    print("OpenCLRate: %i" % OpenCLRate)

    # CUDA Implementation
    TimeIn = time.time()
    res_cuda = CUDASillyAddition(a_np, b_np)
    CUDAElapsed = time.time() - TimeIn
    CUDARate = int(SIZE / CUDAElapsed)
    print("CUDARate: %i" % CUDARate)

    print("OpenCLvsNative ratio: %f" % (OpenCLRate/NativeRate))
    print("CUDAvsNative ratio: %f" % (CUDARate/NativeRate))

    # Check on CPU with Numpy.
    # FIX: `assert` inside try/bare-except is stripped under `python -O`
    # and the bare except hid every other error; an explicit comparison
    # keeps the same message with no silent failure modes.
    print(res_cl - res_np)
    print(np.linalg.norm(res_cl - res_np))
    if not np.allclose(res_np, res_cl):
        print("Results between Native & OpenCL seem to be too different!")

    # Check on CPU with Numpy:
    print(res_cuda - res_np)
    print(np.linalg.norm(res_cuda - res_np))
    if not np.allclose(res_np, res_cuda):
        print("Results between Native & CUDA seem to be too different!")
0 278

  
ETSN/MySteps_3.py (revision 269)
191 191
    a_np = np.random.rand(SIZE).astype(np.float32)
192 192
    b_np = np.random.rand(SIZE).astype(np.float32)
193 193

  
194
    # Native Implementation
194 195
    TimeIn=time.time()
195 196
    # res_np=NativeSillyAddition(a_np,b_np)
196 197
    res_np=NativeAddition(a_np,b_np)
......
198 199
    NativeRate=int(SIZE/NativeElapsed)
199 200
    print("NativeRate: %i" % NativeRate)
200 201

  
202
    # OpenCL Implementation
201 203
    TimeIn=time.time()
202 204
    # res_cl=OpenCLSillyAddition(a_np,b_np)
203 205
    res_cl=OpenCLAddition(a_np,b_np)
......
205 207
    OpenCLRate=int(SIZE/OpenCLElapsed)
206 208
    print("OpenCLRate: %i" % OpenCLRate)
207 209

  
210
    # CUDA Implementation
208 211
    TimeIn=time.time()
209
    # res_cl=OpenCLSillyAddition(a_np,b_np)
210 212
    res_cuda=CUDAAddition(a_np,b_np)
211 213
    CUDAElapsed=time.time()-TimeIn
212 214
    CUDARate=int(SIZE/CUDAElapsed)

Formats disponibles : Unified diff