Révision 268

ETSN/MySteps_0.py (revision 268)
1
#!/usr/bin/env python3
2

  
3
import numpy as np
4
import pyopencl as cl
5

  
6
# Native operation under Numpy (for prototyping & tests)
def NativeAddition(a_np, b_np):
    """Element-wise sum of two arrays via plain Numpy broadcasting."""
    result = a_np + b_np
    return result
9

  
10
# OpenCL complete operation
def OpenCLAddition(a_np, b_np):
    """Add two float32 vectors on an OpenCL device and return the result.

    Steps: create a context and in-order queue, push both inputs to the
    device, build the kernel, launch one work-item per element, and copy
    the result back to the host.
    """
    # Context creation and its in-order command queue
    context = cl.create_some_context()
    command_queue = cl.CommandQueue(context)

    # Copy from Host to Device using pointers
    flags = cl.mem_flags
    device_a = cl.Buffer(context, flags.READ_ONLY | flags.COPY_HOST_PTR, hostbuf=a_np)
    device_b = cl.Buffer(context, flags.READ_ONLY | flags.COPY_HOST_PTR, hostbuf=b_np)

    # Definition of kernel under OpenCL
    program = cl.Program(context, """
__kernel void sum(
    __global const float *a_g, __global const float *b_g, __global float *res_g)
{
  int gid = get_global_id(0);
  res_g[gid] = a_g[gid] + b_g[gid];
}
""").build()

    # Memory allocation on Device for the result
    device_res = cl.Buffer(context, flags.WRITE_ONLY, a_np.nbytes)
    # Kernel handle, reusable for repeated calls
    sum_kernel = program.sum
    # One work-item per element; the runtime picks the work-group size
    sum_kernel(command_queue, a_np.shape, None, device_a, device_b, device_res)

    # Host-side result vector, then Device -> Host copy (the in-order
    # queue guarantees the kernel finished before the copy runs)
    host_res = np.empty_like(a_np)
    cl.enqueue_copy(command_queue, host_res, device_res)

    return host_res
46

  
47
if __name__ == '__main__':
    # Demo/self-test; the guard (commented out in the original revision)
    # keeps this code from running when the module is imported.
    a_np = np.random.rand(50000).astype(np.float32)
    b_np = np.random.rand(50000).astype(np.float32)

    res_np = NativeAddition(a_np, b_np)
    res_cl = OpenCLAddition(a_np, b_np)

    # Check on CPU with Numpy:
    print(res_cl - res_np)
    print(np.linalg.norm(res_cl - res_np))
    assert np.allclose(res_np, res_cl)
0 59

  
ETSN/MySteps.py (revision 268)
1
#!/usr/bin/env python3
2

  
3
import numpy as np
4
import pyopencl as cl
5

  
6
# Two random float32 input vectors
a_np = np.random.rand(50000).astype(np.float32)
b_np = np.random.rand(50000).astype(np.float32)

# One context and one in-order command queue for the whole script
ctx = cl.create_some_context()
queue = cl.CommandQueue(ctx)

# Host -> Device transfer of both inputs
mf = cl.mem_flags
a_g = cl.Buffer(ctx, mf.READ_ONLY | mf.COPY_HOST_PTR, hostbuf=a_np)
b_g = cl.Buffer(ctx, mf.READ_ONLY | mf.COPY_HOST_PTR, hostbuf=b_np)

# Build the element-wise addition kernel
prg = cl.Program(ctx, """
__kernel void sum(
    __global const float *a_g, __global const float *b_g, __global float *res_g)
{
  int gid = get_global_id(0);
  res_g[gid] = a_g[gid] + b_g[gid];
}
""").build()

# Device buffer for the result, then launch one work-item per element
res_g = cl.Buffer(ctx, mf.WRITE_ONLY, a_np.nbytes)
knl = prg.sum  # Use this Kernel object for repeated calls
knl(queue, a_np.shape, None, a_g, b_g, res_g)

# Copy the result back into a host vector of the same shape
res_np = np.empty_like(a_np)
cl.enqueue_copy(queue, res_np, res_g)

# Check on CPU with Numpy:
expected = a_np + b_np
print(res_np - expected)
print(np.linalg.norm(res_np - expected))
assert np.allclose(res_np, expected)
0 36

  
ETSN/MySteps_1.py (revision 268)
1
#!/usr/bin/env python3
2

  
3
import numpy as np
4
import pyopencl as cl
5

  
6
# Native operation under Numpy (for prototyping & tests)
def NativeAddition(a_np, b_np):
    """Return the element-wise sum of the two input arrays."""
    return a_np + b_np
9

  
10
# OpenCL complete operation
def OpenCLAddition(a_np, b_np):
    """Element-wise addition of two float32 vectors on an OpenCL device.

    Creates its own context/queue, moves the data, builds and runs the
    kernel, and copies the result back to the host.
    """
    # Context creation; every command goes through an in-order queue
    context = cl.create_some_context()
    command_queue = cl.CommandQueue(context)

    # Copy from Host to Device using pointers
    flags = cl.mem_flags
    device_a = cl.Buffer(context, flags.READ_ONLY | flags.COPY_HOST_PTR, hostbuf=a_np)
    device_b = cl.Buffer(context, flags.READ_ONLY | flags.COPY_HOST_PTR, hostbuf=b_np)

    # Definition of kernel under OpenCL
    program = cl.Program(context, """
__kernel void sum(
    __global const float *a_g, __global const float *b_g, __global float *res_g)
{
  int gid = get_global_id(0);
  res_g[gid] = a_g[gid] + b_g[gid];
}
""").build()

    # Memory allocation on Device for the result
    device_res = cl.Buffer(context, flags.WRITE_ONLY, a_np.nbytes)
    # Kernel handle, reusable for repeated launches
    sum_kernel = program.sum
    # One work-item per element; runtime-chosen work-group size
    sum_kernel(command_queue, a_np.shape, None, device_a, device_b, device_res)

    # Host-side result vector and Device -> Host copy
    host_res = np.empty_like(a_np)
    cl.enqueue_copy(command_queue, host_res, device_res)

    return host_res
46

  
47
import sys
import time

if __name__ == '__main__':

    # Size of input vectors, taken from the first command-line argument.
    # Fix: the original used a bare `except:` (which also swallows
    # SystemExit/KeyboardInterrupt) and re-imported sys inside main.
    try:
        SIZE = int(sys.argv[1])
        print("Size of vectors set to %i" % SIZE)
    except (IndexError, ValueError):
        # Missing or non-integer argument: fall back to the default size
        SIZE = 50000
        print("Size of vectors set to default size %i" % SIZE)

    a_np = np.random.rand(SIZE).astype(np.float32)
    b_np = np.random.rand(SIZE).astype(np.float32)

    # Native (Numpy) implementation timing
    TimeIn = time.time()
    res_np = NativeAddition(a_np, b_np)
    NativeElapsed = time.time() - TimeIn
    NativeRate = int(SIZE / NativeElapsed)
    print("NativeRate: %i" % NativeRate)

    # OpenCL implementation timing (includes context creation,
    # transfers and kernel build, not just execution)
    TimeIn = time.time()
    res_cl = OpenCLAddition(a_np, b_np)
    OpenCLElapsed = time.time() - TimeIn
    OpenCLRate = int(SIZE / OpenCLElapsed)
    print("OpenCLRate: %i" % OpenCLRate)

    print("OpenCLvsNative ratio: %f" % (OpenCLRate/NativeRate))

    # Check on CPU with Numpy:
    print(res_cl - res_np)
    print(np.linalg.norm(res_cl - res_np))
    assert np.allclose(res_np, res_cl)
0 82

  
ETSN/MySteps_2.py (revision 268)
1
#!/usr/bin/env python3
2

  
3
import numpy as np
4
import pyopencl as cl
5

  
6
# Pile of 16 arithmetical functions; each inverse cancels the call
# before it, so the chain is (numerically) the identity on [0, 1).
def MySillyFunction(x):
    """Apply 16 chained Numpy operations that compose to ~identity."""
    y = np.arccos(np.cos(x))
    y = np.arcsin(np.sin(y))
    y = np.arctan(np.tan(y))
    y = np.arccosh(np.cosh(y))
    y = np.arcsinh(np.sinh(y))
    y = np.arctanh(np.tanh(y))
    y = np.log(np.exp(y))
    return np.power(np.sqrt(y), 2)
9

  
10
# Native operation under Numpy (for prototyping & tests)
def NativeAddition(a_np, b_np):
    """Return the element-wise sum of the two input arrays."""
    total = a_np + b_np
    return total
13

  
14
# Native operation with MySillyFunction under Numpy (for prototyping & tests)
def NativeSillyAddition(a_np, b_np):
    """Apply MySillyFunction to each input, then add element-wise."""
    left = MySillyFunction(a_np)
    right = MySillyFunction(b_np)
    return left + right
17

  
18
# OpenCL complete operation
def OpenCLAddition(a_np, b_np):
    """Add two float32 vectors on an OpenCL device, printing per-step timings.

    Bug fix: the kernel launch is asynchronous, so the execution timing now
    waits on the returned event before reading the clock (as
    OpenCLSillyAddition already does); previously only the enqueue cost
    was measured.
    """
    # Context creation
    ctx = cl.create_some_context()
    # Every process is stored in a queue
    queue = cl.CommandQueue(ctx)

    TimeIn = time.time()
    # Copy from Host to Device using pointers
    mf = cl.mem_flags
    a_g = cl.Buffer(ctx, mf.READ_ONLY | mf.COPY_HOST_PTR, hostbuf=a_np)
    b_g = cl.Buffer(ctx, mf.READ_ONLY | mf.COPY_HOST_PTR, hostbuf=b_np)
    Elapsed = time.time() - TimeIn
    print("Copy from Host 2 Device : %.3f" % Elapsed)

    TimeIn = time.time()
    # Definition of kernel under OpenCL
    prg = cl.Program(ctx, """
__kernel void sum(
    __global const float *a_g, __global const float *b_g, __global float *res_g)
{
  int gid = get_global_id(0);
  res_g[gid] = a_g[gid] + b_g[gid];
}
""").build()
    Elapsed = time.time() - TimeIn
    print("Building kernels : %.3f" % Elapsed)

    TimeIn = time.time()
    # Memory allocation on Device for result
    res_g = cl.Buffer(ctx, mf.WRITE_ONLY, a_np.nbytes)
    Elapsed = time.time() - TimeIn
    print("Allocation on Device for results : %.3f" % Elapsed)

    TimeIn = time.time()
    # Synthesis of function "sum" inside Kernel Sources
    knl = prg.sum  # Use this Kernel object for repeated calls
    Elapsed = time.time() - TimeIn
    print("Synthesis of kernel : %.3f" % Elapsed)

    TimeIn = time.time()
    # Launch, then block on the event so the elapsed time covers the
    # actual execution (the call itself only enqueues the kernel)
    CallCL = knl(queue, a_np.shape, None, a_g, b_g, res_g)
    CallCL.wait()
    Elapsed = time.time() - TimeIn
    print("Execution of kernel : %.3f" % Elapsed)

    TimeIn = time.time()
    # Creation of vector for result with same size as input vectors
    res_np = np.empty_like(a_np)
    Elapsed = time.time() - TimeIn
    print("Allocation on Host for results: %.3f" % Elapsed)

    TimeIn = time.time()
    # Copy from Device to Host
    cl.enqueue_copy(queue, res_np, res_g)
    Elapsed = time.time() - TimeIn
    print("Copy from Device 2 Host : %.3f" % Elapsed)

    return res_np
78

  
79
# OpenCL complete operation
def OpenCLSillyAddition(a_np, b_np):
    """Device-side counterpart of NativeSillyAddition.

    Applies the 16-function pile to both float32 vectors inside the
    kernel, adds them element-wise, and prints the wall-clock time of
    every stage (transfers, build, allocation, synthesis, execution).
    """
    # One context and one in-order command queue
    ctx = cl.create_some_context()
    queue = cl.CommandQueue(ctx)

    # Host -> Device transfer of both inputs
    t0 = time.time()
    mf = cl.mem_flags
    a_g = cl.Buffer(ctx, mf.READ_ONLY | mf.COPY_HOST_PTR, hostbuf=a_np)
    b_g = cl.Buffer(ctx, mf.READ_ONLY | mf.COPY_HOST_PTR, hostbuf=b_np)
    dt = time.time() - t0
    print("Copy from Host 2 Device : %.3f" % dt)

    # Build the program: device-side MySillyFunction plus two kernels
    t0 = time.time()
    program = cl.Program(ctx, """

float MySillyFunction(float x)
{
    return(pow(sqrt(log(exp(atanh(tanh(asinh(sinh(acosh(cosh(atan(tan(asin(sin(acos(cos(x))))))))))))))),2)); 
}

__kernel void sillysum(
    __global const float *a_g, __global const float *b_g, __global float *res_g)
{
  int gid = get_global_id(0);
  res_g[gid] = MySillyFunction(a_g[gid]) + MySillyFunction(b_g[gid]);
}

__kernel void sum(
    __global const float *a_g, __global const float *b_g, __global float *res_g)
{
  int gid = get_global_id(0);
  res_g[gid] = a_g[gid] + b_g[gid];
}
""").build()
    dt = time.time() - t0
    print("Building kernels : %.3f" % dt)

    # Device-side buffer for the result
    t0 = time.time()
    res_g = cl.Buffer(ctx, mf.WRITE_ONLY, a_np.nbytes)
    dt = time.time() - t0
    print("Allocation on Device for results : %.3f" % dt)

    # Fetch the "sillysum" kernel handle (reusable across launches)
    t0 = time.time()
    kernel = program.sillysum
    dt = time.time() - t0
    print("Synthesis of kernel : %.3f" % dt)

    # Launch one work-item per element and wait on the event so the
    # measured time covers the execution, not just the enqueue
    t0 = time.time()
    event = kernel(queue, a_np.shape, None, a_g, b_g, res_g)
    event.wait()
    dt = time.time() - t0
    print("Execution of kernel : %.3f" % dt)

    # Host-side result vector
    t0 = time.time()
    res_np = np.empty_like(a_np)
    dt = time.time() - t0
    print("Allocation on Host for results: %.3f" % dt)

    # Device -> Host copy of the result
    t0 = time.time()
    cl.enqueue_copy(queue, res_np, res_g)
    dt = time.time() - t0
    print("Copy from Device 2 Host : %.3f" % dt)

    return res_np
154

  
155
import sys
import time

if __name__ == '__main__':

    # Size of input vectors, taken from the first command-line argument.
    # Fix: narrow the bare `except:` to the exceptions int(argv[1]) can
    # actually raise, and drop the duplicate `import sys` inside main.
    try:
        SIZE = int(sys.argv[1])
        print("Size of vectors set to %i" % SIZE)
    except (IndexError, ValueError):
        # Missing or non-integer argument: fall back to the default size
        SIZE = 50000
        print("Size of vectors set to default size %i" % SIZE)

    a_np = np.random.rand(SIZE).astype(np.float32)
    b_np = np.random.rand(SIZE).astype(np.float32)

    # Native (Numpy) implementation timing
    TimeIn = time.time()
    res_np = NativeSillyAddition(a_np, b_np)
    NativeElapsed = time.time() - TimeIn
    NativeRate = int(SIZE / NativeElapsed)
    print("NativeRate: %i" % NativeRate)

    # OpenCL implementation timing (includes transfers and kernel build)
    TimeIn = time.time()
    res_cl = OpenCLSillyAddition(a_np, b_np)
    OpenCLElapsed = time.time() - TimeIn
    OpenCLRate = int(SIZE / OpenCLElapsed)
    print("OpenCLRate: %i" % OpenCLRate)

    print("OpenCLvsNative ratio: %f" % (OpenCLRate/NativeRate))

    # Check on CPU with Numpy; float32 transcendentals accumulate
    # rounding error, hence the loose relative tolerance
    print(res_cl - res_np)
    print(np.linalg.norm(res_cl - res_np))
    assert np.allclose(res_np, res_cl, rtol=1e-4)
0 190

  
ETSN/MySteps_3.py (revision 268)
1
#!/usr/bin/env python3
2

  
3
import numpy as np
4
import pyopencl as cl
5

  
6
# Piling 16 arithmetical functions: every inverse undoes the previous
# call, so the whole chain is numerically the identity on [0, 1).
def MySillyFunction(x):
    """Chain 16 Numpy operations whose composition is ~identity."""
    y = np.arccos(np.cos(x))
    y = np.arcsin(np.sin(y))
    y = np.arctan(np.tan(y))
    y = np.arccosh(np.cosh(y))
    y = np.arcsinh(np.sinh(y))
    y = np.arctanh(np.tanh(y))
    y = np.log(np.exp(y))
    return np.power(np.sqrt(y), 2)
9

  
10
# Native operation under Numpy (for prototyping & tests)
def NativeAddition(a_np, b_np):
    """Element-wise sum of the two inputs, delegated to Numpy."""
    return np.add(a_np, b_np)
13

  
14
# Native operation with MySillyFunction under Numpy (for prototyping & tests)
def NativeSillyAddition(a_np, b_np):
    """Run both inputs through MySillyFunction and sum the results."""
    return MySillyFunction(a_np) + MySillyFunction(b_np)
17

  
18
# CUDA complete operation
def CUDAAddition(a_np, b_np):
    """Add two float32 vectors with PyCUDA.

    Bug fix: the original launched a single block of ``a_np.size``
    threads, which exceeds the per-block thread limit (typically 1024)
    for the default 50000-element vectors and makes the launch fail.
    The kernel now computes a global index with a bounds check and the
    launch spreads the work over enough blocks.
    """
    import pycuda.autoinit
    import pycuda.driver as drv
    import numpy

    from pycuda.compiler import SourceModule
    mod = SourceModule("""
__global__ void sum(float *dest, float *a, float *b, int size)
{
  const int i = blockIdx.x * blockDim.x + threadIdx.x;
  if (i < size)
    dest[i] = a[i] + b[i];
}
""")

    sum_kernel = mod.get_function("sum")  # renamed: don't shadow builtin sum

    res_np = numpy.zeros_like(a_np)
    threads_per_block = 256
    blocks = (a_np.size + threads_per_block - 1) // threads_per_block
    sum_kernel(drv.Out(res_np), drv.In(a_np), drv.In(b_np),
               numpy.int32(a_np.size),
               block=(threads_per_block, 1, 1), grid=(blocks, 1))
    return res_np
39

  
40
# OpenCL complete operation
def OpenCLAddition(a_np, b_np):
    """Add two float32 vectors on an OpenCL device with per-step timings.

    Bug fix: the kernel launch only enqueues work, so the execution
    timing now waits on the returned event before reading the clock
    (OpenCLSillyAddition already did this); previously the printed
    "Execution of kernel" time measured only the enqueue.
    """
    # Context creation
    ctx = cl.create_some_context()
    # Every process is stored in a queue
    queue = cl.CommandQueue(ctx)

    TimeIn = time.time()
    # Copy from Host to Device using pointers
    mf = cl.mem_flags
    a_g = cl.Buffer(ctx, mf.READ_ONLY | mf.COPY_HOST_PTR, hostbuf=a_np)
    b_g = cl.Buffer(ctx, mf.READ_ONLY | mf.COPY_HOST_PTR, hostbuf=b_np)
    Elapsed = time.time() - TimeIn
    print("Copy from Host 2 Device : %.3f" % Elapsed)

    TimeIn = time.time()
    # Definition of kernel under OpenCL
    prg = cl.Program(ctx, """
__kernel void sum(
    __global const float *a_g, __global const float *b_g, __global float *res_g)
{
  int gid = get_global_id(0);
  res_g[gid] = a_g[gid] + b_g[gid];
}
""").build()
    Elapsed = time.time() - TimeIn
    print("Building kernels : %.3f" % Elapsed)

    TimeIn = time.time()
    # Memory allocation on Device for result
    res_g = cl.Buffer(ctx, mf.WRITE_ONLY, a_np.nbytes)
    Elapsed = time.time() - TimeIn
    print("Allocation on Device for results : %.3f" % Elapsed)

    TimeIn = time.time()
    # Synthesis of function "sum" inside Kernel Sources
    knl = prg.sum  # Use this Kernel object for repeated calls
    Elapsed = time.time() - TimeIn
    print("Synthesis of kernel : %.3f" % Elapsed)

    TimeIn = time.time()
    # Launch, then block on the event so we time the real execution
    CallCL = knl(queue, a_np.shape, None, a_g, b_g, res_g)
    CallCL.wait()
    Elapsed = time.time() - TimeIn
    print("Execution of kernel : %.3f" % Elapsed)

    TimeIn = time.time()
    # Creation of vector for result with same size as input vectors
    res_np = np.empty_like(a_np)
    Elapsed = time.time() - TimeIn
    print("Allocation on Host for results: %.3f" % Elapsed)

    TimeIn = time.time()
    # Copy from Device to Host
    cl.enqueue_copy(queue, res_np, res_g)
    Elapsed = time.time() - TimeIn
    print("Copy from Device 2 Host : %.3f" % Elapsed)

    return res_np
100

  
101
# OpenCL complete operation
def OpenCLSillyAddition(a_np, b_np):
    """GPU version of NativeSillyAddition with per-stage timing prints.

    The kernel applies the 16-function pile to each element of both
    float32 vectors and adds the results.
    """
    # Context and its in-order command queue
    ctx = cl.create_some_context()
    queue = cl.CommandQueue(ctx)

    # Host -> Device transfer of the two inputs
    t0 = time.time()
    mf = cl.mem_flags
    a_g = cl.Buffer(ctx, mf.READ_ONLY | mf.COPY_HOST_PTR, hostbuf=a_np)
    b_g = cl.Buffer(ctx, mf.READ_ONLY | mf.COPY_HOST_PTR, hostbuf=b_np)
    dt = time.time() - t0
    print("Copy from Host 2 Device : %.3f" % dt)

    # Compile device-side MySillyFunction together with both kernels
    t0 = time.time()
    program = cl.Program(ctx, """

float MySillyFunction(float x)
{
    return(pow(sqrt(log(exp(atanh(tanh(asinh(sinh(acosh(cosh(atan(tan(asin(sin(acos(cos(x))))))))))))))),2)); 
}

__kernel void sillysum(
    __global const float *a_g, __global const float *b_g, __global float *res_g)
{
  int gid = get_global_id(0);
  res_g[gid] = MySillyFunction(a_g[gid]) + MySillyFunction(b_g[gid]);
}

__kernel void sum(
    __global const float *a_g, __global const float *b_g, __global float *res_g)
{
  int gid = get_global_id(0);
  res_g[gid] = a_g[gid] + b_g[gid];
}
""").build()
    dt = time.time() - t0
    print("Building kernels : %.3f" % dt)

    # Device buffer for the result vector
    t0 = time.time()
    res_g = cl.Buffer(ctx, mf.WRITE_ONLY, a_np.nbytes)
    dt = time.time() - t0
    print("Allocation on Device for results : %.3f" % dt)

    # Grab the "sillysum" kernel handle (reusable for repeated calls)
    t0 = time.time()
    kernel = program.sillysum
    dt = time.time() - t0
    print("Synthesis of kernel : %.3f" % dt)

    # Enqueue one work-item per element, then wait on the event so the
    # timing reflects the execution and not only the enqueue
    t0 = time.time()
    event = kernel(queue, a_np.shape, None, a_g, b_g, res_g)
    event.wait()
    dt = time.time() - t0
    print("Execution of kernel : %.3f" % dt)

    # Host-side vector with the same shape/dtype as the inputs
    t0 = time.time()
    res_np = np.empty_like(a_np)
    dt = time.time() - t0
    print("Allocation on Host for results: %.3f" % dt)

    # Device -> Host copy of the result
    t0 = time.time()
    cl.enqueue_copy(queue, res_np, res_g)
    dt = time.time() - t0
    print("Copy from Device 2 Host : %.3f" % dt)

    return res_np
176

  
177
import sys
import time

if __name__ == '__main__':

    # Size of input vectors, taken from the first command-line argument.
    # Fix: narrow the bare `except:`, drop the duplicate `import sys`
    # inside main, and delete stale commented-out calls.
    try:
        SIZE = int(sys.argv[1])
        print("Size of vectors set to %i" % SIZE)
    except (IndexError, ValueError):
        # Missing or non-integer argument: fall back to the default size
        SIZE = 50000
        print("Size of vectors set to default size %i" % SIZE)

    a_np = np.random.rand(SIZE).astype(np.float32)
    b_np = np.random.rand(SIZE).astype(np.float32)

    # Native (Numpy) implementation timing
    TimeIn = time.time()
    res_np = NativeAddition(a_np, b_np)
    NativeElapsed = time.time() - TimeIn
    NativeRate = int(SIZE / NativeElapsed)
    print("NativeRate: %i" % NativeRate)

    # OpenCL implementation timing (includes transfers and kernel build)
    TimeIn = time.time()
    res_cl = OpenCLAddition(a_np, b_np)
    OpenCLElapsed = time.time() - TimeIn
    OpenCLRate = int(SIZE / OpenCLElapsed)
    print("OpenCLRate: %i" % OpenCLRate)

    # CUDA implementation timing (includes transfers and compilation)
    TimeIn = time.time()
    res_cuda = CUDAAddition(a_np, b_np)
    CUDAElapsed = time.time() - TimeIn
    CUDARate = int(SIZE / CUDAElapsed)
    print("CUDARate: %i" % CUDARate)

    print("OpenCLvsNative ratio: %f" % (OpenCLRate/NativeRate))
    print("CUDAvsNative ratio: %f" % (CUDARate/NativeRate))

    # Check OpenCL result on CPU with Numpy:
    print(res_cl - res_np)
    print(np.linalg.norm(res_cl - res_np))
    assert np.allclose(res_np, res_cl)

    # Check CUDA result on CPU with Numpy:
    print(res_cuda - res_np)
    print(np.linalg.norm(res_cuda - res_np))
    assert np.allclose(res_np, res_cuda)
0 227

  

Formats disponibles : Unified diff