Statistiques
| Révision :

root / ETSN / MySteps_2.py @ 277

Historique | Voir | Annoter | Télécharger (5,87 ko)

1
#!/usr/bin/env python3
2

    
3
import numpy as np
4
import pyopencl as cl
5

    
6
# Piling up 16 arithmetic functions (each function is followed by its inverse)
7
def MySillyFunction(x):
    """Apply a deliberately expensive chain of 16 NumPy functions to ``x``.

    The chain is cos, arccos, sin, arcsin, tan, arctan, cosh, arccosh,
    sinh, arcsinh, tanh, arctanh, exp, log, sqrt and finally a square.
    Every function is immediately followed by its inverse, so wherever each
    pair cancels (for example values in [0, 1)) the result is approximately
    ``x`` itself; the only purpose is to generate arithmetic load.

    Args:
        x: scalar or NumPy array; all functions are applied element-wise.

    Returns:
        The transformed value, as produced by ``np.power(..., 2)``.
    """
    # Applied in order, innermost call first.
    chain = (
        np.cos, np.arccos,
        np.sin, np.arcsin,
        np.tan, np.arctan,
        np.cosh, np.arccosh,
        np.sinh, np.arcsinh,
        np.tanh, np.arctanh,
        np.exp, np.log,
        np.sqrt,
    )
    result = x
    for func in chain:
        result = func(result)
    return np.power(result, 2)
9

    
10
# Native operation under Numpy (for prototyping & tests)
11
def NativeAddition(a_np, b_np):
    """Return ``a_np + b_np`` computed on the host (element-wise for arrays).

    Args:
        a_np: first operand.
        b_np: second operand.

    Returns:
        The result of ``a_np + b_np``.
    """
    return a_np + b_np
13

    
14
# Native operation with MySillyFunction under Numpy (for prototyping & tests)
15
def NativeSillyAddition(a_np, b_np):
    """Return ``MySillyFunction(a_np) + MySillyFunction(b_np)`` on the host.

    Args:
        a_np: first operand, passed through ``MySillyFunction``.
        b_np: second operand, passed through ``MySillyFunction``.

    Returns:
        The sum of the two transformed operands.
    """
    return MySillyFunction(a_np) + MySillyFunction(b_np)
17

    
18
# OpenCL complete operation
19
def OpenCLAddition(a_np, b_np):
    """Add two vectors on an OpenCL device and return the result on the host.

    Every stage (host-to-device copy, kernel build, device allocation,
    kernel lookup, execution, host allocation, device-to-host copy) is timed
    with ``time.time()`` and the elapsed seconds are printed to stdout.

    Args:
        a_np: first input NumPy array. The kernel is declared with ``float``
            pointers, so float32 data is expected. Its shape is used as the
            global work size and its byte size as the result buffer size.
        b_np: second input NumPy array; the kernel reads ``b_g[gid]`` for
            every work item, so it must hold at least as many elements as
            ``a_np``.

    Returns:
        A new array created with ``np.empty_like(a_np)`` and filled with the
        device result ``a_np + b_np``.
    """
    # Context creation
    ctx = cl.create_some_context()
    # Every process is stored in a queue
    queue = cl.CommandQueue(ctx)

    TimeIn = time.time()
    # Copy from Host to Device using pointers
    mf = cl.mem_flags
    a_g = cl.Buffer(ctx, mf.READ_ONLY | mf.COPY_HOST_PTR, hostbuf=a_np)
    b_g = cl.Buffer(ctx, mf.READ_ONLY | mf.COPY_HOST_PTR, hostbuf=b_np)
    Elapsed = time.time() - TimeIn
    print("Copy from Host 2 Device : %.3f" % Elapsed)

    TimeIn = time.time()
    # Definition of kernel under OpenCL
    prg = cl.Program(ctx, """
__kernel void sum(
    __global const float *a_g, __global const float *b_g, __global float *res_g)
{
  int gid = get_global_id(0);
  res_g[gid] = a_g[gid] + b_g[gid];
}
""").build()
    Elapsed = time.time() - TimeIn
    print("Building kernels : %.3f" % Elapsed)

    TimeIn = time.time()
    # Memory allocation on Device for result
    res_g = cl.Buffer(ctx, mf.WRITE_ONLY, a_np.nbytes)
    Elapsed = time.time() - TimeIn
    print("Allocation on Device for results : %.3f" % Elapsed)

    TimeIn = time.time()
    # Synthesis of function "sum" inside Kernel Sources
    knl = prg.sum  # Use this Kernel object for repeated calls
    Elapsed = time.time() - TimeIn
    print("Synthesis of kernel : %.3f" % Elapsed)

    TimeIn = time.time()
    # Call of kernel previously defined
    CallCL = knl(queue, a_np.shape, None, a_g, b_g, res_g)
    # The launch only enqueues the kernel; wait on the returned event so the
    # measured time covers the actual execution and not just the enqueue.
    CallCL.wait()
    Elapsed = time.time() - TimeIn
    print("Execution of kernel : %.3f" % Elapsed)

    TimeIn = time.time()
    # Creation of vector for result with same size as input vectors
    res_np = np.empty_like(a_np)
    Elapsed = time.time() - TimeIn
    print("Allocation on Host for results: %.3f" % Elapsed)

    TimeIn = time.time()
    # Copy from Device to Host
    cl.enqueue_copy(queue, res_np, res_g)
    Elapsed = time.time() - TimeIn
    print("Copy from Device 2 Host : %.3f" % Elapsed)

    # Liberation of memory
    a_g.release()
    b_g.release()
    res_g.release()

    return res_np
83

    
84
# OpenCL complete operation with MySillyFunction
85
def OpenCLSillyAddition(a_np, b_np):
    """Compute ``MySillyFunction(a) + MySillyFunction(b)`` on an OpenCL device.

    The OpenCL program defines a device-side ``MySillyFunction`` (the same
    chain of 16 functions as the NumPy version) and two kernels, ``sillysum``
    and ``sum``; only ``sillysum`` is launched here. Every stage is timed
    with ``time.time()`` and the elapsed seconds are printed to stdout.

    Args:
        a_np: first input NumPy array. The kernel is declared with ``float``
            pointers, so float32 data is expected. Its shape is used as the
            global work size and its byte size as the result buffer size.
        b_np: second input NumPy array; the kernel reads ``b_g[gid]`` for
            every work item, so it must hold at least as many elements as
            ``a_np``.

    Returns:
        A new array created with ``np.empty_like(a_np)`` and filled with the
        device result.
    """
    # Context creation
    ctx = cl.create_some_context()
    # Every process is stored in a queue
    queue = cl.CommandQueue(ctx)

    TimeIn = time.time()
    # Copy from Host to Device using pointers
    mf = cl.mem_flags
    a_g = cl.Buffer(ctx, mf.READ_ONLY | mf.COPY_HOST_PTR, hostbuf=a_np)
    b_g = cl.Buffer(ctx, mf.READ_ONLY | mf.COPY_HOST_PTR, hostbuf=b_np)
    Elapsed = time.time() - TimeIn
    print("Copy from Host 2 Device : %.3f" % Elapsed)

    TimeIn = time.time()
    # Definition of kernel under OpenCL
    prg = cl.Program(ctx, """

float MySillyFunction(float x)
{
    return(pow(sqrt(log(exp(atanh(tanh(asinh(sinh(acosh(cosh(atan(tan(asin(sin(acos(cos(x))))))))))))))),2));
}

__kernel void sillysum(
    __global const float *a_g, __global const float *b_g, __global float *res_g)
{
  int gid = get_global_id(0);
  res_g[gid] = MySillyFunction(a_g[gid]) + MySillyFunction(b_g[gid]);
}

__kernel void sum(
    __global const float *a_g, __global const float *b_g, __global float *res_g)
{
  int gid = get_global_id(0);
  res_g[gid] = a_g[gid] + b_g[gid];
}
""").build()
    Elapsed = time.time() - TimeIn
    print("Building kernels : %.3f" % Elapsed)

    TimeIn = time.time()
    # Memory allocation on Device for result
    res_g = cl.Buffer(ctx, mf.WRITE_ONLY, a_np.nbytes)
    Elapsed = time.time() - TimeIn
    print("Allocation on Device for results : %.3f" % Elapsed)

    TimeIn = time.time()
    # Synthesis of function "sillysum" inside Kernel Sources
    knl = prg.sillysum  # Use this Kernel object for repeated calls
    Elapsed = time.time() - TimeIn
    print("Synthesis of kernel : %.3f" % Elapsed)

    TimeIn = time.time()
    # Call of kernel previously defined
    CallCL = knl(queue, a_np.shape, None, a_g, b_g, res_g)
    # Wait on the returned event so the measured time covers the kernel
    # execution itself and not only the enqueue.
    CallCL.wait()
    Elapsed = time.time() - TimeIn
    print("Execution of kernel : %.3f" % Elapsed)

    TimeIn = time.time()
    # Creation of vector for result with same size as input vectors
    res_np = np.empty_like(a_np)
    Elapsed = time.time() - TimeIn
    print("Allocation on Host for results: %.3f" % Elapsed)

    TimeIn = time.time()
    # Copy from Device to Host
    cl.enqueue_copy(queue, res_np, res_g)
    Elapsed = time.time() - TimeIn
    print("Copy from Device 2 Host : %.3f" % Elapsed)

    # Liberation of memory
    a_g.release()
    b_g.release()
    res_g.release()

    return res_np
164

    
165
import sys
166
import time
167

    
168
if __name__=='__main__':

    # Size of input vectors taken from the first command-line argument
    try:
        SIZE = int(sys.argv[1])
        print("Size of vectors set to %i" % SIZE)
    except (IndexError, ValueError):
        # No argument given, or it is not an integer: use the default size.
        SIZE = 50000
        print("Size of vectors set to default size %i" % SIZE)

    # Random float32 inputs, matching the `float` type used by the kernels
    a_np = np.random.rand(SIZE).astype(np.float32)
    b_np = np.random.rand(SIZE).astype(np.float32)

    # Reference computation on the host with Numpy.
    # perf_counter() is used for the rates because a coarse wall clock can
    # report a zero elapsed time on small inputs and break the division.
    TimeIn = time.perf_counter()
    res_np = NativeSillyAddition(a_np, b_np)
    NativeElapsed = time.perf_counter() - TimeIn
    NativeRate = int(SIZE / NativeElapsed)
    print("NativeRate: %i" % NativeRate)

    # Same computation through OpenCL (includes context creation, build,
    # transfers and execution)
    TimeIn = time.perf_counter()
    res_cl = OpenCLSillyAddition(a_np, b_np)
    OpenCLElapsed = time.perf_counter() - TimeIn
    OpenCLRate = int(SIZE / OpenCLElapsed)
    print("OpenCLRate: %i" % OpenCLRate)

    print("OpenCLvsNative ratio: %f" % (OpenCLRate / NativeRate))

    # Check on CPU with Numpy:
    print(res_cl - res_np)
    print(np.linalg.norm(res_cl - res_np))
    assert np.allclose(res_np, res_cl, rtol=1e-4)