Statistiques
| Révision :

root / ETSN / MySteps_2.py @ 274

Historique | Voir | Annoter | Télécharger (5,71 ko)

1
#!/usr/bin/env python3
2

    
3
import numpy as np
4
import pyopencl as cl
5

    
6
# Chain of 16 arithmetic functions (each function is followed by its inverse)
7
def MySillyFunction(x):
    """Run x through a chain of 16 NumPy functions and return the result.

    The chain applies eight functions, each immediately followed by its
    inverse (cos/arccos, sin/arcsin, tan/arctan, cosh/arccosh, sinh/arcsinh,
    tanh/arctanh, exp/log, sqrt/square). For inputs on which every pair
    round-trips (for example values strictly between 0 and 1) the result
    equals x up to floating-point rounding.

    Args:
        x: scalar or NumPy array; every step is applied element-wise.

    Returns:
        The transformed value, as produced by the NumPy functions used.
    """
    # Each line is one "function then inverse" pair, innermost pair first.
    y = np.arccos(np.cos(x))
    y = np.arcsin(np.sin(y))
    y = np.arctan(np.tan(y))
    y = np.arccosh(np.cosh(y))
    y = np.arcsinh(np.sinh(y))
    y = np.arctanh(np.tanh(y))
    y = np.log(np.exp(y))
    return np.power(np.sqrt(y), 2)
9

    
10
# Native operation under NumPy (for prototyping & tests)
11
def NativeAddition(a_np, b_np):
    """Add the two operands with the ``+`` operator and return the result.

    Args:
        a_np: left operand.
        b_np: right operand.

    Returns:
        ``a_np + b_np`` (element-wise for NumPy arrays).
    """
    total = a_np + b_np
    return total
13

    
14
# Native operation with MySillyFunction under NumPy (for prototyping & tests)
15
def NativeSillyAddition(a_np, b_np):
    """Apply MySillyFunction to each operand, then add the two results.

    Args:
        a_np: left operand, passed to MySillyFunction.
        b_np: right operand, passed to MySillyFunction.

    Returns:
        ``MySillyFunction(a_np) + MySillyFunction(b_np)``.
    """
    silly_a = MySillyFunction(a_np)
    silly_b = MySillyFunction(b_np)
    return silly_a + silly_b
17

    
18
# OpenCL complete operation
19
def OpenCLAddition(a_np,b_np):
    """Add two vectors element-wise on an OpenCL device, timing each step.

    The elapsed time of every stage (host-to-device copy, kernel build,
    device allocation, kernel lookup, kernel execution, host allocation,
    device-to-host copy) is printed to stdout.

    Args:
        a_np: first input array. The kernel reads the device buffer as
            ``float`` values, so the data is expected to be float32.
        b_np: second input array, same layout and length as ``a_np``.

    Returns:
        An array created with ``np.empty_like(a_np)`` and filled with the
        device result ``a_np + b_np``.
    """

    # Context creation
    ctx = cl.create_some_context()
    # Every process is stored in a queue
    queue = cl.CommandQueue(ctx)

    TimeIn=time.time()
    # Copy from Host to Device using pointers
    mf = cl.mem_flags
    a_g = cl.Buffer(ctx, mf.READ_ONLY | mf.COPY_HOST_PTR, hostbuf=a_np)
    b_g = cl.Buffer(ctx, mf.READ_ONLY | mf.COPY_HOST_PTR, hostbuf=b_np)
    Elapsed=time.time()-TimeIn
    print("Copy from Host 2 Device : %.3f" % Elapsed)

    TimeIn=time.time()
    # Definition of kernel under OpenCL
    prg = cl.Program(ctx, """
__kernel void sum(
    __global const float *a_g, __global const float *b_g, __global float *res_g)
{
  int gid = get_global_id(0);
  res_g[gid] = a_g[gid] + b_g[gid];
}
""").build()
    Elapsed=time.time()-TimeIn
    print("Building kernels : %.3f" % Elapsed)

    TimeIn=time.time()
    # Memory allocation on Device for result
    res_g = cl.Buffer(ctx, mf.WRITE_ONLY, a_np.nbytes)
    Elapsed=time.time()-TimeIn
    print("Allocation on Device for results : %.3f" % Elapsed)

    TimeIn=time.time()
    # Synthesis of function "sum" inside Kernel Sources
    knl = prg.sum  # Use this Kernel object for repeated calls
    Elapsed=time.time()-TimeIn
    print("Synthesis of kernel : %.3f" % Elapsed)

    TimeIn=time.time()
    # Call of kernel previously defined
    CallCL=knl(queue, a_np.shape, None, a_g, b_g, res_g)
    # The launch only enqueues the work: wait for the returned event so the
    # time reported below covers the actual execution, not just the enqueue
    # (otherwise the run time is silently charged to the copy back to host).
    CallCL.wait()
    Elapsed=time.time()-TimeIn
    print("Execution of kernel : %.3f" % Elapsed)

    TimeIn=time.time()
    # Creation of vector for result with same size as input vectors
    res_np = np.empty_like(a_np)
    Elapsed=time.time()-TimeIn
    print("Allocation on Host for results: %.3f" % Elapsed)

    TimeIn=time.time()
    # Copy from Device to Host
    cl.enqueue_copy(queue, res_np, res_g)
    Elapsed=time.time()-TimeIn
    print("Copy from Device 2 Host : %.3f" % Elapsed)

    return(res_np)
78

    
79
# OpenCL complete operation with MySillyFunction
80
def OpenCLSillyAddition(a_np,b_np):
    """Compute MySillyFunction(a) + MySillyFunction(b) on an OpenCL device.

    The OpenCL program built here defines a device-side ``MySillyFunction``
    plus two kernels, ``sillysum`` and ``sum``; only ``sillysum`` is
    launched. The elapsed time of each stage is printed to stdout, and the
    kernel event is waited on before the execution time is reported.

    Args:
        a_np: first input array. The kernel reads the device buffer as
            ``float`` values, so the data is expected to be float32.
        b_np: second input array, same layout and length as ``a_np``.

    Returns:
        An array created with ``np.empty_like(a_np)`` and filled with the
        result copied back from the device.
    """
    # Device context and the command queue all operations go through
    context = cl.create_some_context()
    cmd_queue = cl.CommandQueue(context)

    # --- Host -> Device transfer of both inputs ---
    started = time.time()
    flags = cl.mem_flags
    read_and_copy = flags.READ_ONLY | flags.COPY_HOST_PTR
    dev_a = cl.Buffer(context, read_and_copy, hostbuf=a_np)
    dev_b = cl.Buffer(context, read_and_copy, hostbuf=b_np)
    print("Copy from Host 2 Device : %.3f" % (time.time() - started))

    # --- Program build (helper function + both kernels) ---
    started = time.time()
    kernel_source = """

float MySillyFunction(float x)
{
    return(pow(sqrt(log(exp(atanh(tanh(asinh(sinh(acosh(cosh(atan(tan(asin(sin(acos(cos(x))))))))))))))),2)); 
}

__kernel void sillysum(
    __global const float *a_g, __global const float *b_g, __global float *res_g)
{
  int gid = get_global_id(0);
  res_g[gid] = MySillyFunction(a_g[gid]) + MySillyFunction(b_g[gid]);
}

__kernel void sum(
    __global const float *a_g, __global const float *b_g, __global float *res_g)
{
  int gid = get_global_id(0);
  res_g[gid] = a_g[gid] + b_g[gid];
}
"""
    program = cl.Program(context, kernel_source).build()
    print("Building kernels : %.3f" % (time.time() - started))

    # --- Device-side output buffer, sized like the first input ---
    started = time.time()
    dev_result = cl.Buffer(context, flags.WRITE_ONLY, a_np.nbytes)
    print("Allocation on Device for results : %.3f" % (time.time() - started))

    # --- Fetch the "sillysum" kernel object from the built program ---
    started = time.time()
    kernel = program.sillysum
    print("Synthesis of kernel : %.3f" % (time.time() - started))

    # --- Launch over the input's shape and block until it has finished ---
    started = time.time()
    event = kernel(cmd_queue, a_np.shape, None, dev_a, dev_b, dev_result)
    event.wait()
    print("Execution of kernel : %.3f" % (time.time() - started))

    # --- Host-side output array with the same shape and dtype as a_np ---
    started = time.time()
    host_result = np.empty_like(a_np)
    print("Allocation on Host for results: %.3f" % (time.time() - started))

    # --- Device -> Host transfer of the result ---
    started = time.time()
    cl.enqueue_copy(cmd_queue, host_result, dev_result)
    print("Copy from Device 2 Host : %.3f" % (time.time() - started))

    return host_result
154

    
155
import sys
156
import time
157

    
158
if __name__=='__main__':

    # Size of the input vectors: taken from the first command-line argument,
    # falling back to a default when it is missing, not an integer, or not
    # strictly positive.
    try:
        SIZE=int(sys.argv[1])
        if SIZE < 1:
            raise ValueError("vector size must be strictly positive")
    except (IndexError, ValueError):
        # Only the "no/invalid argument" cases are handled here; any other
        # error (e.g. KeyboardInterrupt) is left to propagate.
        SIZE=50000
        print("Size of vectors set to default size %i" % SIZE)
    else:
        print("Size of vectors set to %i" % SIZE)

    # Random float32 inputs: the OpenCL kernels operate on "float" buffers
    a_np = np.random.rand(SIZE).astype(np.float32)
    b_np = np.random.rand(SIZE).astype(np.float32)

    # Reference computation with NumPy on the host
    TimeIn=time.time()
    res_np=NativeSillyAddition(a_np,b_np)
    NativeElapsed=time.time()-TimeIn
    NativeRate=int(SIZE/NativeElapsed)
    print("NativeRate: %i" % NativeRate)

    # Same computation through OpenCL (includes context creation, build
    # and transfers, since the whole call is timed)
    TimeIn=time.time()
    res_cl=OpenCLSillyAddition(a_np,b_np)
    OpenCLElapsed=time.time()-TimeIn
    OpenCLRate=int(SIZE/OpenCLElapsed)
    print("OpenCLRate: %i" % OpenCLRate)

    print("OpenCLvsNative ratio: %f" % (OpenCLRate/NativeRate))

    # Check on CPU with Numpy:
    print(res_cl - res_np)
    print(np.linalg.norm(res_cl - res_np))
    assert np.allclose(res_np, res_cl,rtol=1e-4)