Statistiques
| Révision :

root / ETSN / MySteps_0.py @ 275

Historique | Voir | Annoter | Télécharger (1,7 ko)

1 268 equemene
#!/usr/bin/env python3
2 268 equemene
3 268 equemene
import numpy as np
4 268 equemene
import pyopencl as cl
5 268 equemene
6 268 equemene
# Native operation under NumPy (for prototyping & tests)
7 268 equemene
def NativeAddition(a_np,b_np):
    """Return the sum of the two operands computed on the host with `+`.

    Serves as the reference result the OpenCL version is compared against.
    """
    host_sum = a_np + b_np
    return host_sum
9 268 equemene
10 268 equemene
# OpenCL complete operation
11 268 equemene
def OpenCLAddition(a_np,b_np):
    """Add two arrays element-wise on an OpenCL device.

    The kernel works on single-precision floats, so both operands are
    converted to C-contiguous ``float32`` arrays before being uploaded.

    Args:
        a_np: First operand (array-like).
        b_np: Second operand (array-like), same shape as ``a_np``.

    Returns:
        A ``float32`` NumPy array with the shape of ``a_np`` holding
        ``a_np + b_np``.

    Raises:
        ValueError: If the two operands do not have the same shape.
    """
    # The kernel reads `float` values from a flat buffer: force a matching
    # dtype and a contiguous layout on the host side.
    a_host = np.ascontiguousarray(a_np, dtype=np.float32)
    b_host = np.ascontiguousarray(b_np, dtype=np.float32)
    if a_host.shape != b_host.shape:
        raise ValueError(
            "operands must have the same shape, got %s and %s"
            % (a_host.shape, b_host.shape)
        )

    # Creation of vector for result with same size as input vectors
    res_np = np.empty_like(a_host)
    if a_host.size == 0:
        # Nothing to compute; avoids requesting a zero-byte device buffer.
        return res_np

    # Context creation
    ctx = cl.create_some_context()
    # Every process is stored in a queue
    queue = cl.CommandQueue(ctx)

    mf = cl.mem_flags
    # Buffers are tracked so they are released even when a later step fails.
    device_buffers = []
    try:
        # Copy from Host to Device using pointers
        a_g = cl.Buffer(ctx, mf.READ_ONLY | mf.COPY_HOST_PTR, hostbuf=a_host)
        device_buffers.append(a_g)
        b_g = cl.Buffer(ctx, mf.READ_ONLY | mf.COPY_HOST_PTR, hostbuf=b_host)
        device_buffers.append(b_g)
        # Memory allocation on Device for result
        res_g = cl.Buffer(ctx, mf.WRITE_ONLY, a_host.nbytes)
        device_buffers.append(res_g)

        # Definition of kernel under OpenCL
        prg = cl.Program(ctx, """
__kernel void sum(
    __global const float *a_g, __global const float *b_g, __global float *res_g)
{
  int gid = get_global_id(0);
  res_g[gid] = a_g[gid] + b_g[gid];
}
""").build()

        # Synthesis of function "sum" inside Kernel Sources
        knl = prg.sum  # Use this Kernel object for repeated calls
        # The kernel only indexes dimension 0, so launch one work-item per
        # element over the flattened array instead of using the N-D shape.
        knl(queue, (a_host.size,), None, a_g, b_g, res_g)

        # Copy from Device to Host
        cl.enqueue_copy(queue, res_np, res_g)
    finally:
        # Liberation of memory
        for device_buffer in device_buffers:
            device_buffer.release()

    return res_np
51 268 equemene
52 268 equemene
#if __name__=='__main__':
# NOTE(review): the guard above is commented out, so the self-check below
# runs whenever this file is executed or imported.

# Two random single-precision input vectors of 50000 elements each
a_np = np.random.rand(50000).astype(np.float32)
b_np = np.random.rand(50000).astype(np.float32)

# Reference result on the host, then the same addition through OpenCL
res_np = NativeAddition(a_np, b_np)
res_cl = OpenCLAddition(a_np, b_np)

# Check on CPU with Numpy: show the element-wise difference and its norm,
# then stop with an AssertionError if the two results are not close.
print(res_cl - res_np)
print(np.linalg.norm(res_cl - res_np))
assert np.allclose(res_np, res_cl)