import numpy as np
import pyopencl as cl
import pyopencl.array as cl_array
from numpy.random import randint as nprnd

# Indices of the OpenCL platform, and of the device within that platform,
# that the script runs on.
platformID = 0
deviceID = 0

# Work-group shape kept as a module-level setting.
workGroup = (1, 1)

# Number of float4 records to generate.
N = 10

# Host-side destination array: N zero-initialised float4 records.
MyData = np.zeros((N,), dtype=cl_array.vec.float4)

# Short alias for the memory-flag constants.
mf = cl.mem_flags

# Pick the device, then create a context and a command queue bound to it.
dev = cl.get_platforms()[platformID].get_devices()[deviceID]
ctx = cl.Context(devices=[dev])
queue = cl.CommandQueue(ctx)

# Device buffer sized to hold exactly the bytes of the host array.
clData = cl.Buffer(ctx, mf.READ_WRITE, size=MyData.nbytes)


# Build the OpenCL program that fills a float4 buffer with pseudo-random
# numbers.
#
# The macros implement small integer generators that update state variables
# in place each time they are expanded:
#   * znew / wnew / MWC use the per-work-item variables `z` and `w`.
#   * SHR3, CONG and KISS refer to `jsr` and `jcong`, which the kernel below
#     does not declare, so those macros must not be expanded there as-is.
# The *fp variants scale the 32-bit integer result by 2.328306435454494e-10f
# to obtain a float.
#
# Kernel SplutterSpace(clData, seed_z, seed_w):
#   clData  -- global float4 buffer; element get_global_id(0) is written.
#   seed_z  -- base seed for `z`; the work-item index is added to it.
#   seed_w  -- base seed for `w`; the work-item index is subtracted from it.
# Each work-item writes three MWCfp draws into .x/.y/.z and 0 into .w.
prg = cl.Program(ctx, """
#define znew  ((z=36969*(z&65535)+(z>>16))<<16)
#define wnew  ((w=18000*(w&65535)+(w>>16))&65535)
#define MWC   (znew+wnew)
#define SHR3  (jsr=(jsr=(jsr=jsr^(jsr<<17))^(jsr>>13))^(jsr<<5))
#define CONG  (jcong=69069*jcong+1234567)
#define KISS  ((MWC^CONG)+SHR3)

#define MWCfp  (MWC * 2.328306435454494e-10f)
#define KISSfp (KISS * 2.328306435454494e-10f)
#define SHR3fp (SHR3 * 2.328306435454494e-10f)
#define CONGfp (CONG * 2.328306435454494e-10f)

__kernel void SplutterSpace(__global float4* clData,
                               uint seed_z,uint seed_w)
{
   int gid = get_global_id(0);
   uint z=seed_z+(uint)gid;
   uint w=seed_w-(uint)gid;

   // Every MWCfp expansion assigns to z and w. Draw the three values in
   // separate statements so the state updates happen in a defined order
   // instead of several times inside a single expression.
   float r0 = MWCfp;
   float r1 = MWCfp;
   float r2 = MWCfp;

   clData[gid] = (float4) (r0, r1, r2, 0.0f);
}


 """).build()

# Draw the two kernel seeds as 32-bit unsigned values. Asking randint for
# dtype=np.uint32 keeps the exclusive upper bound of 2**32 valid even where
# NumPy's default integer type is only 32 bits wide. The np.uint32 wrapper
# guarantees the scalar type PyOpenCL receives for the `uint` kernel
# arguments.
seed_z = np.uint32(nprnd(0, 2**32, dtype=np.uint32))
seed_w = np.uint32(nprnd(0, 2**32, dtype=np.uint32))

# Launch one work-item per float4 element. The kernel only reads
# get_global_id(0), so the second global dimension stays at 1; passing None
# as the local size leaves the work-group shape to the OpenCL implementation.
prg.SplutterSpace(queue, (N, 1), None, clData, seed_z, seed_w)

# Copy the device buffer back into the host array before printing it.
cl.enqueue_copy(queue, MyData, clData)


print(MyData)
