Révision 112 Epidevomath/vector8.py
vector8.py (revision 112) | ||
---|---|---|
1 |
# -*- coding: utf-8 -*- |
|
2 |
""" |
|
3 |
OpenCL demonstrator for the ANR Epidevomath project |
|
4 |
|
|
5 |
Emmanuel QUEMENER <emmanuel.quemener@ens-lyon.fr> |
|
6 |
""" |
|
7 |
import getopt |
|
8 |
import sys |
|
9 |
import time |
|
1 | 10 |
import numpy as np |
2 | 11 |
import pyopencl as cl |
3 | 12 |
import pyopencl.array as cl_array |
... | ... | |
3 | 12 |
from numpy.random import randint as nprnd |
4 | 13 |
|
5 |
platformID = 0 |
|
6 |
deviceID = 1 |
|
7 |
workGroup=(1,1) |
|
8 | 14 |
|
9 |
N = 16384 |
|
10 |
MyData = np.zeros(N, dtype=cl_array.vec.float8) |
|
11 |
|
|
12 |
dev = cl.get_platforms()[platformID].get_devices()[deviceID] |
|
13 |
|
|
14 |
ctx = cl.Context([dev]) |
|
15 |
queue = cl.CommandQueue(ctx) |
|
16 |
mf = cl.mem_flags |
|
17 |
clData = cl.Buffer(ctx, mf.READ_WRITE, MyData.nbytes) |
|
18 |
|
|
19 |
MyRoutines = cl.Program(ctx, """ |
|
15 |
BlobOpenCL= """ |
|
20 | 16 |
#define znew ((z=36969*(z&65535)+(z>>16))<<16) |
21 | 17 |
#define wnew ((w=18000*(w&65535)+(w>>16))&65535) |
... | ... | |
34 | 30 |
|
35 | 31 |
#define PI 3.14159265359 |
36 | 32 |
|
37 |
#define SMALL_NUM 0.0000001 |
|
33 |
#define SMALL_NUM 0.000000001
|
|
38 | 34 |
|
39 |
__kernel void SplutterSpace(__global float8* clData,
|
|
35 |
__kernel void SplutterPoints(__global float8* clData, float box,
|
|
40 | 36 |
uint seed_z,uint seed_w) |
41 | 37 |
{ |
42 | 38 |
int gid = get_global_id(0); |
43 | 39 |
uint z=seed_z+(uint)gid; |
44 | 40 |
uint w=seed_w-(uint)gid; |
45 | 41 |
|
46 |
clData[gid].s01234567 = (float8) (MWCfp,MWCfp,MWCfp,0.,0.,0.,0.,0.);
|
|
42 |
clData[gid].s01234567 = (float8) (box*MWCfp,box*MWCfp,box*MWCfp,0.,0.,0.,0.,0.);
|
|
47 | 43 |
} |
48 | 44 |
|
49 |
__kernel void ExtendSegment(__global float8* clData, |
|
45 |
__kernel void ExtendSegment(__global float8* clData, float length,
|
|
50 | 46 |
uint seed_z,uint seed_w) |
51 | 47 |
{ |
52 | 48 |
int gid = get_global_id(0); |
... | ... | |
56 | 52 |
float theta=MWCfp*PI; |
57 | 53 |
float phi=MWCfp*PI*2.; |
58 | 54 |
float sinTheta=sin(theta); |
59 |
clData[gid].s4=clData[gid].s0+LENGTH*sinTheta*cos(phi);
|
|
60 |
clData[gid].s5=clData[gid].s1+LENGTH*sinTheta*sin(phi);
|
|
61 |
clData[gid].s6=clData[gid].s2+LENGTH*cos(theta);
|
|
55 |
clData[gid].s4=clData[gid].s0+length*sinTheta*cos(phi);
|
|
56 |
clData[gid].s5=clData[gid].s1+length*sinTheta*sin(phi);
|
|
57 |
clData[gid].s6=clData[gid].s2+length*cos(theta);
|
|
62 | 58 |
|
63 | 59 |
} |
64 | 60 |
|
... | ... | |
147 | 143 |
clDistance[ggsz*gidy+gidx]=length(dP); // return the closest distance |
148 | 144 |
} |
149 | 145 |
|
150 |
""").build()
|
|
146 |
""" |
|
151 | 147 |
|
152 |
print 'Tous au meme endroit',MyData |
|
148 |
if __name__=='__main__': |
|
149 |
|
|
150 |
# Set default values |
|
151 |
|
|
152 |
# Id of the device to use: numbering starts at 1, so 1 selects the first device found |
|
153 |
Device=1 |
|
154 |
# Number of segments (integer) |
|
155 |
Number=16384 |
|
156 |
# Size of box |
|
157 |
SizeOfBox=1000. |
|
158 |
# Size of segment |
|
159 |
LengthOfSegment=1. |
|
160 |
# Number of times the last process (the ShortestDistance kernel launch) is repeated for timing |
|
161 |
Redo=1 |
|
153 | 162 |
|
154 |
MyRoutines.SplutterSpace(queue, (N,1), None, clData, np.uint32(nprnd(2**32)),np.uint32(nprnd(2**32)))
|
|
163 |
HowToUse='%s -d <DeviceId> -n <NumberOfSegments> -s <SizeOfBox> -l <LengthOfSegment>'
|
|
155 | 164 |
|
156 |
cl.enqueue_copy(queue, MyData, clData) |
|
165 |
try: |
|
166 |
opts, args = getopt.getopt(sys.argv[1:],"hd:n:s:l:r:",["device=","number=","size=","length=","redo="]) |
|
167 |
except getopt.GetoptError: |
|
168 |
print HowToUse % sys.argv[0] |
|
169 |
sys.exit(2) |
|
157 | 170 |
|
158 |
print 'Tous distribues',MyData |
|
171 |
for opt, arg in opts: |
|
172 |
if opt == '-h': |
|
173 |
print HowToUse % sys.argv[0] |
|
159 | 174 |
|
160 |
MyRoutines.ExtendSegment(queue, (N,1), None, clData,np.uint32(nprnd(2**32)),np.uint32(nprnd(2**32))) |
|
175 |
print "\nInformations about devices detected under OpenCL:" |
|
176 |
try: |
|
177 |
Id=1 |
|
178 |
for platform in cl.get_platforms(): |
|
179 |
for device in platform.get_devices(): |
|
180 |
deviceType=cl.device_type.to_string(device.type) |
|
181 |
print "Device #%i from %s of type %s : %s" % (Id,platform.vendor.lstrip(),deviceType,device.name.lstrip()) |
|
182 |
Id=Id+1 |
|
183 |
sys.exit() |
|
184 |
except ImportError: |
|
185 |
print "Your platform does not seem to support OpenCL" |
|
186 |
sys.exit() |
|
161 | 187 |
|
162 |
cl.enqueue_copy(queue, MyData, clData) |
|
188 |
elif opt in ("-d", "--device"): |
|
189 |
Device=int(arg) |
|
190 |
elif opt in ("-n", "--number"): |
|
191 |
Number=int(arg) |
|
192 |
elif opt in ("-s", "--size"): |
|
193 |
SizeOfBox=np.float32(arg) |
|
194 |
elif opt in ("-l", "--length"): |
|
195 |
LengthOfSegment=np.float32(arg) |
|
196 |
elif opt in ("-r", "--redo"): |
|
197 |
Redo=int(arg) |
|
198 |
|
|
199 |
print "Device choosed : %s" % Device |
|
200 |
print "Number of segments : %s" % Number |
|
201 |
print "Size of Box : %s" % SizeOfBox |
|
202 |
print "Length of Segment % s" % LengthOfSegment |
|
203 |
print "Redo the last process % s" % Redo |
|
204 |
|
|
205 |
MyData = np.zeros(Number, dtype=cl_array.vec.float8) |
|
163 | 206 |
|
164 |
print 'Tous avec leur extremite',MyData |
|
207 |
Id=1 |
|
208 |
HasXPU=False |
|
209 |
for platform in cl.get_platforms(): |
|
210 |
for device in platform.get_devices(): |
|
211 |
if Id==Device: |
|
212 |
PlatForm=platform |
|
213 |
XPU=device |
|
214 |
print "CPU/GPU selected: ",device.name.lstrip() |
|
215 |
HasXPU=True |
|
216 |
Id+=1 |
|
165 | 217 |
|
166 |
MySize = np.zeros(len(MyData), dtype=np.float32) |
|
167 |
clSize = cl.Buffer(ctx, mf.READ_WRITE, MySize.nbytes) |
|
218 |
if HasXPU==False: |
|
219 |
print "No XPU #%i found in all of %i devices, sorry..." % (Device,Id-1) |
|
220 |
sys.exit() |
|
168 | 221 |
|
169 |
MyRoutines.EstimateLength(queue, (N,1), None, clData, clSize) |
|
170 |
cl.enqueue_copy(queue, MySize, clSize) |
|
222 |
# Create the context and the command queue used for execution on the selected device |
|
223 |
try: |
|
224 |
ctx = cl.Context([XPU]) |
|
225 |
queue = cl.CommandQueue(ctx,properties=cl.command_queue_properties.PROFILING_ENABLE) |
|
226 |
except: |
|
227 |
print "Crash during context creation" |
|
228 |
|
|
171 | 229 |
|
172 |
print 'La distance de chacun avec son extremite',MySize
|
|
230 |
MyRoutines = cl.Program(ctx, BlobOpenCL).build()
|
|
173 | 231 |
|
174 |
MyDistance = np.zeros(len(MyData)*len(MyData), dtype=np.float32)
|
|
175 |
clDistance = cl.Buffer(ctx, mf.READ_WRITE, MyDistance.nbytes)
|
|
232 |
mf = cl.mem_flags
|
|
233 |
clData = cl.Buffer(ctx, mf.READ_WRITE, MyData.nbytes)
|
|
176 | 234 |
|
177 |
MyRoutines.ShortestDistance(queue, (N,N), None, clData, clDistance) |
|
178 |
cl.enqueue_copy(queue, MyDistance, clDistance) |
|
235 |
print 'Tous au meme endroit',MyData |
|
179 | 236 |
|
180 |
MyDistance=np.reshape(MyDistance,(N,N))
|
|
237 |
MyRoutines.SplutterPoints(queue,(Number,1),None,clData,np.float32(SizeOfBox-LengthOfSegment),np.uint32(nprnd(2**32)),np.uint32(nprnd(2**32)))
|
|
181 | 238 |
|
182 |
print 'La distance de chacun',MyDistance
|
|
239 |
cl.enqueue_copy(queue, MyData, clData)
|
|
183 | 240 |
|
241 |
print 'Tous distribues',MyData |
|
242 |
|
|
243 |
MyRoutines.ExtendSegment(queue,(Number,1),None,clData,np.float32(LengthOfSegment),np.uint32(nprnd(2**32)),np.uint32(nprnd(2**32))) |
|
244 |
|
|
245 |
cl.enqueue_copy(queue, MyData, clData) |
|
246 |
|
|
247 |
print 'Tous avec leur extremite',MyData |
|
248 |
|
|
249 |
MySize = np.zeros(len(MyData), dtype=np.float32) |
|
250 |
clSize = cl.Buffer(ctx, mf.READ_WRITE, MySize.nbytes) |
|
251 |
|
|
252 |
MyRoutines.EstimateLength(queue, (Number,1), None, clData, clSize) |
|
253 |
cl.enqueue_copy(queue, MySize, clSize) |
|
254 |
|
|
255 |
print 'La distance de chacun avec son extremite',MySize |
|
256 |
|
|
257 |
MyDistance = np.zeros(len(MyData)*len(MyData), dtype=np.float32) |
|
258 |
clDistance = cl.Buffer(ctx, mf.READ_WRITE, MyDistance.nbytes) |
|
259 |
|
|
260 |
time_start=time.time() |
|
261 |
for i in xrange(Redo): |
|
262 |
sys.stdout.write('.') |
|
263 |
CLLaunch=MyRoutines.ShortestDistance(queue, (Number,Number), None, clData, clDistance) |
|
264 |
CLLaunch.wait() |
|
265 |
print "Duration for each iteration %s" % ((time.time()-time_start)/Redo) |
|
266 |
cl.enqueue_copy(queue, MyDistance, clDistance) |
|
267 |
|
|
268 |
MyDistance=np.reshape(MyDistance,(Number,Number)) |
|
269 |
clDistance.release() |
|
270 |
|
|
271 |
print 'La distance de chacun',MyDistance |
|
272 |
|
|
273 |
clData.release() |
Formats disponibles : Unified diff