root / Epidevomath / vector8.py @ 112
Historique | Voir | Annoter | Télécharger (8,18 ko)
1 |
# -*- coding: utf-8 -*-
|
---|---|
2 |
"""
|
3 |
Demonstrateur OpenCL pour l'ANR Epidevomath
|
4 |
|
5 |
Emmanuel QUEMENER <emmanuel.quemener@ens-lyon.fr>
|
6 |
"""
|
7 |
import getopt |
8 |
import sys |
9 |
import time |
10 |
import numpy as np |
11 |
import pyopencl as cl |
12 |
import pyopencl.array as cl_array |
13 |
from numpy.random import randint as nprnd |
14 |
|
15 |
|
16 |
BlobOpenCL= """
|
17 |
#define znew ((z=36969*(z&65535)+(z>>16))<<16)
|
18 |
#define wnew ((w=18000*(w&65535)+(w>>16))&65535)
|
19 |
#define MWC (znew+wnew)
|
20 |
#define SHR3 (jsr=(jsr=(jsr=jsr^(jsr<<17))^(jsr>>13))^(jsr<<5))
|
21 |
#define CONG (jcong=69069*jcong+1234567)
|
22 |
#define KISS ((MWC^CONG)+SHR3)
|
23 |
|
24 |
#define MWCfp MWC * 2.328306435454494e-10f
|
25 |
#define KISSfp KISS * 2.328306435454494e-10f
|
26 |
#define SHR3fp SHR3 * 2.328306435454494e-10f
|
27 |
#define CONGfp CONG * 2.328306435454494e-10f
|
28 |
|
29 |
#define LENGTH 1.
|
30 |
|
31 |
#define PI 3.14159265359
|
32 |
|
33 |
#define SMALL_NUM 0.000000001
|
34 |
|
35 |
__kernel void SplutterPoints(__global float8* clData, float box,
|
36 |
uint seed_z,uint seed_w)
|
37 |
{
|
38 |
int gid = get_global_id(0);
|
39 |
uint z=seed_z+(uint)gid;
|
40 |
uint w=seed_w-(uint)gid;
|
41 |
|
42 |
clData[gid].s01234567 = (float8) (box*MWCfp,box*MWCfp,box*MWCfp,0.,0.,0.,0.,0.);
|
43 |
}
|
44 |
|
45 |
__kernel void ExtendSegment(__global float8* clData, float length,
|
46 |
uint seed_z,uint seed_w)
|
47 |
{
|
48 |
int gid = get_global_id(0);
|
49 |
uint z=seed_z+(uint)gid;
|
50 |
uint w=seed_w-(uint)gid;
|
51 |
|
52 |
float theta=MWCfp*PI;
|
53 |
float phi=MWCfp*PI*2.;
|
54 |
float sinTheta=sin(theta);
|
55 |
clData[gid].s4=clData[gid].s0+length*sinTheta*cos(phi);
|
56 |
clData[gid].s5=clData[gid].s1+length*sinTheta*sin(phi);
|
57 |
clData[gid].s6=clData[gid].s2+length*cos(theta);
|
58 |
|
59 |
}
|
60 |
|
61 |
__kernel void EstimateLength(__global float8* clData,__global float* clSize)
|
62 |
{
|
63 |
int gid = get_global_id(0);
|
64 |
|
65 |
clSize[gid]=distance(clData[gid].lo,clData[gid].hi);
|
66 |
}
|
67 |
|
68 |
// Get from http://geomalgorithms.com/a07-_distance.html
|
69 |
|
70 |
__kernel void ShortestDistance(__global float8* clData,__global float* clDistance)
|
71 |
{
|
72 |
int gidx = get_global_id(0);
|
73 |
int ggsz = get_global_size(0);
|
74 |
int gidy = get_global_id(1);
|
75 |
|
76 |
float4 u = clData[gidx].hi - clData[gidx].lo;
|
77 |
float4 v = clData[gidy].hi - clData[gidy].lo;
|
78 |
float4 w = clData[gidx].lo - clData[gidy].lo;
|
79 |
|
80 |
float a = dot(u,u); // always >= 0
|
81 |
float b = dot(u,v);
|
82 |
float c = dot(v,v); // always >= 0
|
83 |
float d = dot(u,w);
|
84 |
float e = dot(v,w);
|
85 |
|
86 |
float D = a*c - b*b; // always >= 0
|
87 |
float sc, sN, sD = D; // sc = sN / sD, default sD = D >= 0
|
88 |
float tc, tN, tD = D; // tc = tN / tD, default tD = D >= 0
|
89 |
|
90 |
// compute the line parameters of the two closest points
|
91 |
if (D < SMALL_NUM) { // the lines are almost parallel
|
92 |
sN = 0.0; // force using point P0 on segment S1
|
93 |
sD = 1.0; // to prevent possible division by 0.0 later
|
94 |
tN = e;
|
95 |
tD = c;
|
96 |
}
|
97 |
else { // get the closest points on the infinite lines
|
98 |
sN = (b*e - c*d);
|
99 |
tN = (a*e - b*d);
|
100 |
if (sN < 0.0) { // sc < 0 => the s=0 edge is visible
|
101 |
sN = 0.0;
|
102 |
tN = e;
|
103 |
tD = c;
|
104 |
}
|
105 |
else if (sN > sD) { // sc > 1 => the s=1 edge is visible
|
106 |
sN = sD;
|
107 |
tN = e + b;
|
108 |
tD = c;
|
109 |
}
|
110 |
}
|
111 |
|
112 |
if (tN < 0.0) { // tc < 0 => the t=0 edge is visible
|
113 |
tN = 0.0;
|
114 |
// recompute sc for this edge
|
115 |
if (-d < 0.0)
|
116 |
sN = 0.0;
|
117 |
else if (-d > a)
|
118 |
sN = sD;
|
119 |
else {
|
120 |
sN = -d;
|
121 |
sD = a;
|
122 |
}
|
123 |
}
|
124 |
else if (tN > tD) { // tc > 1 => the t=1 edge is visible
|
125 |
tN = tD;
|
126 |
// recompute sc for this edge
|
127 |
if ((-d + b) < 0.0)
|
128 |
sN = 0;
|
129 |
else if ((-d + b) > a)
|
130 |
sN = sD;
|
131 |
else {
|
132 |
sN = (-d + b);
|
133 |
sD = a;
|
134 |
}
|
135 |
}
|
136 |
// finally do the division to get sc and tc
|
137 |
sc = (fabs(sN) < SMALL_NUM ? 0.0 : sN / sD);
|
138 |
tc = (fabs(tN) < SMALL_NUM ? 0.0 : tN / tD);
|
139 |
|
140 |
// get the difference of the two closest points
|
141 |
float4 dP = w + (sc * u) - (tc * v); // = S1(sc) - S2(tc)
|
142 |
|
143 |
clDistance[ggsz*gidy+gidx]=length(dP); // return the closest distance
|
144 |
}
|
145 |
|
146 |
"""
|
147 |
|
148 |
if __name__=='__main__': |
149 |
|
150 |
# Set defaults values
|
151 |
|
152 |
# Id of Device : 1 is for first find !
|
153 |
Device=1
|
154 |
# Iterations is integer
|
155 |
Number=16384
|
156 |
# Size of box
|
157 |
SizeOfBox=1000.
|
158 |
# Size of segment
|
159 |
LengthOfSegment=1.
|
160 |
# Redo the last process
|
161 |
Redo=1
|
162 |
|
163 |
HowToUse='%s -d <DeviceId> -n <NumberOfSegments> -s <SizeOfBox> -l <LengthOfSegment>'
|
164 |
|
165 |
try:
|
166 |
opts, args = getopt.getopt(sys.argv[1:],"hd:n:s:l:r:",["device=","number=","size=","length=","redo="]) |
167 |
except getopt.GetoptError:
|
168 |
print HowToUse % sys.argv[0] |
169 |
sys.exit(2)
|
170 |
|
171 |
for opt, arg in opts: |
172 |
if opt == '-h': |
173 |
print HowToUse % sys.argv[0] |
174 |
|
175 |
print "\nInformations about devices detected under OpenCL:" |
176 |
try:
|
177 |
Id=1
|
178 |
for platform in cl.get_platforms(): |
179 |
for device in platform.get_devices(): |
180 |
deviceType=cl.device_type.to_string(device.type) |
181 |
print "Device #%i from %s of type %s : %s" % (Id,platform.vendor.lstrip(),deviceType,device.name.lstrip()) |
182 |
Id=Id+1
|
183 |
sys.exit() |
184 |
except ImportError: |
185 |
print "Your platform does not seem to support OpenCL" |
186 |
sys.exit() |
187 |
|
188 |
elif opt in ("-d", "--device"): |
189 |
Device=int(arg)
|
190 |
elif opt in ("-n", "--number"): |
191 |
Number=int(arg)
|
192 |
elif opt in ("-s", "--size"): |
193 |
SizeOfBox=np.float32(arg) |
194 |
elif opt in ("-l", "--length"): |
195 |
LengthOfSegment=np.float32(arg) |
196 |
elif opt in ("-r", "--redo"): |
197 |
Redo=int(arg)
|
198 |
|
199 |
print "Device choosed : %s" % Device |
200 |
print "Number of segments : %s" % Number |
201 |
print "Size of Box : %s" % SizeOfBox |
202 |
print "Length of Segment % s" % LengthOfSegment |
203 |
print "Redo the last process % s" % Redo |
204 |
|
205 |
MyData = np.zeros(Number, dtype=cl_array.vec.float8) |
206 |
|
207 |
Id=1
|
208 |
HasXPU=False
|
209 |
for platform in cl.get_platforms(): |
210 |
for device in platform.get_devices(): |
211 |
if Id==Device:
|
212 |
PlatForm=platform |
213 |
XPU=device |
214 |
print "CPU/GPU selected: ",device.name.lstrip() |
215 |
HasXPU=True
|
216 |
Id+=1
|
217 |
|
218 |
if HasXPU==False: |
219 |
print "No XPU #%i found in all of %i devices, sorry..." % (Device,Id-1) |
220 |
sys.exit() |
221 |
|
222 |
# Je cree le contexte et la queue pour son execution
|
223 |
try:
|
224 |
ctx = cl.Context([XPU]) |
225 |
queue = cl.CommandQueue(ctx,properties=cl.command_queue_properties.PROFILING_ENABLE) |
226 |
except:
|
227 |
print "Crash during context creation" |
228 |
|
229 |
|
230 |
MyRoutines = cl.Program(ctx, BlobOpenCL).build() |
231 |
|
232 |
mf = cl.mem_flags |
233 |
clData = cl.Buffer(ctx, mf.READ_WRITE, MyData.nbytes) |
234 |
|
235 |
print 'Tous au meme endroit',MyData |
236 |
|
237 |
MyRoutines.SplutterPoints(queue,(Number,1),None,clData,np.float32(SizeOfBox-LengthOfSegment),np.uint32(nprnd(2**32)),np.uint32(nprnd(2**32))) |
238 |
|
239 |
cl.enqueue_copy(queue, MyData, clData) |
240 |
|
241 |
print 'Tous distribues',MyData |
242 |
|
243 |
MyRoutines.ExtendSegment(queue,(Number,1),None,clData,np.float32(LengthOfSegment),np.uint32(nprnd(2**32)),np.uint32(nprnd(2**32))) |
244 |
|
245 |
cl.enqueue_copy(queue, MyData, clData) |
246 |
|
247 |
print 'Tous avec leur extremite',MyData |
248 |
|
249 |
MySize = np.zeros(len(MyData), dtype=np.float32)
|
250 |
clSize = cl.Buffer(ctx, mf.READ_WRITE, MySize.nbytes) |
251 |
|
252 |
MyRoutines.EstimateLength(queue, (Number,1), None, clData, clSize) |
253 |
cl.enqueue_copy(queue, MySize, clSize) |
254 |
|
255 |
print 'La distance de chacun avec son extremite',MySize |
256 |
|
257 |
MyDistance = np.zeros(len(MyData)*len(MyData), dtype=np.float32) |
258 |
clDistance = cl.Buffer(ctx, mf.READ_WRITE, MyDistance.nbytes) |
259 |
|
260 |
time_start=time.time() |
261 |
for i in xrange(Redo): |
262 |
sys.stdout.write('.')
|
263 |
CLLaunch=MyRoutines.ShortestDistance(queue, (Number,Number), None, clData, clDistance)
|
264 |
CLLaunch.wait() |
265 |
print "Duration for each iteration %s" % ((time.time()-time_start)/Redo) |
266 |
cl.enqueue_copy(queue, MyDistance, clDistance) |
267 |
|
268 |
MyDistance=np.reshape(MyDistance,(Number,Number)) |
269 |
clDistance.release() |
270 |
|
271 |
print 'La distance de chacun',MyDistance |
272 |
|
273 |
clData.release() |