root / Epidevomath / vector8.py @ 301
Historique | Voir | Annoter | Télécharger (8,22 ko)
1 | 113 | equemene | #!/usr/bin/env python
|
---|---|---|---|
2 | 112 | equemene | # -*- coding: utf-8 -*-
|
3 | 112 | equemene | """
|
4 | 112 | equemene | Demonstrateur OpenCL pour l'ANR Epidevomath
|
5 | 112 | equemene |
|
6 | 113 | equemene | Emmanuel QUEMENER <emmanuel.quemener@ens-lyon.fr> CeCILLv2
|
7 | 112 | equemene | """
|
8 | 112 | equemene | import getopt |
9 | 112 | equemene | import sys |
10 | 112 | equemene | import time |
11 | 109 | equemene | import numpy as np |
12 | 109 | equemene | import pyopencl as cl |
13 | 109 | equemene | import pyopencl.array as cl_array |
14 | 109 | equemene | from numpy.random import randint as nprnd |
15 | 109 | equemene | |
16 | 109 | equemene | |
17 | 112 | equemene | BlobOpenCL= """
|
18 | 109 | equemene | #define znew ((z=36969*(z&65535)+(z>>16))<<16)
|
19 | 109 | equemene | #define wnew ((w=18000*(w&65535)+(w>>16))&65535)
|
20 | 109 | equemene | #define MWC (znew+wnew)
|
21 | 109 | equemene | #define SHR3 (jsr=(jsr=(jsr=jsr^(jsr<<17))^(jsr>>13))^(jsr<<5))
|
22 | 109 | equemene | #define CONG (jcong=69069*jcong+1234567)
|
23 | 109 | equemene | #define KISS ((MWC^CONG)+SHR3)
|
24 | 109 | equemene |
|
25 | 109 | equemene | #define MWCfp MWC * 2.328306435454494e-10f
|
26 | 109 | equemene | #define KISSfp KISS * 2.328306435454494e-10f
|
27 | 109 | equemene | #define SHR3fp SHR3 * 2.328306435454494e-10f
|
28 | 109 | equemene | #define CONGfp CONG * 2.328306435454494e-10f
|
29 | 109 | equemene |
|
30 | 109 | equemene | #define LENGTH 1.
|
31 | 109 | equemene |
|
32 | 109 | equemene | #define PI 3.14159265359
|
33 | 109 | equemene |
|
34 | 112 | equemene | #define SMALL_NUM 0.000000001
|
35 | 109 | equemene |
|
36 | 112 | equemene | __kernel void SplutterPoints(__global float8* clData, float box,
|
37 | 109 | equemene | uint seed_z,uint seed_w)
|
38 | 109 | equemene | {
|
39 | 109 | equemene | int gid = get_global_id(0);
|
40 | 109 | equemene | uint z=seed_z+(uint)gid;
|
41 | 109 | equemene | uint w=seed_w-(uint)gid;
|
42 | 109 | equemene |
|
43 | 112 | equemene | clData[gid].s01234567 = (float8) (box*MWCfp,box*MWCfp,box*MWCfp,0.,0.,0.,0.,0.);
|
44 | 109 | equemene | }
|
45 | 109 | equemene |
|
46 | 112 | equemene | __kernel void ExtendSegment(__global float8* clData, float length,
|
47 | 109 | equemene | uint seed_z,uint seed_w)
|
48 | 109 | equemene | {
|
49 | 109 | equemene | int gid = get_global_id(0);
|
50 | 109 | equemene | uint z=seed_z+(uint)gid;
|
51 | 109 | equemene | uint w=seed_w-(uint)gid;
|
52 | 109 | equemene |
|
53 | 109 | equemene | float theta=MWCfp*PI;
|
54 | 109 | equemene | float phi=MWCfp*PI*2.;
|
55 | 109 | equemene | float sinTheta=sin(theta);
|
56 | 112 | equemene | clData[gid].s4=clData[gid].s0+length*sinTheta*cos(phi);
|
57 | 112 | equemene | clData[gid].s5=clData[gid].s1+length*sinTheta*sin(phi);
|
58 | 112 | equemene | clData[gid].s6=clData[gid].s2+length*cos(theta);
|
59 | 109 | equemene |
|
60 | 109 | equemene | }
|
61 | 109 | equemene |
|
62 | 109 | equemene | __kernel void EstimateLength(__global float8* clData,__global float* clSize)
|
63 | 109 | equemene | {
|
64 | 109 | equemene | int gid = get_global_id(0);
|
65 | 109 | equemene |
|
66 | 109 | equemene | clSize[gid]=distance(clData[gid].lo,clData[gid].hi);
|
67 | 109 | equemene | }
|
68 | 109 | equemene |
|
69 | 109 | equemene | // Get from http://geomalgorithms.com/a07-_distance.html
|
70 | 109 | equemene |
|
71 | 117 | equemene | __kernel void ShortestDistance(__global float8* clData,__global float* clDistance)
|
72 | 109 | equemene | {
|
73 | 109 | equemene | int gidx = get_global_id(0);
|
74 | 109 | equemene | int ggsz = get_global_size(0);
|
75 | 109 | equemene | int gidy = get_global_id(1);
|
76 | 109 | equemene |
|
77 | 109 | equemene | float4 u = clData[gidx].hi - clData[gidx].lo;
|
78 | 109 | equemene | float4 v = clData[gidy].hi - clData[gidy].lo;
|
79 | 109 | equemene | float4 w = clData[gidx].lo - clData[gidy].lo;
|
80 | 109 | equemene |
|
81 | 109 | equemene | float a = dot(u,u); // always >= 0
|
82 | 109 | equemene | float b = dot(u,v);
|
83 | 109 | equemene | float c = dot(v,v); // always >= 0
|
84 | 109 | equemene | float d = dot(u,w);
|
85 | 109 | equemene | float e = dot(v,w);
|
86 | 109 | equemene |
|
87 | 109 | equemene | float D = a*c - b*b; // always >= 0
|
88 | 109 | equemene | float sc, sN, sD = D; // sc = sN / sD, default sD = D >= 0
|
89 | 109 | equemene | float tc, tN, tD = D; // tc = tN / tD, default tD = D >= 0
|
90 | 109 | equemene |
|
91 | 109 | equemene | // compute the line parameters of the two closest points
|
92 | 109 | equemene | if (D < SMALL_NUM) { // the lines are almost parallel
|
93 | 109 | equemene | sN = 0.0; // force using point P0 on segment S1
|
94 | 109 | equemene | sD = 1.0; // to prevent possible division by 0.0 later
|
95 | 109 | equemene | tN = e;
|
96 | 109 | equemene | tD = c;
|
97 | 109 | equemene | }
|
98 | 109 | equemene | else { // get the closest points on the infinite lines
|
99 | 109 | equemene | sN = (b*e - c*d);
|
100 | 109 | equemene | tN = (a*e - b*d);
|
101 | 109 | equemene | if (sN < 0.0) { // sc < 0 => the s=0 edge is visible
|
102 | 109 | equemene | sN = 0.0;
|
103 | 109 | equemene | tN = e;
|
104 | 109 | equemene | tD = c;
|
105 | 109 | equemene | }
|
106 | 109 | equemene | else if (sN > sD) { // sc > 1 => the s=1 edge is visible
|
107 | 109 | equemene | sN = sD;
|
108 | 109 | equemene | tN = e + b;
|
109 | 109 | equemene | tD = c;
|
110 | 109 | equemene | }
|
111 | 109 | equemene | }
|
112 | 109 | equemene |
|
113 | 109 | equemene | if (tN < 0.0) { // tc < 0 => the t=0 edge is visible
|
114 | 109 | equemene | tN = 0.0;
|
115 | 109 | equemene | // recompute sc for this edge
|
116 | 109 | equemene | if (-d < 0.0)
|
117 | 109 | equemene | sN = 0.0;
|
118 | 109 | equemene | else if (-d > a)
|
119 | 109 | equemene | sN = sD;
|
120 | 109 | equemene | else {
|
121 | 109 | equemene | sN = -d;
|
122 | 109 | equemene | sD = a;
|
123 | 109 | equemene | }
|
124 | 109 | equemene | }
|
125 | 109 | equemene | else if (tN > tD) { // tc > 1 => the t=1 edge is visible
|
126 | 109 | equemene | tN = tD;
|
127 | 109 | equemene | // recompute sc for this edge
|
128 | 109 | equemene | if ((-d + b) < 0.0)
|
129 | 109 | equemene | sN = 0;
|
130 | 109 | equemene | else if ((-d + b) > a)
|
131 | 109 | equemene | sN = sD;
|
132 | 109 | equemene | else {
|
133 | 109 | equemene | sN = (-d + b);
|
134 | 109 | equemene | sD = a;
|
135 | 109 | equemene | }
|
136 | 109 | equemene | }
|
137 | 109 | equemene | // finally do the division to get sc and tc
|
138 | 109 | equemene | sc = (fabs(sN) < SMALL_NUM ? 0.0 : sN / sD);
|
139 | 109 | equemene | tc = (fabs(tN) < SMALL_NUM ? 0.0 : tN / tD);
|
140 | 109 | equemene |
|
141 | 109 | equemene | // get the difference of the two closest points
|
142 | 109 | equemene | float4 dP = w + (sc * u) - (tc * v); // = S1(sc) - S2(tc)
|
143 | 109 | equemene |
|
144 | 109 | equemene | clDistance[ggsz*gidy+gidx]=length(dP); // return the closest distance
|
145 | 109 | equemene | }
|
146 | 109 | equemene |
|
147 | 112 | equemene | """
|
148 | 109 | equemene | |
149 | 112 | equemene | if __name__=='__main__': |
150 | 112 | equemene | |
151 | 112 | equemene | # Set defaults values
|
152 | 112 | equemene | |
153 | 112 | equemene | # Id of Device : 1 is for first find !
|
154 | 112 | equemene | Device=1
|
155 | 112 | equemene | # Iterations is integer
|
156 | 112 | equemene | Number=16384
|
157 | 112 | equemene | # Size of box
|
158 | 112 | equemene | SizeOfBox=1000.
|
159 | 112 | equemene | # Size of segment
|
160 | 112 | equemene | LengthOfSegment=1.
|
161 | 112 | equemene | # Redo the last process
|
162 | 112 | equemene | Redo=1
|
163 | 109 | equemene | |
164 | 112 | equemene | HowToUse='%s -d <DeviceId> -n <NumberOfSegments> -s <SizeOfBox> -l <LengthOfSegment>'
|
165 | 109 | equemene | |
166 | 112 | equemene | try:
|
167 | 112 | equemene | opts, args = getopt.getopt(sys.argv[1:],"hd:n:s:l:r:",["device=","number=","size=","length=","redo="]) |
168 | 112 | equemene | except getopt.GetoptError:
|
169 | 112 | equemene | print HowToUse % sys.argv[0] |
170 | 112 | equemene | sys.exit(2)
|
171 | 109 | equemene | |
172 | 112 | equemene | for opt, arg in opts: |
173 | 112 | equemene | if opt == '-h': |
174 | 112 | equemene | print HowToUse % sys.argv[0] |
175 | 109 | equemene | |
176 | 112 | equemene | print "\nInformations about devices detected under OpenCL:" |
177 | 112 | equemene | try:
|
178 | 112 | equemene | Id=1
|
179 | 112 | equemene | for platform in cl.get_platforms(): |
180 | 112 | equemene | for device in platform.get_devices(): |
181 | 112 | equemene | deviceType=cl.device_type.to_string(device.type) |
182 | 112 | equemene | print "Device #%i from %s of type %s : %s" % (Id,platform.vendor.lstrip(),deviceType,device.name.lstrip()) |
183 | 112 | equemene | Id=Id+1
|
184 | 112 | equemene | sys.exit() |
185 | 112 | equemene | except ImportError: |
186 | 112 | equemene | print "Your platform does not seem to support OpenCL" |
187 | 112 | equemene | sys.exit() |
188 | 109 | equemene | |
189 | 112 | equemene | elif opt in ("-d", "--device"): |
190 | 112 | equemene | Device=int(arg)
|
191 | 112 | equemene | elif opt in ("-n", "--number"): |
192 | 112 | equemene | Number=int(arg)
|
193 | 112 | equemene | elif opt in ("-s", "--size"): |
194 | 112 | equemene | SizeOfBox=np.float32(arg) |
195 | 112 | equemene | elif opt in ("-l", "--length"): |
196 | 112 | equemene | LengthOfSegment=np.float32(arg) |
197 | 112 | equemene | elif opt in ("-r", "--redo"): |
198 | 112 | equemene | Redo=int(arg)
|
199 | 112 | equemene | |
200 | 112 | equemene | print "Device choosed : %s" % Device |
201 | 112 | equemene | print "Number of segments : %s" % Number |
202 | 112 | equemene | print "Size of Box : %s" % SizeOfBox |
203 | 112 | equemene | print "Length of Segment % s" % LengthOfSegment |
204 | 112 | equemene | print "Redo the last process % s" % Redo |
205 | 112 | equemene | |
206 | 112 | equemene | MyData = np.zeros(Number, dtype=cl_array.vec.float8) |
207 | 109 | equemene | |
208 | 112 | equemene | Id=1
|
209 | 112 | equemene | HasXPU=False
|
210 | 112 | equemene | for platform in cl.get_platforms(): |
211 | 112 | equemene | for device in platform.get_devices(): |
212 | 112 | equemene | if Id==Device:
|
213 | 112 | equemene | PlatForm=platform |
214 | 112 | equemene | XPU=device |
215 | 112 | equemene | print "CPU/GPU selected: ",device.name.lstrip() |
216 | 112 | equemene | HasXPU=True
|
217 | 112 | equemene | Id+=1
|
218 | 109 | equemene | |
219 | 112 | equemene | if HasXPU==False: |
220 | 112 | equemene | print "No XPU #%i found in all of %i devices, sorry..." % (Device,Id-1) |
221 | 112 | equemene | sys.exit() |
222 | 109 | equemene | |
223 | 112 | equemene | # Je cree le contexte et la queue pour son execution
|
224 | 112 | equemene | try:
|
225 | 112 | equemene | ctx = cl.Context([XPU]) |
226 | 112 | equemene | queue = cl.CommandQueue(ctx,properties=cl.command_queue_properties.PROFILING_ENABLE) |
227 | 112 | equemene | except:
|
228 | 112 | equemene | print "Crash during context creation" |
229 | 112 | equemene | |
230 | 109 | equemene | |
231 | 112 | equemene | MyRoutines = cl.Program(ctx, BlobOpenCL).build() |
232 | 109 | equemene | |
233 | 112 | equemene | mf = cl.mem_flags |
234 | 112 | equemene | clData = cl.Buffer(ctx, mf.READ_WRITE, MyData.nbytes) |
235 | 109 | equemene | |
236 | 112 | equemene | print 'Tous au meme endroit',MyData |
237 | 109 | equemene | |
238 | 112 | equemene | MyRoutines.SplutterPoints(queue,(Number,1),None,clData,np.float32(SizeOfBox-LengthOfSegment),np.uint32(nprnd(2**32)),np.uint32(nprnd(2**32))) |
239 | 109 | equemene | |
240 | 112 | equemene | cl.enqueue_copy(queue, MyData, clData) |
241 | 109 | equemene | |
242 | 112 | equemene | print 'Tous distribues',MyData |
243 | 112 | equemene | |
244 | 112 | equemene | MyRoutines.ExtendSegment(queue,(Number,1),None,clData,np.float32(LengthOfSegment),np.uint32(nprnd(2**32)),np.uint32(nprnd(2**32))) |
245 | 112 | equemene | |
246 | 112 | equemene | cl.enqueue_copy(queue, MyData, clData) |
247 | 112 | equemene | |
248 | 112 | equemene | print 'Tous avec leur extremite',MyData |
249 | 112 | equemene | |
250 | 112 | equemene | MySize = np.zeros(len(MyData), dtype=np.float32)
|
251 | 112 | equemene | clSize = cl.Buffer(ctx, mf.READ_WRITE, MySize.nbytes) |
252 | 112 | equemene | |
253 | 112 | equemene | MyRoutines.EstimateLength(queue, (Number,1), None, clData, clSize) |
254 | 112 | equemene | cl.enqueue_copy(queue, MySize, clSize) |
255 | 112 | equemene | |
256 | 112 | equemene | print 'La distance de chacun avec son extremite',MySize |
257 | 112 | equemene | |
258 | 112 | equemene | MyDistance = np.zeros(len(MyData)*len(MyData), dtype=np.float32) |
259 | 112 | equemene | clDistance = cl.Buffer(ctx, mf.READ_WRITE, MyDistance.nbytes) |
260 | 112 | equemene | |
261 | 112 | equemene | time_start=time.time() |
262 | 112 | equemene | for i in xrange(Redo): |
263 | 114 | equemene | CLLaunch=MyRoutines.ShortestDistance(queue, (Number,Number), None, clData, clDistance)
|
264 | 112 | equemene | sys.stdout.write('.')
|
265 | 112 | equemene | CLLaunch.wait() |
266 | 114 | equemene | print "\nDuration on %s for each %s" % (Device,(time.time()-time_start)/Redo) |
267 | 112 | equemene | cl.enqueue_copy(queue, MyDistance, clDistance) |
268 | 112 | equemene | |
269 | 112 | equemene | MyDistance=np.reshape(MyDistance,(Number,Number)) |
270 | 112 | equemene | clDistance.release() |
271 | 112 | equemene | |
272 | 112 | equemene | print 'La distance de chacun',MyDistance |
273 | 112 | equemene | |
274 | 112 | equemene | clData.release() |