Statistiques
| Révision :

root / Epidevomath / vector8.py @ 112

Historique | Voir | Annoter | Télécharger (8,18 ko)

1 112 equemene
# -*- coding: utf-8 -*-
2 112 equemene
"""
3 112 equemene
Demonstrateur OpenCL pour l'ANR Epidevomath
4 112 equemene

5 112 equemene
Emmanuel QUEMENER <emmanuel.quemener@ens-lyon.fr>
6 112 equemene
"""
7 112 equemene
import getopt
8 112 equemene
import sys
9 112 equemene
import time
10 109 equemene
import numpy as np
11 109 equemene
import pyopencl as cl
12 109 equemene
import pyopencl.array as cl_array
13 109 equemene
from numpy.random import randint as nprnd
14 109 equemene
15 109 equemene
16 112 equemene
BlobOpenCL= """
17 109 equemene
#define znew  ((z=36969*(z&65535)+(z>>16))<<16)
18 109 equemene
#define wnew  ((w=18000*(w&65535)+(w>>16))&65535)
19 109 equemene
#define MWC   (znew+wnew)
20 109 equemene
#define SHR3  (jsr=(jsr=(jsr=jsr^(jsr<<17))^(jsr>>13))^(jsr<<5))
21 109 equemene
#define CONG  (jcong=69069*jcong+1234567)
22 109 equemene
#define KISS  ((MWC^CONG)+SHR3)
23 109 equemene

24 109 equemene
#define MWCfp MWC * 2.328306435454494e-10f
25 109 equemene
#define KISSfp KISS * 2.328306435454494e-10f
26 109 equemene
#define SHR3fp SHR3 * 2.328306435454494e-10f
27 109 equemene
#define CONGfp CONG * 2.328306435454494e-10f
28 109 equemene

29 109 equemene
#define LENGTH 1.
30 109 equemene

31 109 equemene
#define PI 3.14159265359
32 109 equemene

33 112 equemene
#define SMALL_NUM 0.000000001
34 109 equemene

35 112 equemene
__kernel void SplutterPoints(__global float8* clData, float box,
36 109 equemene
                               uint seed_z,uint seed_w)
37 109 equemene
{
38 109 equemene
    int gid = get_global_id(0);
39 109 equemene
    uint z=seed_z+(uint)gid;
40 109 equemene
    uint w=seed_w-(uint)gid;
41 109 equemene

42 112 equemene
    clData[gid].s01234567 = (float8) (box*MWCfp,box*MWCfp,box*MWCfp,0.,0.,0.,0.,0.);
43 109 equemene
}
44 109 equemene

45 112 equemene
__kernel void ExtendSegment(__global float8* clData, float length,
46 109 equemene
                               uint seed_z,uint seed_w)
47 109 equemene
{
48 109 equemene
    int gid = get_global_id(0);
49 109 equemene
    uint z=seed_z+(uint)gid;
50 109 equemene
    uint w=seed_w-(uint)gid;
51 109 equemene

52 109 equemene
    float theta=MWCfp*PI;
53 109 equemene
    float phi=MWCfp*PI*2.;
54 109 equemene
    float sinTheta=sin(theta);
55 112 equemene
    clData[gid].s4=clData[gid].s0+length*sinTheta*cos(phi);
56 112 equemene
    clData[gid].s5=clData[gid].s1+length*sinTheta*sin(phi);
57 112 equemene
    clData[gid].s6=clData[gid].s2+length*cos(theta);
58 109 equemene

59 109 equemene
}
60 109 equemene

61 109 equemene
__kernel void EstimateLength(__global float8* clData,__global float* clSize)
62 109 equemene
{
63 109 equemene
    int gid = get_global_id(0);
64 109 equemene

65 109 equemene
    clSize[gid]=distance(clData[gid].lo,clData[gid].hi);
66 109 equemene
}
67 109 equemene

68 109 equemene
// Get from http://geomalgorithms.com/a07-_distance.html
69 109 equemene

70 109 equemene
__kernel void ShortestDistance(__global float8* clData,__global float* clDistance)
71 109 equemene
{
72 109 equemene
    int gidx = get_global_id(0);
73 109 equemene
    int ggsz = get_global_size(0);
74 109 equemene
    int gidy = get_global_id(1);
75 109 equemene

76 109 equemene
    float4   u = clData[gidx].hi - clData[gidx].lo;
77 109 equemene
    float4   v = clData[gidy].hi - clData[gidy].lo;
78 109 equemene
    float4   w = clData[gidx].lo - clData[gidy].lo;
79 109 equemene

80 109 equemene
    float    a = dot(u,u);         // always >= 0
81 109 equemene
    float    b = dot(u,v);
82 109 equemene
    float    c = dot(v,v);         // always >= 0
83 109 equemene
    float    d = dot(u,w);
84 109 equemene
    float    e = dot(v,w);
85 109 equemene

86 109 equemene
    float    D = a*c - b*b;        // always >= 0
87 109 equemene
    float    sc, sN, sD = D;       // sc = sN / sD, default sD = D >= 0
88 109 equemene
    float    tc, tN, tD = D;       // tc = tN / tD, default tD = D >= 0
89 109 equemene

90 109 equemene
    // compute the line parameters of the two closest points
91 109 equemene
    if (D < SMALL_NUM) { // the lines are almost parallel
92 109 equemene
        sN = 0.0;         // force using point P0 on segment S1
93 109 equemene
        sD = 1.0;         // to prevent possible division by 0.0 later
94 109 equemene
        tN = e;
95 109 equemene
        tD = c;
96 109 equemene
    }
97 109 equemene
    else {                 // get the closest points on the infinite lines
98 109 equemene
        sN = (b*e - c*d);
99 109 equemene
        tN = (a*e - b*d);
100 109 equemene
        if (sN < 0.0) {        // sc < 0 => the s=0 edge is visible
101 109 equemene
            sN = 0.0;
102 109 equemene
            tN = e;
103 109 equemene
            tD = c;
104 109 equemene
        }
105 109 equemene
        else if (sN > sD) {  // sc > 1  => the s=1 edge is visible
106 109 equemene
            sN = sD;
107 109 equemene
            tN = e + b;
108 109 equemene
            tD = c;
109 109 equemene
        }
110 109 equemene
    }
111 109 equemene

112 109 equemene
    if (tN < 0.0) {            // tc < 0 => the t=0 edge is visible
113 109 equemene
        tN = 0.0;
114 109 equemene
        // recompute sc for this edge
115 109 equemene
        if (-d < 0.0)
116 109 equemene
            sN = 0.0;
117 109 equemene
        else if (-d > a)
118 109 equemene
            sN = sD;
119 109 equemene
        else {
120 109 equemene
            sN = -d;
121 109 equemene
            sD = a;
122 109 equemene
        }
123 109 equemene
    }
124 109 equemene
    else if (tN > tD) {      // tc > 1  => the t=1 edge is visible
125 109 equemene
        tN = tD;
126 109 equemene
        // recompute sc for this edge
127 109 equemene
        if ((-d + b) < 0.0)
128 109 equemene
            sN = 0;
129 109 equemene
        else if ((-d + b) > a)
130 109 equemene
            sN = sD;
131 109 equemene
        else {
132 109 equemene
            sN = (-d +  b);
133 109 equemene
            sD = a;
134 109 equemene
        }
135 109 equemene
    }
136 109 equemene
    // finally do the division to get sc and tc
137 109 equemene
    sc = (fabs(sN) < SMALL_NUM ? 0.0 : sN / sD);
138 109 equemene
    tc = (fabs(tN) < SMALL_NUM ? 0.0 : tN / tD);
139 109 equemene

140 109 equemene
    // get the difference of the two closest points
141 109 equemene
    float4   dP = w + (sc * u) - (tc * v);  // =  S1(sc) - S2(tc)
142 109 equemene

143 109 equemene
    clDistance[ggsz*gidy+gidx]=length(dP);   // return the closest distance
144 109 equemene
}
145 109 equemene

146 112 equemene
 """
147 109 equemene
148 112 equemene
if __name__=='__main__':
149 112 equemene
150 112 equemene
    # Set defaults values
151 112 equemene
152 112 equemene
    # Id of Device : 1 is for first find !
153 112 equemene
    Device=1
154 112 equemene
    # Iterations is integer
155 112 equemene
    Number=16384
156 112 equemene
    # Size of box
157 112 equemene
    SizeOfBox=1000.
158 112 equemene
    # Size of segment
159 112 equemene
    LengthOfSegment=1.
160 112 equemene
    # Redo the last process
161 112 equemene
    Redo=1
162 109 equemene
163 112 equemene
    HowToUse='%s -d <DeviceId> -n <NumberOfSegments> -s <SizeOfBox> -l <LengthOfSegment>'
164 109 equemene
165 112 equemene
    try:
166 112 equemene
        opts, args = getopt.getopt(sys.argv[1:],"hd:n:s:l:r:",["device=","number=","size=","length=","redo="])
167 112 equemene
    except getopt.GetoptError:
168 112 equemene
        print HowToUse % sys.argv[0]
169 112 equemene
        sys.exit(2)
170 109 equemene
171 112 equemene
    for opt, arg in opts:
172 112 equemene
        if opt == '-h':
173 112 equemene
            print HowToUse % sys.argv[0]
174 109 equemene
175 112 equemene
            print "\nInformations about devices detected under OpenCL:"
176 112 equemene
            try:
177 112 equemene
                Id=1
178 112 equemene
                for platform in cl.get_platforms():
179 112 equemene
                    for device in platform.get_devices():
180 112 equemene
                        deviceType=cl.device_type.to_string(device.type)
181 112 equemene
                        print "Device #%i from %s of type %s : %s" % (Id,platform.vendor.lstrip(),deviceType,device.name.lstrip())
182 112 equemene
                        Id=Id+1
183 112 equemene
                sys.exit()
184 112 equemene
            except ImportError:
185 112 equemene
                print "Your platform does not seem to support OpenCL"
186 112 equemene
                sys.exit()
187 109 equemene
188 112 equemene
        elif opt in ("-d", "--device"):
189 112 equemene
            Device=int(arg)
190 112 equemene
        elif opt in ("-n", "--number"):
191 112 equemene
            Number=int(arg)
192 112 equemene
        elif opt in ("-s", "--size"):
193 112 equemene
            SizeOfBox=np.float32(arg)
194 112 equemene
        elif opt in ("-l", "--length"):
195 112 equemene
            LengthOfSegment=np.float32(arg)
196 112 equemene
        elif opt in ("-r", "--redo"):
197 112 equemene
            Redo=int(arg)
198 112 equemene
199 112 equemene
    print "Device choosed : %s" % Device
200 112 equemene
    print "Number of segments : %s" % Number
201 112 equemene
    print "Size of Box : %s" % SizeOfBox
202 112 equemene
    print "Length of Segment % s" % LengthOfSegment
203 112 equemene
    print "Redo the last process % s" % Redo
204 112 equemene
205 112 equemene
    MyData = np.zeros(Number, dtype=cl_array.vec.float8)
206 109 equemene
207 112 equemene
    Id=1
208 112 equemene
    HasXPU=False
209 112 equemene
    for platform in cl.get_platforms():
210 112 equemene
        for device in platform.get_devices():
211 112 equemene
            if Id==Device:
212 112 equemene
                PlatForm=platform
213 112 equemene
                XPU=device
214 112 equemene
                print "CPU/GPU selected: ",device.name.lstrip()
215 112 equemene
                HasXPU=True
216 112 equemene
            Id+=1
217 109 equemene
218 112 equemene
    if HasXPU==False:
219 112 equemene
        print "No XPU #%i found in all of %i devices, sorry..." % (Device,Id-1)
220 112 equemene
        sys.exit()
221 109 equemene
222 112 equemene
    # Je cree le contexte et la queue pour son execution
223 112 equemene
    try:
224 112 equemene
        ctx = cl.Context([XPU])
225 112 equemene
        queue = cl.CommandQueue(ctx,properties=cl.command_queue_properties.PROFILING_ENABLE)
226 112 equemene
    except:
227 112 equemene
        print "Crash during context creation"
228 112 equemene
229 109 equemene
230 112 equemene
    MyRoutines = cl.Program(ctx, BlobOpenCL).build()
231 109 equemene
232 112 equemene
    mf = cl.mem_flags
233 112 equemene
    clData = cl.Buffer(ctx, mf.READ_WRITE, MyData.nbytes)
234 109 equemene
235 112 equemene
    print 'Tous au meme endroit',MyData
236 109 equemene
237 112 equemene
    MyRoutines.SplutterPoints(queue,(Number,1),None,clData,np.float32(SizeOfBox-LengthOfSegment),np.uint32(nprnd(2**32)),np.uint32(nprnd(2**32)))
238 109 equemene
239 112 equemene
    cl.enqueue_copy(queue, MyData, clData)
240 109 equemene
241 112 equemene
    print 'Tous distribues',MyData
242 112 equemene
243 112 equemene
    MyRoutines.ExtendSegment(queue,(Number,1),None,clData,np.float32(LengthOfSegment),np.uint32(nprnd(2**32)),np.uint32(nprnd(2**32)))
244 112 equemene
245 112 equemene
    cl.enqueue_copy(queue, MyData, clData)
246 112 equemene
247 112 equemene
    print 'Tous avec leur extremite',MyData
248 112 equemene
249 112 equemene
    MySize = np.zeros(len(MyData), dtype=np.float32)
250 112 equemene
    clSize = cl.Buffer(ctx, mf.READ_WRITE, MySize.nbytes)
251 112 equemene
252 112 equemene
    MyRoutines.EstimateLength(queue, (Number,1), None, clData, clSize)
253 112 equemene
    cl.enqueue_copy(queue, MySize, clSize)
254 112 equemene
255 112 equemene
    print 'La distance de chacun avec son extremite',MySize
256 112 equemene
257 112 equemene
    MyDistance = np.zeros(len(MyData)*len(MyData), dtype=np.float32)
258 112 equemene
    clDistance = cl.Buffer(ctx, mf.READ_WRITE, MyDistance.nbytes)
259 112 equemene
260 112 equemene
    time_start=time.time()
261 112 equemene
    for i in xrange(Redo):
262 112 equemene
        sys.stdout.write('.')
263 112 equemene
        CLLaunch=MyRoutines.ShortestDistance(queue, (Number,Number), None, clData, clDistance)
264 112 equemene
        CLLaunch.wait()
265 112 equemene
    print "Duration for each iteration %s" % ((time.time()-time_start)/Redo)
266 112 equemene
    cl.enqueue_copy(queue, MyDistance, clDistance)
267 112 equemene
268 112 equemene
    MyDistance=np.reshape(MyDistance,(Number,Number))
269 112 equemene
    clDistance.release()
270 112 equemene
271 112 equemene
    print 'La distance de chacun',MyDistance
272 112 equemene
273 112 equemene
    clData.release()