root / NBody / NBody.py @ 116
Historique | Voir | Annoter | Télécharger (6,97 ko)
1 | 116 | equemene | #!/usr/bin/env python
|
---|---|---|---|
2 | 116 | equemene | # -*- coding: utf-8 -*-
|
3 | 116 | equemene | """
|
4 | 116 | equemene | Demonstrateur OpenCL d'interaction NCorps
|
5 | 116 | equemene |
|
6 | 116 | equemene | Emmanuel QUEMENER <emmanuel.quemener@ens-lyon.fr> CeCILLv2
|
7 | 116 | equemene | """
|
8 | 116 | equemene | import getopt |
9 | 116 | equemene | import sys |
10 | 116 | equemene | import time |
11 | 116 | equemene | import numpy as np |
12 | 116 | equemene | import pyopencl as cl |
13 | 116 | equemene | import pyopencl.array as cl_array |
14 | 116 | equemene | from numpy.random import randint as nprnd |
15 | 116 | equemene | |
16 | 116 | equemene | |
17 | 116 | equemene | BlobOpenCL= """
|
18 | 116 | equemene | #define znew ((z=36969*(z&65535)+(z>>16))<<16)
|
19 | 116 | equemene | #define wnew ((w=18000*(w&65535)+(w>>16))&65535)
|
20 | 116 | equemene | #define MWC (znew+wnew)
|
21 | 116 | equemene | #define SHR3 (jsr=(jsr=(jsr=jsr^(jsr<<17))^(jsr>>13))^(jsr<<5))
|
22 | 116 | equemene | #define CONG (jcong=69069*jcong+1234567)
|
23 | 116 | equemene | #define KISS ((MWC^CONG)+SHR3)
|
24 | 116 | equemene |
|
25 | 116 | equemene | #define MWCfp MWC * 2.328306435454494e-10f
|
26 | 116 | equemene | #define KISSfp KISS * 2.328306435454494e-10f
|
27 | 116 | equemene | #define SHR3fp SHR3 * 2.328306435454494e-10f
|
28 | 116 | equemene | #define CONGfp CONG * 2.328306435454494e-10f
|
29 | 116 | equemene |
|
30 | 116 | equemene | #define LENGTH 1.
|
31 | 116 | equemene |
|
32 | 116 | equemene | #define PI 3.14159265359
|
33 | 116 | equemene |
|
34 | 116 | equemene | #define SMALL_NUM 0.000000001
|
35 | 116 | equemene |
|
36 | 116 | equemene | float Gravity(float4 m,float4 n)
|
37 | 116 | equemene | {
|
38 | 116 | equemene | return((float)(1./pow(distance(m,n),2)));
|
39 | 116 | equemene | }
|
40 | 116 | equemene |
|
41 | 116 | equemene | float8 RungeKutta(__global float8* clDataIn,int gid,float h)
|
42 | 116 | equemene | {
|
43 | 116 | equemene | float4 c[4],d[4],ct,dt;
|
44 | 116 | equemene |
|
45 | 116 | equemene | c[0]=h*clDataIn[gid].hi;
|
46 | 116 | equemene | c[1]=h*(clDataIn[gid].hi+c[0]/(float)2.);
|
47 | 116 | equemene | c[2]=h*(clDataIn[gid].hi+c[1]/(float)2.);
|
48 | 116 | equemene | c[3]=h*(clDataIn[gid].hi+c[2]);
|
49 | 116 | equemene |
|
50 | 116 | equemene | d[0]=(0.,0.,0.,0.);
|
51 | 116 | equemene | for (int i=0;i<get_global_size(0);i++)
|
52 | 116 | equemene | {
|
53 | 116 | equemene | if (gid != i)
|
54 | 116 | equemene | d[0]+=Gravity(clDataIn[gid].lo,clDataIn[i].lo);
|
55 | 116 | equemene | }
|
56 | 116 | equemene | d[0]*=h;
|
57 | 116 | equemene | d[1]=(0.,0.,0.,0.);
|
58 | 116 | equemene | for (int i=0;i<get_global_size(0);i++)
|
59 | 116 | equemene | {
|
60 | 116 | equemene | if (gid != i)
|
61 | 116 | equemene | d[1]+=Gravity(clDataIn[gid].lo+d[0]/(float)2.,clDataIn[i].lo);
|
62 | 116 | equemene | }
|
63 | 116 | equemene | d[1]*=h;
|
64 | 116 | equemene | d[2]=(0.,0.,0.,0.);
|
65 | 116 | equemene | for (int i=0;i<get_global_size(0);i++)
|
66 | 116 | equemene | {
|
67 | 116 | equemene | if (gid != i)
|
68 | 116 | equemene | d[2]+=Gravity(clDataIn[gid].lo+d[1]/(float)2.,clDataIn[i].lo);
|
69 | 116 | equemene | }
|
70 | 116 | equemene | d[2]*=h;
|
71 | 116 | equemene | d[3]=(0.,0.,0.,0.);
|
72 | 116 | equemene | for (int i=0;i<get_global_size(0);i++)
|
73 | 116 | equemene | {
|
74 | 116 | equemene | if (gid != i)
|
75 | 116 | equemene | d[3]+=Gravity(clDataIn[gid].lo+d[2],clDataIn[i].lo);
|
76 | 116 | equemene | }
|
77 | 116 | equemene | d[3]*=h;
|
78 | 116 | equemene |
|
79 | 116 | equemene | // ct=clDataIn[gid].lo+(c[0]+(float)2.*c[1]+(float)2.*c[2]+c[3])/(float)6.;
|
80 | 116 | equemene | // dt=clDataIn[gid].hi+(d[0]+(float)2.*d[1]+(float)2.*d[2]+d[3])/(float)6.;
|
81 | 116 | equemene | if (gid != 0)
|
82 | 116 | equemene | ct=(float4)distance(clDataIn[gid],clDataIn[0]);
|
83 | 116 | equemene | else
|
84 | 116 | equemene | ct=(float4)3.14159;
|
85 | 116 | equemene | dt=(float4)0;
|
86 | 116 | equemene |
|
87 | 116 | equemene | return((float8)(ct.s0,ct.s1,ct.s2,ct.s3,dt.s0,dt.s1,dt.s2,dt.s3));
|
88 | 116 | equemene | }
|
89 | 116 | equemene |
|
90 | 116 | equemene | __kernel void SplutterPoints(__global float8* clData, float box, float velocity,
|
91 | 116 | equemene | uint seed_z,uint seed_w)
|
92 | 116 | equemene | {
|
93 | 116 | equemene | int gid = get_global_id(0);
|
94 | 116 | equemene | uint z=seed_z+(uint)gid;
|
95 | 116 | equemene | uint w=seed_w-(uint)gid;
|
96 | 116 | equemene | float theta=MWCfp*PI;
|
97 | 116 | equemene | float phi=MWCfp*PI*2.;
|
98 | 116 | equemene | float sinTheta=sin(theta);
|
99 | 116 | equemene | clData[gid].s01234567 = (float8) (box*MWCfp,box*MWCfp,box*MWCfp,0.,0.,0.,0.,0.);
|
100 | 116 | equemene | clData[gid].s4=velocity*sinTheta*cos(phi);
|
101 | 116 | equemene | clData[gid].s5=velocity*sinTheta*sin(phi);
|
102 | 116 | equemene | clData[gid].s6=velocity*cos(theta);
|
103 | 116 | equemene | }
|
104 | 116 | equemene |
|
105 | 116 | equemene | __kernel void Evolution(__global float8* clDataOut,__global float8* clDataIn, float h)
|
106 | 116 | equemene | {
|
107 | 116 | equemene | int gid = get_global_id(0);
|
108 | 116 | equemene |
|
109 | 116 | equemene | clDataOut[gid]=RungeKutta(clDataIn,gid,h);
|
110 | 116 | equemene | barrier(CLK_GLOBAL_MEM_FENCE);
|
111 | 116 | equemene | }
|
112 | 116 | equemene |
|
113 | 116 | equemene | __kernel void Commit(__global float8* clDataOut,__global float8* clDataIn)
|
114 | 116 | equemene | {
|
115 | 116 | equemene | int gid = get_global_id(0);
|
116 | 116 | equemene |
|
117 | 116 | equemene | clDataIn[gid]=clDataOut[gid];
|
118 | 116 | equemene | barrier(CLK_GLOBAL_MEM_FENCE);
|
119 | 116 | equemene | }
|
120 | 116 | equemene |
|
121 | 116 | equemene | """
|
122 | 116 | equemene | |
123 | 116 | equemene | if __name__=='__main__': |
124 | 116 | equemene | |
125 | 116 | equemene | # Set defaults values
|
126 | 116 | equemene | |
127 | 116 | equemene | # Id of Device : 1 is for first find !
|
128 | 116 | equemene | Device=2
|
129 | 116 | equemene | # Iterations is integer
|
130 | 116 | equemene | Number=2
|
131 | 116 | equemene | # Size of box
|
132 | 116 | equemene | SizeOfBox=1000.
|
133 | 116 | equemene | # Initial velocity of particules
|
134 | 116 | equemene | Velocity=10.
|
135 | 116 | equemene | # Redo the last process
|
136 | 116 | equemene | Redo=1
|
137 | 116 | equemene | # Step
|
138 | 116 | equemene | Step=1.
|
139 | 116 | equemene | |
140 | 116 | equemene | HowToUse='%s -d <DeviceId> -n <NumberOfSegments> -z <SizeOfBox> -v <Velocity> -s <Step>'
|
141 | 116 | equemene | |
142 | 116 | equemene | try:
|
143 | 116 | equemene | opts, args = getopt.getopt(sys.argv[1:],"hd:n:z:v:r:",["device=","number=","size=","velocity=","redo=","step=1"]) |
144 | 116 | equemene | except getopt.GetoptError:
|
145 | 116 | equemene | print HowToUse % sys.argv[0] |
146 | 116 | equemene | sys.exit(2)
|
147 | 116 | equemene | |
148 | 116 | equemene | for opt, arg in opts: |
149 | 116 | equemene | if opt == '-h': |
150 | 116 | equemene | print HowToUse % sys.argv[0] |
151 | 116 | equemene | |
152 | 116 | equemene | print "\nInformations about devices detected under OpenCL:" |
153 | 116 | equemene | try:
|
154 | 116 | equemene | Id=1
|
155 | 116 | equemene | for platform in cl.get_platforms(): |
156 | 116 | equemene | for device in platform.get_devices(): |
157 | 116 | equemene | deviceType=cl.device_type.to_string(device.type) |
158 | 116 | equemene | print "Device #%i from %s of type %s : %s" % (Id,platform.vendor.lstrip(),deviceType,device.name.lstrip()) |
159 | 116 | equemene | Id=Id+1
|
160 | 116 | equemene | sys.exit() |
161 | 116 | equemene | except ImportError: |
162 | 116 | equemene | print "Your platform does not seem to support OpenCL" |
163 | 116 | equemene | sys.exit() |
164 | 116 | equemene | |
165 | 116 | equemene | elif opt in ("-d", "--device"): |
166 | 116 | equemene | Device=int(arg)
|
167 | 116 | equemene | elif opt in ("-n", "--number"): |
168 | 116 | equemene | Number=int(arg)
|
169 | 116 | equemene | elif opt in ("-z", "--size"): |
170 | 116 | equemene | SizeOfBox=np.float32(arg) |
171 | 116 | equemene | elif opt in ("-v", "--velocity"): |
172 | 116 | equemene | Velocity=np.float32(arg) |
173 | 116 | equemene | elif opt in ("-s", "--step"): |
174 | 116 | equemene | Step=np.float32(arg) |
175 | 116 | equemene | elif opt in ("-r", "--redo"): |
176 | 116 | equemene | Redo=int(arg)
|
177 | 116 | equemene | |
178 | 116 | equemene | print "Device choosed : %s" % Device |
179 | 116 | equemene | print "Number of segments : %s" % Number |
180 | 116 | equemene | print "Size of Box : %s" % SizeOfBox |
181 | 116 | equemene | print "Initial velocity % s" % Velocity |
182 | 116 | equemene | print "Redo the last process % s" % Redo |
183 | 116 | equemene | print "Step of iteration % s" % Step |
184 | 116 | equemene | |
185 | 116 | equemene | MyData = np.zeros(Number, dtype=cl_array.vec.float8) |
186 | 116 | equemene | |
187 | 116 | equemene | Id=1
|
188 | 116 | equemene | HasXPU=False
|
189 | 116 | equemene | for platform in cl.get_platforms(): |
190 | 116 | equemene | for device in platform.get_devices(): |
191 | 116 | equemene | if Id==Device:
|
192 | 116 | equemene | PlatForm=platform |
193 | 116 | equemene | XPU=device |
194 | 116 | equemene | print "CPU/GPU selected: ",device.name.lstrip() |
195 | 116 | equemene | HasXPU=True
|
196 | 116 | equemene | Id+=1
|
197 | 116 | equemene | |
198 | 116 | equemene | if HasXPU==False: |
199 | 116 | equemene | print "No XPU #%i found in all of %i devices, sorry..." % (Device,Id-1) |
200 | 116 | equemene | sys.exit() |
201 | 116 | equemene | |
202 | 116 | equemene | # Je cree le contexte et la queue pour son execution
|
203 | 116 | equemene | try:
|
204 | 116 | equemene | ctx = cl.Context([XPU]) |
205 | 116 | equemene | queue = cl.CommandQueue(ctx,properties=cl.command_queue_properties.PROFILING_ENABLE) |
206 | 116 | equemene | except:
|
207 | 116 | equemene | print "Crash during context creation" |
208 | 116 | equemene | |
209 | 116 | equemene | |
210 | 116 | equemene | MyRoutines = cl.Program(ctx, BlobOpenCL).build() |
211 | 116 | equemene | MyData[1][0]=1. |
212 | 116 | equemene | |
213 | 116 | equemene | mf = cl.mem_flags |
214 | 116 | equemene | # clDataIn = cl.Buffer(ctx, mf.READ_WRITE, MyData.nbytes)
|
215 | 116 | equemene | clDataIn = cl.Buffer(ctx, mf.WRITE_ONLY|mf.COPY_HOST_PTR,hostbuf=MyData) |
216 | 116 | equemene | clDataOut = cl.Buffer(ctx, mf.READ_WRITE, MyData.nbytes) |
217 | 116 | equemene | |
218 | 116 | equemene | print 'Tous au meme endroit',MyData |
219 | 116 | equemene | |
220 | 116 | equemene | # MyRoutines.SplutterPoints(queue,(Number,1),None,clDataIn,np.float32(SizeOfBox),np.float32(Velocity),np.uint32(nprnd(2**32)),np.uint32(nprnd(2**32)))
|
221 | 116 | equemene | |
222 | 116 | equemene | print 'Tous distribues',MyData |
223 | 116 | equemene | |
224 | 116 | equemene | CLLaunch=MyRoutines.Evolution(queue,(Number,1),None,clDataOut,clDataIn,np.float32(Step)) |
225 | 116 | equemene | CLLaunch.wait() |
226 | 116 | equemene | cl.enqueue_copy(queue, MyData, clDataOut) |
227 | 116 | equemene | print 'Tous calcules',MyData |
228 | 116 | equemene | CLLaunch=MyRoutines.Commit(queue,(Number,1),None,clDataOut,clDataIn) |
229 | 116 | equemene | CLLaunch.wait() |
230 | 116 | equemene | |
231 | 116 | equemene | time_start=time.time() |
232 | 116 | equemene | for i in xrange(Redo): |
233 | 116 | equemene | #CLLaunch=MyRoutines.ShortestDistance(queue, (Number,Number), None, clData, clDistance)
|
234 | 116 | equemene | sys.stdout.write('.')
|
235 | 116 | equemene | #CLLaunch.wait()
|
236 | 116 | equemene | print "\nDuration on %s for each %s" % (Device,(time.time()-time_start)/Redo) |
237 | 116 | equemene | |
238 | 116 | equemene | clDataIn.release() |
239 | 116 | equemene | clDataOut.release() |