Révision 116
NBody/NBody.py (revision 116) | ||
---|---|---|
1 |
#!/usr/bin/env python |
|
2 |
# -*- coding: utf-8 -*- |
|
3 |
""" |
|
4 |
Demonstrateur OpenCL d'interaction NCorps |
|
5 |
|
|
6 |
Emmanuel QUEMENER <emmanuel.quemener@ens-lyon.fr> CeCILLv2 |
|
7 |
""" |
|
8 |
import getopt |
|
9 |
import sys |
|
10 |
import time |
|
11 |
import numpy as np |
|
12 |
import pyopencl as cl |
|
13 |
import pyopencl.array as cl_array |
|
14 |
from numpy.random import randint as nprnd |
|
15 |
|
|
16 |
|
|
17 |
BlobOpenCL= """ |
|
18 |
#define znew ((z=36969*(z&65535)+(z>>16))<<16) |
|
19 |
#define wnew ((w=18000*(w&65535)+(w>>16))&65535) |
|
20 |
#define MWC (znew+wnew) |
|
21 |
#define SHR3 (jsr=(jsr=(jsr=jsr^(jsr<<17))^(jsr>>13))^(jsr<<5)) |
|
22 |
#define CONG (jcong=69069*jcong+1234567) |
|
23 |
#define KISS ((MWC^CONG)+SHR3) |
|
24 |
|
|
25 |
#define MWCfp MWC * 2.328306435454494e-10f |
|
26 |
#define KISSfp KISS * 2.328306435454494e-10f |
|
27 |
#define SHR3fp SHR3 * 2.328306435454494e-10f |
|
28 |
#define CONGfp CONG * 2.328306435454494e-10f |
|
29 |
|
|
30 |
#define LENGTH 1. |
|
31 |
|
|
32 |
#define PI 3.14159265359 |
|
33 |
|
|
34 |
#define SMALL_NUM 0.000000001 |
|
35 |
|
|
36 |
float Gravity(float4 m,float4 n) |
|
37 |
{ |
|
38 |
return((float)(1./pow(distance(m,n),2))); |
|
39 |
} |
|
40 |
|
|
41 |
float8 RungeKutta(__global float8* clDataIn,int gid,float h) |
|
42 |
{ |
|
43 |
float4 c[4],d[4],ct,dt; |
|
44 |
|
|
45 |
c[0]=h*clDataIn[gid].hi; |
|
46 |
c[1]=h*(clDataIn[gid].hi+c[0]/(float)2.); |
|
47 |
c[2]=h*(clDataIn[gid].hi+c[1]/(float)2.); |
|
48 |
c[3]=h*(clDataIn[gid].hi+c[2]); |
|
49 |
|
|
50 |
d[0]=(0.,0.,0.,0.); |
|
51 |
for (int i=0;i<get_global_size(0);i++) |
|
52 |
{ |
|
53 |
if (gid != i) |
|
54 |
d[0]+=Gravity(clDataIn[gid].lo,clDataIn[i].lo); |
|
55 |
} |
|
56 |
d[0]*=h; |
|
57 |
d[1]=(0.,0.,0.,0.); |
|
58 |
for (int i=0;i<get_global_size(0);i++) |
|
59 |
{ |
|
60 |
if (gid != i) |
|
61 |
d[1]+=Gravity(clDataIn[gid].lo+d[0]/(float)2.,clDataIn[i].lo); |
|
62 |
} |
|
63 |
d[1]*=h; |
|
64 |
d[2]=(0.,0.,0.,0.); |
|
65 |
for (int i=0;i<get_global_size(0);i++) |
|
66 |
{ |
|
67 |
if (gid != i) |
|
68 |
d[2]+=Gravity(clDataIn[gid].lo+d[1]/(float)2.,clDataIn[i].lo); |
|
69 |
} |
|
70 |
d[2]*=h; |
|
71 |
d[3]=(0.,0.,0.,0.); |
|
72 |
for (int i=0;i<get_global_size(0);i++) |
|
73 |
{ |
|
74 |
if (gid != i) |
|
75 |
d[3]+=Gravity(clDataIn[gid].lo+d[2],clDataIn[i].lo); |
|
76 |
} |
|
77 |
d[3]*=h; |
|
78 |
|
|
79 |
// ct=clDataIn[gid].lo+(c[0]+(float)2.*c[1]+(float)2.*c[2]+c[3])/(float)6.; |
|
80 |
// dt=clDataIn[gid].hi+(d[0]+(float)2.*d[1]+(float)2.*d[2]+d[3])/(float)6.; |
|
81 |
if (gid != 0) |
|
82 |
ct=(float4)distance(clDataIn[gid],clDataIn[0]); |
|
83 |
else |
|
84 |
ct=(float4)3.14159; |
|
85 |
dt=(float4)0; |
|
86 |
|
|
87 |
return((float8)(ct.s0,ct.s1,ct.s2,ct.s3,dt.s0,dt.s1,dt.s2,dt.s3)); |
|
88 |
} |
|
89 |
|
|
90 |
__kernel void SplutterPoints(__global float8* clData, float box, float velocity, |
|
91 |
uint seed_z,uint seed_w) |
|
92 |
{ |
|
93 |
int gid = get_global_id(0); |
|
94 |
uint z=seed_z+(uint)gid; |
|
95 |
uint w=seed_w-(uint)gid; |
|
96 |
float theta=MWCfp*PI; |
|
97 |
float phi=MWCfp*PI*2.; |
|
98 |
float sinTheta=sin(theta); |
|
99 |
clData[gid].s01234567 = (float8) (box*MWCfp,box*MWCfp,box*MWCfp,0.,0.,0.,0.,0.); |
|
100 |
clData[gid].s4=velocity*sinTheta*cos(phi); |
|
101 |
clData[gid].s5=velocity*sinTheta*sin(phi); |
|
102 |
clData[gid].s6=velocity*cos(theta); |
|
103 |
} |
|
104 |
|
|
105 |
__kernel void Evolution(__global float8* clDataOut,__global float8* clDataIn, float h) |
|
106 |
{ |
|
107 |
int gid = get_global_id(0); |
|
108 |
|
|
109 |
clDataOut[gid]=RungeKutta(clDataIn,gid,h); |
|
110 |
barrier(CLK_GLOBAL_MEM_FENCE); |
|
111 |
} |
|
112 |
|
|
113 |
__kernel void Commit(__global float8* clDataOut,__global float8* clDataIn) |
|
114 |
{ |
|
115 |
int gid = get_global_id(0); |
|
116 |
|
|
117 |
clDataIn[gid]=clDataOut[gid]; |
|
118 |
barrier(CLK_GLOBAL_MEM_FENCE); |
|
119 |
} |
|
120 |
|
|
121 |
""" |
|
122 |
|
|
123 |
if __name__=='__main__': |
|
124 |
|
|
125 |
# Set defaults values |
|
126 |
|
|
127 |
# Id of Device : 1 is for first find ! |
|
128 |
Device=2 |
|
129 |
# Iterations is integer |
|
130 |
Number=2 |
|
131 |
# Size of box |
|
132 |
SizeOfBox=1000. |
|
133 |
# Initial velocity of particules |
|
134 |
Velocity=10. |
|
135 |
# Redo the last process |
|
136 |
Redo=1 |
|
137 |
# Step |
|
138 |
Step=1. |
|
139 |
|
|
140 |
HowToUse='%s -d <DeviceId> -n <NumberOfSegments> -z <SizeOfBox> -v <Velocity> -s <Step>' |
|
141 |
|
|
142 |
try: |
|
143 |
opts, args = getopt.getopt(sys.argv[1:],"hd:n:z:v:r:",["device=","number=","size=","velocity=","redo=","step=1"]) |
|
144 |
except getopt.GetoptError: |
|
145 |
print HowToUse % sys.argv[0] |
|
146 |
sys.exit(2) |
|
147 |
|
|
148 |
for opt, arg in opts: |
|
149 |
if opt == '-h': |
|
150 |
print HowToUse % sys.argv[0] |
|
151 |
|
|
152 |
print "\nInformations about devices detected under OpenCL:" |
|
153 |
try: |
|
154 |
Id=1 |
|
155 |
for platform in cl.get_platforms(): |
|
156 |
for device in platform.get_devices(): |
|
157 |
deviceType=cl.device_type.to_string(device.type) |
|
158 |
print "Device #%i from %s of type %s : %s" % (Id,platform.vendor.lstrip(),deviceType,device.name.lstrip()) |
|
159 |
Id=Id+1 |
|
160 |
sys.exit() |
|
161 |
except ImportError: |
|
162 |
print "Your platform does not seem to support OpenCL" |
|
163 |
sys.exit() |
|
164 |
|
|
165 |
elif opt in ("-d", "--device"): |
|
166 |
Device=int(arg) |
|
167 |
elif opt in ("-n", "--number"): |
|
168 |
Number=int(arg) |
|
169 |
elif opt in ("-z", "--size"): |
|
170 |
SizeOfBox=np.float32(arg) |
|
171 |
elif opt in ("-v", "--velocity"): |
|
172 |
Velocity=np.float32(arg) |
|
173 |
elif opt in ("-s", "--step"): |
|
174 |
Step=np.float32(arg) |
|
175 |
elif opt in ("-r", "--redo"): |
|
176 |
Redo=int(arg) |
|
177 |
|
|
178 |
print "Device choosed : %s" % Device |
|
179 |
print "Number of segments : %s" % Number |
|
180 |
print "Size of Box : %s" % SizeOfBox |
|
181 |
print "Initial velocity % s" % Velocity |
|
182 |
print "Redo the last process % s" % Redo |
|
183 |
print "Step of iteration % s" % Step |
|
184 |
|
|
185 |
MyData = np.zeros(Number, dtype=cl_array.vec.float8) |
|
186 |
|
|
187 |
Id=1 |
|
188 |
HasXPU=False |
|
189 |
for platform in cl.get_platforms(): |
|
190 |
for device in platform.get_devices(): |
|
191 |
if Id==Device: |
|
192 |
PlatForm=platform |
|
193 |
XPU=device |
|
194 |
print "CPU/GPU selected: ",device.name.lstrip() |
|
195 |
HasXPU=True |
|
196 |
Id+=1 |
|
197 |
|
|
198 |
if HasXPU==False: |
|
199 |
print "No XPU #%i found in all of %i devices, sorry..." % (Device,Id-1) |
|
200 |
sys.exit() |
|
201 |
|
|
202 |
# Je cree le contexte et la queue pour son execution |
|
203 |
try: |
|
204 |
ctx = cl.Context([XPU]) |
|
205 |
queue = cl.CommandQueue(ctx,properties=cl.command_queue_properties.PROFILING_ENABLE) |
|
206 |
except: |
|
207 |
print "Crash during context creation" |
|
208 |
|
|
209 |
|
|
210 |
MyRoutines = cl.Program(ctx, BlobOpenCL).build() |
|
211 |
MyData[1][0]=1. |
|
212 |
|
|
213 |
mf = cl.mem_flags |
|
214 |
# clDataIn = cl.Buffer(ctx, mf.READ_WRITE, MyData.nbytes) |
|
215 |
clDataIn = cl.Buffer(ctx, mf.WRITE_ONLY|mf.COPY_HOST_PTR,hostbuf=MyData) |
|
216 |
clDataOut = cl.Buffer(ctx, mf.READ_WRITE, MyData.nbytes) |
|
217 |
|
|
218 |
print 'Tous au meme endroit',MyData |
|
219 |
|
|
220 |
# MyRoutines.SplutterPoints(queue,(Number,1),None,clDataIn,np.float32(SizeOfBox),np.float32(Velocity),np.uint32(nprnd(2**32)),np.uint32(nprnd(2**32))) |
|
221 |
|
|
222 |
print 'Tous distribues',MyData |
|
223 |
|
|
224 |
CLLaunch=MyRoutines.Evolution(queue,(Number,1),None,clDataOut,clDataIn,np.float32(Step)) |
|
225 |
CLLaunch.wait() |
|
226 |
cl.enqueue_copy(queue, MyData, clDataOut) |
|
227 |
print 'Tous calcules',MyData |
|
228 |
CLLaunch=MyRoutines.Commit(queue,(Number,1),None,clDataOut,clDataIn) |
|
229 |
CLLaunch.wait() |
|
230 |
|
|
231 |
time_start=time.time() |
|
232 |
for i in xrange(Redo): |
|
233 |
#CLLaunch=MyRoutines.ShortestDistance(queue, (Number,Number), None, clData, clDistance) |
|
234 |
sys.stdout.write('.') |
|
235 |
#CLLaunch.wait() |
|
236 |
print "\nDuration on %s for each %s" % (Device,(time.time()-time_start)/Redo) |
|
237 |
|
|
238 |
clDataIn.release() |
|
239 |
clDataOut.release() |
Formats disponibles : Unified diff