Statistiques
| Révision :

root / NBody / NBody.py @ 162

Historique | Voir | Annoter | Télécharger (20,29 ko)

1 128 equemene
#!/usr/bin/env python3
2 136 equemene
# -*- coding: utf-8 -*-
3 116 equemene
"""
4 116 equemene
Demonstrateur OpenCL d'interaction NCorps
5 116 equemene

6 116 equemene
Emmanuel QUEMENER <emmanuel.quemener@ens-lyon.fr> CeCILLv2
7 116 equemene
"""
8 116 equemene
import getopt
9 116 equemene
import sys
10 116 equemene
import time
11 116 equemene
import numpy as np
12 116 equemene
import pyopencl as cl
13 116 equemene
import pyopencl.array as cl_array
14 116 equemene
from numpy.random import randint as nprnd
15 116 equemene
16 132 equemene
def DictionariesAPI():
17 132 equemene
    Marsaglia={'CONG':0,'SHR3':1,'MWC':2,'KISS':3}
18 132 equemene
    Computing={'FP32':0,'FP64':1}
19 132 equemene
    return(Marsaglia,Computing)
20 132 equemene
21 142 equemene
BlobOpenCL= """
22 116 equemene
#define znew  ((z=36969*(z&65535)+(z>>16))<<16)
23 116 equemene
#define wnew  ((w=18000*(w&65535)+(w>>16))&65535)
24 116 equemene
#define MWC   (znew+wnew)
25 116 equemene
#define SHR3  (jsr=(jsr=(jsr=jsr^(jsr<<17))^(jsr>>13))^(jsr<<5))
26 116 equemene
#define CONG  (jcong=69069*jcong+1234567)
27 116 equemene
#define KISS  ((MWC^CONG)+SHR3)
28 116 equemene

29 132 equemene
#define TFP32 0
30 132 equemene
#define TFP64 1
31 132 equemene

32 151 equemene
#define LENGTH 1.e0f
33 116 equemene

34 132 equemene
#if TYPE == TFP32
35 132 equemene
#define MYFLOAT4 float4
36 132 equemene
#define MYFLOAT8 float8
37 132 equemene
#define MYFLOAT float
38 151 equemene
#define DISTANCE fast_distance
39 132 equemene
#else
40 155 equemene
#if defined(cl_khr_fp64)  // Khronos extension available?
41 155 equemene
#pragma OPENCL EXTENSION cl_khr_fp64 : enable
42 155 equemene
#define DOUBLE_SUPPORT_AVAILABLE
43 155 equemene
#elif defined(cl_amd_fp64)  // AMD extension available?
44 155 equemene
#pragma OPENCL EXTENSION cl_amd_fp64 : enable
45 155 equemene
#define DOUBLE_SUPPORT_AVAILABLE
46 155 equemene
#endif
47 132 equemene
#define MYFLOAT4 double4
48 132 equemene
#define MYFLOAT8 double8
49 132 equemene
#define MYFLOAT double
50 151 equemene
#define DISTANCE distance
51 132 equemene
#endif
52 132 equemene

53 160 equemene
#define MWCfp (MYFLOAT)(MWC * 2.3283064365386963e-10f)
54 160 equemene
#define KISSfp (MYFLOAT)(KISS * 2.3283064365386963e-10f)
55 160 equemene
#define SHR3fp (MYFLOAT)(SHR3 * 2.3283064365386963e-10f)
56 160 equemene
#define CONGfp (MYFLOAT)(CONG * 2.3283064365386963e-10f)
57 160 equemene

58 160 equemene
#define PI (MYFLOAT)3.141592653589793238462643197169399375105820974944592307816406286e0f
59 160 equemene

60 160 equemene
#define SMALL_NUM 1.e-9f
61 160 equemene

62 132 equemene
MYFLOAT4 Interaction(MYFLOAT4 m,MYFLOAT4 n)
63 116 equemene
{
64 160 equemene
    private MYFLOAT r=DISTANCE(n,m);
65 155 equemene

66 160 equemene
    return((n-m)/(MYFLOAT)(r*r*r));
67 116 equemene
}
68 116 equemene

69 141 equemene
MYFLOAT4 InteractionCore(MYFLOAT4 m,MYFLOAT4 n)
70 141 equemene
{
71 160 equemene
    private MYFLOAT core=(MYFLOAT)1.e5f;
72 160 equemene
    private MYFLOAT r=DISTANCE(n,m);
73 160 equemene
    private MYFLOAT d=r*r+core*core;
74 141 equemene

75 155 equemene
    return(core*(n-m)/(MYFLOAT)(d*d));
76 141 equemene
}
77 141 equemene

78 133 equemene
MYFLOAT PairPotential(MYFLOAT4 m,MYFLOAT4 n)
79 133 equemene
{
80 151 equemene
    return((MYFLOAT)(-1.e0f)/(DISTANCE(n,m)));
81 133 equemene
}
82 133 equemene

83 160 equemene
MYFLOAT AtomicPotential(__global MYFLOAT4* clDataX,int gid)
84 139 equemene
{
85 160 equemene
    private MYFLOAT potential=(MYFLOAT)0.e0f;
86 160 equemene
    private MYFLOAT4 x=clDataX[gid];
87 139 equemene

88 139 equemene
    for (int i=0;i<get_global_size(0);i++)
89 139 equemene
    {
90 139 equemene
        if (gid != i)
91 160 equemene
        potential+=PairPotential(x,clDataX[i]);
92 139 equemene
    }
93 133 equemene

94 139 equemene
    barrier(CLK_GLOBAL_MEM_FENCE);
95 141 equemene
    return(potential);
96 139 equemene
}
97 139 equemene

98 160 equemene
MYFLOAT AtomicPotentialCoM(__global MYFLOAT4* clDataX,__global MYFLOAT4* clCoM,int gid)
99 139 equemene
{
100 160 equemene
    return(PairPotential(clDataX[gid],clCoM[0]));
101 139 equemene
}
102 139 equemene

103 160 equemene
MYFLOAT8 AtomicRungeKutta(__global MYFLOAT4* clDataInX,__global MYFLOAT4* clDataInV,int gid,MYFLOAT dt)
104 116 equemene
{
105 160 equemene
    private MYFLOAT4 a0,v0,x0,a1,v1,x1,a2,v2,x2,a3,v3,x3,a4,v4,x4,xf,vf;
106 160 equemene

107 160 equemene
    a0=(MYFLOAT4)(0.e0f,0.e0f,0.e0f,0.e0f);
108 160 equemene
    v0=(MYFLOAT4)clDataInV[gid];
109 160 equemene
    x0=(MYFLOAT4)clDataInX[gid];
110 133 equemene
    int N = get_global_size(0);
111 133 equemene

112 133 equemene
    for (int i=0;i<N;i++)
113 121 equemene
    {
114 121 equemene
        if (gid != i)
115 160 equemene
        a0+=Interaction(x0,clDataInX[i]);
116 121 equemene
    }
117 121 equemene

118 160 equemene
    a1=(MYFLOAT4)(0.e0f,0.e0f,0.e0f,0.e0f);
119 160 equemene
    v1=v0+a0*dt;
120 160 equemene
    x1=x0+v0*dt;
121 133 equemene
    for (int i=0;i<N;i++)
122 121 equemene
    {
123 121 equemene
        if (gid != i)
124 160 equemene
        a1+=Interaction(x1,clDataInX[i]);
125 121 equemene
    }
126 121 equemene

127 160 equemene
    a2=(MYFLOAT4)(0.e0f,0.e0f,0.e0f,0.e0f);
128 160 equemene
    v2=v0+a1*dt*(MYFLOAT)5.e-1f;
129 160 equemene
    x2=x0+v1*dt*(MYFLOAT)5.e-1f;
130 133 equemene
    for (int i=0;i<N;i++)
131 121 equemene
    {
132 121 equemene
        if (gid != i)
133 160 equemene
        a2+=Interaction(x2,clDataInX[i]);
134 121 equemene
    }
135 121 equemene

136 160 equemene
    a3=(MYFLOAT4)(0.e0f,0.e0f,0.e0f,0.e0f);
137 160 equemene
    v3=v0+a2*dt*(MYFLOAT)5.e-1f;
138 160 equemene
    x3=x0+v2*dt*(MYFLOAT)5.e-1f;
139 133 equemene
    for (int i=0;i<N;i++)
140 121 equemene
    {
141 121 equemene
        if (gid != i)
142 160 equemene
        a3+=Interaction(x3,clDataInX[i]);
143 121 equemene
    }
144 121 equemene

145 160 equemene
    a4=(MYFLOAT4)(0.e0f,0.e0f,0.e0f,0.e0f);
146 160 equemene
    v4=v0+a3*dt;
147 160 equemene
    x4=x0+v3*dt;
148 141 equemene
    for (int i=0;i<N;i++)
149 141 equemene
    {
150 141 equemene
        if (gid != i)
151 160 equemene
        a4+=Interaction(x4,clDataInX[i]);
152 141 equemene
    }
153 141 equemene

154 160 equemene
    xf=x0+dt*(v1+(MYFLOAT)2.e0f*(v2+v3)+v4)/(MYFLOAT)6.e0f;
155 160 equemene
    vf=v0+dt*(a1+(MYFLOAT)2.e0f*(a2+a3)+a4)/(MYFLOAT)6.e0f;
156 121 equemene

157 160 equemene
    return((MYFLOAT8)(xf.s0,xf.s1,xf.s2,1.e0f,vf.s0,vf.s1,vf.s2,1.e0f));
158 121 equemene
}
159 121 equemene

160 121 equemene
// Elements from : http://doswa.com/2009/01/02/fourth-order-runge-kutta-numerical-integration.html
161 121 equemene

162 160 equemene
MYFLOAT8 AtomicHeun(__global MYFLOAT4* clDataInX,__global MYFLOAT4* clDataInV,int gid,MYFLOAT dt)
163 121 equemene
{
164 160 equemene
    private MYFLOAT4 x,v,a,xi,vi,ai,xf,vf;
165 116 equemene

166 160 equemene
    x=(MYFLOAT4)clDataInX[gid];
167 160 equemene
    v=(MYFLOAT4)clDataInV[gid];
168 151 equemene
    a=(MYFLOAT4)(0.e0f,0.e0f,0.e0f,0.e0f);
169 141 equemene

170 141 equemene
    for (int i=0;i<get_global_size(0);i++)
171 116 equemene
    {
172 116 equemene
        if (gid != i)
173 160 equemene
        a+=Interaction(x,clDataInX[i]);
174 116 equemene
    }
175 141 equemene

176 141 equemene
    vi=v+dt*a;
177 141 equemene
    xi=x+dt*vi;
178 151 equemene
    ai=(MYFLOAT4)(0.e0f,0.e0f,0.e0f,0.e0f);
179 141 equemene

180 141 equemene
    for (int i=0;i<get_global_size(0);i++)
181 116 equemene
    {
182 116 equemene
        if (gid != i)
183 160 equemene
        ai+=Interaction(xi,clDataInX[i]);
184 116 equemene
    }
185 141 equemene

186 160 equemene
    vf=v+dt*(a+ai)/(MYFLOAT)2.e0f;
187 160 equemene
    xf=x+dt*(v+vi)/(MYFLOAT)2.e0f;
188 118 equemene

189 160 equemene
    return((MYFLOAT8)(xf.s0,xf.s1,xf.s2,1.e0f,vf.s0,vf.s1,vf.s2,0.e0f));
190 119 equemene
}
191 119 equemene

192 160 equemene
MYFLOAT8 AtomicImplicitEuler(__global MYFLOAT4* clDataInX,__global MYFLOAT4* clDataInV,int gid,MYFLOAT dt)
193 119 equemene
{
194 160 equemene
    private MYFLOAT4 x,v,a,xf,vf;
195 119 equemene

196 160 equemene
    x=(MYFLOAT4)clDataInX[gid];
197 160 equemene
    v=(MYFLOAT4)clDataInV[gid];
198 151 equemene
    a=(MYFLOAT4)(0.e0f,0.e0f,0.e0f,0.e0f);
199 160 equemene

200 119 equemene
    for (int i=0;i<get_global_size(0);i++)
201 119 equemene
    {
202 119 equemene
        if (gid != i)
203 160 equemene
        a+=Interaction(x,clDataInX[i]);
204 119 equemene
    }
205 133 equemene

206 119 equemene
    vf=v+dt*a;
207 133 equemene
    xf=x+dt*vf;
208 118 equemene

209 160 equemene
    return((MYFLOAT8)(xf.s0,xf.s1,xf.s2,1.e0f,vf.s0,vf.s1,vf.s2,0.e0f));
210 116 equemene
}
211 116 equemene

212 160 equemene
MYFLOAT8 AtomicExplicitEuler(__global MYFLOAT4* clDataInX,__global MYFLOAT4* clDataInV,int gid,MYFLOAT dt)
213 140 equemene
{
214 140 equemene
    MYFLOAT4 x,v,a,xf,vf;
215 140 equemene

216 160 equemene
    x=(MYFLOAT4)clDataInX[gid];
217 160 equemene
    v=(MYFLOAT4)clDataInV[gid];
218 151 equemene
    a=(MYFLOAT4)(0.e0f,0.e0f,0.e0f,0.e0f);
219 160 equemene

220 140 equemene
    for (int i=0;i<get_global_size(0);i++)
221 140 equemene
    {
222 140 equemene
        if (gid != i)
223 160 equemene
        a+=Interaction(x,clDataInX[i]);
224 140 equemene
    }
225 140 equemene

226 140 equemene
    vf=v+dt*a;
227 140 equemene
    xf=x+dt*v;
228 140 equemene

229 160 equemene
    return((MYFLOAT8)(xf.s0,xf.s1,xf.s2,1.e0f,vf.s0,vf.s1,vf.s2,0.e0f));
230 140 equemene
}
231 140 equemene

232 160 equemene
__kernel void SplutterPoints(__global MYFLOAT4* clDataX, MYFLOAT box,
233 139 equemene
                             uint seed_z,uint seed_w)
234 116 equemene
{
235 116 equemene
    int gid = get_global_id(0);
236 116 equemene
    uint z=seed_z+(uint)gid;
237 116 equemene
    uint w=seed_w-(uint)gid;
238 133 equemene

239 155 equemene
    MYFLOAT x0=box*(MYFLOAT)(MWCfp-(MYFLOAT)5.e-1f);
240 155 equemene
    MYFLOAT y0=box*(MYFLOAT)(MWCfp-(MYFLOAT)5.e-1f);
241 155 equemene
    MYFLOAT z0=box*(MYFLOAT)(MWCfp-(MYFLOAT)5.e-1f);
242 137 equemene

243 160 equemene
    clDataX[gid].s0123 = (MYFLOAT4) (x0,y0,z0,1.e0f);
244 116 equemene
}
245 116 equemene

246 160 equemene
__kernel void SplutterStress(__global MYFLOAT4* clDataX,__global MYFLOAT4* clDataV,__global MYFLOAT4* clCoM, MYFLOAT velocity,uint seed_z,uint seed_w)
247 139 equemene
{
248 139 equemene
    int gid = get_global_id(0);
249 142 equemene
    MYFLOAT N = (MYFLOAT)get_global_size(0);
250 139 equemene
    uint z=seed_z+(uint)gid;
251 139 equemene
    uint w=seed_w-(uint)gid;
252 139 equemene

253 139 equemene
    if (velocity<SMALL_NUM) {
254 160 equemene
       MYFLOAT4 SpeedVector=(MYFLOAT4)normalize(cross(clDataX[gid],clCoM[0]))*sqrt(-AtomicPotential(clDataX,gid)/(MYFLOAT)2.e0f);
255 160 equemene
       clDataV[gid]=SpeedVector;
256 139 equemene
    }
257 139 equemene
    else
258 139 equemene
    {
259 155 equemene
       // cast to float for sin,cos are NEEDED by Mesa FP64 implementation!
260 142 equemene
       MYFLOAT theta=MWCfp*PI;
261 151 equemene
       MYFLOAT phi=MWCfp*PI*(MYFLOAT)2.e0f;
262 155 equemene
       MYFLOAT sinTheta=sin((float)theta);
263 142 equemene

264 160 equemene
       clDataV[gid].s0=velocity*sinTheta*cos((float)phi);
265 160 equemene
       clDataV[gid].s1=velocity*sinTheta*sin((float)phi);
266 160 equemene
       clDataV[gid].s2=velocity*cos((float)theta);
267 160 equemene
       clDataV[gid].s3=(MYFLOAT)1.e0f;
268 139 equemene
    }
269 139 equemene
}
270 139 equemene

271 160 equemene
__kernel void RungeKutta(__global MYFLOAT4* clDataX,__global MYFLOAT4* clDataV,MYFLOAT h)
272 116 equemene
{
273 116 equemene
    int gid = get_global_id(0);
274 116 equemene

275 160 equemene
    MYFLOAT8 clDataGid=AtomicRungeKutta(clDataX,clDataV,gid,h);
276 116 equemene
    barrier(CLK_GLOBAL_MEM_FENCE);
277 160 equemene
    clDataX[gid]=clDataGid.lo;
278 160 equemene
    clDataV[gid]=clDataGid.hi;
279 116 equemene
}
280 116 equemene

281 160 equemene
__kernel void ImplicitEuler(__global MYFLOAT4* clDataX,__global MYFLOAT4* clDataV,MYFLOAT h)
282 116 equemene
{
283 116 equemene
    int gid = get_global_id(0);
284 116 equemene

285 160 equemene
    MYFLOAT8 clDataGid=AtomicImplicitEuler(clDataX,clDataV,gid,h);
286 116 equemene
    barrier(CLK_GLOBAL_MEM_FENCE);
287 160 equemene
    clDataX[gid]=clDataGid.lo;
288 160 equemene
    clDataV[gid]=clDataGid.hi;
289 116 equemene
}
290 133 equemene

291 160 equemene
__kernel void Heun(__global MYFLOAT4* clDataX,__global MYFLOAT4* clDataV,MYFLOAT h)
292 141 equemene
{
293 141 equemene
    int gid = get_global_id(0);
294 141 equemene

295 160 equemene
    MYFLOAT8 clDataGid=AtomicHeun(clDataX,clDataV,gid,h);
296 141 equemene
    barrier(CLK_GLOBAL_MEM_FENCE);
297 160 equemene
    clDataX[gid]=clDataGid.lo;
298 160 equemene
    clDataV[gid]=clDataGid.hi;
299 141 equemene
}
300 141 equemene

301 160 equemene
__kernel void ExplicitEuler(__global MYFLOAT4* clDataX,__global MYFLOAT4* clDataV,MYFLOAT h)
302 140 equemene
{
303 140 equemene
    int gid = get_global_id(0);
304 140 equemene

305 160 equemene
    MYFLOAT8 clDataGid=AtomicExplicitEuler(clDataX,clDataV,gid,h);
306 140 equemene
    barrier(CLK_GLOBAL_MEM_FENCE);
307 160 equemene
    clDataX[gid]=clDataGid.lo;
308 160 equemene
    clDataV[gid]=clDataGid.hi;
309 140 equemene
}
310 139 equemene

311 160 equemene
__kernel void CoMPotential(__global MYFLOAT4* clDataX,__global MYFLOAT4* clCoM,__global MYFLOAT* clPotential)
312 133 equemene
{
313 133 equemene
    int gid = get_global_id(0);
314 133 equemene

315 160 equemene
    clPotential[gid]=PairPotential(clDataX[gid],clCoM[0]);
316 139 equemene
}
317 139 equemene

318 160 equemene
__kernel void Potential(__global MYFLOAT4* clDataX,__global MYFLOAT* clPotential)
319 139 equemene
{
320 139 equemene
    int gid = get_global_id(0);
321 139 equemene

322 155 equemene
    MYFLOAT potential=(MYFLOAT)0.e0f;
323 160 equemene
    MYFLOAT4 x=clDataX[gid];
324 133 equemene

325 133 equemene
    for (int i=0;i<get_global_size(0);i++)
326 133 equemene
    {
327 133 equemene
        if (gid != i)
328 160 equemene
        potential+=PairPotential(x,clDataX[i]);
329 133 equemene
    }
330 133 equemene

331 133 equemene
    barrier(CLK_GLOBAL_MEM_FENCE);
332 155 equemene
    clPotential[gid]=potential*(MYFLOAT)5.e-1f;
333 133 equemene
}
334 133 equemene

335 160 equemene
__kernel void CenterOfMass(__global MYFLOAT4* clDataX,__global MYFLOAT4* clCoM,int Size)
336 139 equemene
{
337 160 equemene
    MYFLOAT4 CoM=clDataX[0];
338 142 equemene

339 139 equemene
    for (int i=1;i<Size;i++)
340 139 equemene
    {
341 160 equemene
        CoM+=clDataX[i];
342 139 equemene
    }
343 142 equemene

344 139 equemene
    barrier(CLK_GLOBAL_MEM_FENCE);
345 160 equemene
    clCoM[0]=(MYFLOAT4)(CoM.s0,CoM.s1,CoM.s2,1.e0f)/(MYFLOAT)Size;
346 139 equemene
}
347 139 equemene

348 160 equemene
__kernel void Kinetic(__global MYFLOAT4* clDataV,__global MYFLOAT* clKinetic)
349 133 equemene
{
350 133 equemene
    int gid = get_global_id(0);
351 133 equemene

352 139 equemene
    barrier(CLK_GLOBAL_MEM_FENCE);
353 160 equemene
    MYFLOAT d=(MYFLOAT)length(clDataV[gid]);
354 155 equemene
    clKinetic[gid]=(MYFLOAT)5.e-1f*(MYFLOAT)(d*d);
355 133 equemene
}
356 116 equemene
"""
357 116 equemene
358 133 equemene
def Energy(MyData):
359 155 equemene
    return(sum(MyData*MyData))
360 133 equemene
361 116 equemene
if __name__=='__main__':
362 116 equemene
363 132 equemene
    # ValueType
364 132 equemene
    ValueType='FP32'
365 132 equemene
    class MyFloat(np.float32):pass
366 160 equemene
    #    clType8=cl_array.vec.float8
367 142 equemene
    clType4=cl_array.vec.float4
368 116 equemene
    # Set defaults values
369 118 equemene
    np.set_printoptions(precision=2)
370 116 equemene
    # Id of Device : 1 is for first find !
371 160 equemene
    Device=0
372 116 equemene
    # Iterations is integer
373 160 equemene
    Number=2
374 116 equemene
    # Size of box
375 132 equemene
    SizeOfBox=MyFloat(1.)
376 116 equemene
    # Initial velocity of particules
377 132 equemene
    Velocity=MyFloat(1.)
378 116 equemene
    # Redo the last process
379 160 equemene
    Iterations=int(np.pi*1024)
380 116 equemene
    # Step
381 160 equemene
    Step=MyFloat(1./1024)
382 121 equemene
    # Method of integration
383 150 equemene
    Method='ImplicitEuler'
384 132 equemene
    # InitialRandom
385 132 equemene
    InitialRandom=False
386 132 equemene
    # RNG Marsaglia Method
387 132 equemene
    RNG='MWC'
388 133 equemene
    # CheckEnergies
389 133 equemene
    CheckEnergies=False
390 134 equemene
    # Display samples in 3D
391 139 equemene
    GraphSamples=False
392 139 equemene
    # Viriel Distribution of stress
393 139 equemene
    VirielStress=True
394 132 equemene
395 160 equemene
    HowToUse='%s -h [Help] -r [InitialRandom] -e [VirielStress] -g [GraphSamples] -c [CheckEnergies] -d <DeviceId> -n <NumberOfParticules> -z <SizeOfBox> -v <Velocity> -s <Step> -i <Iterations> -m <ImplicitEuler|RungeKutta|ExplicitEuler|Heun> -t <FP32|FP64>'
396 116 equemene
397 116 equemene
    try:
398 139 equemene
        opts, args = getopt.getopt(sys.argv[1:],"rehgcd:n:z:v:i:s:m:t:",["random","viriel","graph","check","device=","number=","size=","velocity=","iterations=","step=","method=","valuetype="])
399 116 equemene
    except getopt.GetoptError:
400 128 equemene
        print(HowToUse % sys.argv[0])
401 116 equemene
        sys.exit(2)
402 116 equemene
403 116 equemene
    for opt, arg in opts:
404 116 equemene
        if opt == '-h':
405 128 equemene
            print(HowToUse % sys.argv[0])
406 116 equemene
407 128 equemene
            print("\nInformations about devices detected under OpenCL:")
408 116 equemene
            try:
409 132 equemene
                Id=0
410 116 equemene
                for platform in cl.get_platforms():
411 116 equemene
                    for device in platform.get_devices():
412 137 equemene
                        #deviceType=cl.device_type.to_string(device.type)
413 149 equemene
                        deviceType="xPU"
414 128 equemene
                        print("Device #%i from %s of type %s : %s" % (Id,platform.vendor.lstrip(),deviceType,device.name.lstrip()))
415 116 equemene
                        Id=Id+1
416 116 equemene
                sys.exit()
417 116 equemene
            except ImportError:
418 128 equemene
                print("Your platform does not seem to support OpenCL")
419 116 equemene
                sys.exit()
420 116 equemene
421 132 equemene
        elif opt in ("-t", "--valuetype"):
422 132 equemene
            if arg=='FP64':
423 132 equemene
                class MyFloat(np.float64): pass
424 142 equemene
                clType4=cl_array.vec.double4
425 132 equemene
            else:
426 132 equemene
                class MyFloat(np.float32):pass
427 142 equemene
                clType4=cl_array.vec.float4
428 132 equemene
            ValueType = arg
429 116 equemene
        elif opt in ("-d", "--device"):
430 116 equemene
            Device=int(arg)
431 121 equemene
        elif opt in ("-m", "--method"):
432 121 equemene
            Method=arg
433 116 equemene
        elif opt in ("-n", "--number"):
434 116 equemene
            Number=int(arg)
435 116 equemene
        elif opt in ("-z", "--size"):
436 132 equemene
            SizeOfBox=MyFloat(arg)
437 116 equemene
        elif opt in ("-v", "--velocity"):
438 132 equemene
            Velocity=MyFloat(arg)
439 139 equemene
            VirielStress=False
440 116 equemene
        elif opt in ("-s", "--step"):
441 132 equemene
            Step=MyFloat(arg)
442 120 equemene
        elif opt in ("-i", "--iterations"):
443 120 equemene
            Iterations=int(arg)
444 132 equemene
        elif opt in ("-r", "--random"):
445 132 equemene
            InitialRandom=True
446 133 equemene
        elif opt in ("-c", "--check"):
447 133 equemene
            CheckEnergies=True
448 134 equemene
        elif opt in ("-g", "--graph"):
449 134 equemene
            GraphSamples=True
450 139 equemene
        elif opt in ("-e", "--viriel"):
451 139 equemene
            VirielStress=True
452 134 equemene
453 160 equemene
    SizeOfBox=MyFloat(Number*SizeOfBox)
454 132 equemene
    Velocity=MyFloat(Velocity)
455 132 equemene
    Step=MyFloat(Step)
456 132 equemene
457 128 equemene
    print("Device choosed : %s" % Device)
458 128 equemene
    print("Number of particules : %s" % Number)
459 128 equemene
    print("Size of Box : %s" % SizeOfBox)
460 160 equemene
    print("Initial velocity : %s" % Velocity)
461 160 equemene
    print("Number of iterations : %s" % Iterations)
462 160 equemene
    print("Step of iteration : %s" % Step)
463 160 equemene
    print("Method of resolution : %s" % Method)
464 160 equemene
    print("Initial Random for RNG Seed : %s" % InitialRandom)
465 160 equemene
    print("Check for Energies : %s" % CheckEnergies)
466 160 equemene
    print("Graph for Samples : %s" % GraphSamples)
467 160 equemene
    print("ValueType is : %s" % ValueType)
468 139 equemene
    print("Viriel distribution of stress %s" % VirielStress)
469 116 equemene
470 132 equemene
    # Create Numpy array of CL vector with 8 FP32
471 142 equemene
    MyCoM = np.zeros(1,dtype=clType4)
472 160 equemene
    MyDataX = np.zeros(Number, dtype=clType4)
473 160 equemene
    MyDataV = np.zeros(Number, dtype=clType4)
474 133 equemene
    MyPotential = np.zeros(Number, dtype=MyFloat)
475 133 equemene
    MyKinetic = np.zeros(Number, dtype=MyFloat)
476 132 equemene
477 132 equemene
    Marsaglia,Computing=DictionariesAPI()
478 132 equemene
479 132 equemene
    # Scan the OpenCL arrays
480 132 equemene
    Id=0
481 116 equemene
    HasXPU=False
482 116 equemene
    for platform in cl.get_platforms():
483 116 equemene
        for device in platform.get_devices():
484 116 equemene
            if Id==Device:
485 116 equemene
                PlatForm=platform
486 116 equemene
                XPU=device
487 128 equemene
                print("CPU/GPU selected: ",device.name.lstrip())
488 151 equemene
                print("Platform selected: ",platform.name)
489 116 equemene
                HasXPU=True
490 116 equemene
            Id+=1
491 116 equemene
492 116 equemene
    if HasXPU==False:
493 128 equemene
        print("No XPU #%i found in all of %i devices, sorry..." % (Device,Id-1))
494 116 equemene
        sys.exit()
495 116 equemene
496 132 equemene
    # Create Context
497 116 equemene
    try:
498 116 equemene
        ctx = cl.Context([XPU])
499 116 equemene
        queue = cl.CommandQueue(ctx,properties=cl.command_queue_properties.PROFILING_ENABLE)
500 116 equemene
    except:
501 128 equemene
        print("Crash during context creation")
502 116 equemene
503 132 equemene
    print(Marsaglia[RNG],Computing[ValueType])
504 132 equemene
    # Build all routines used for the computing
505 151 equemene
    #BuildOptions="-DTRNG=%i -DTYPE=%i" % (Marsaglia[RNG],Computing[ValueType])
506 160 equemene
    #BuildOptions="-cl-mad-enable -cl-fast-relaxed-math -DTRNG=%i -DTYPE=%i" % (Marsaglia[RNG],Computing[ValueType])
507 160 equemene
    BuildOptions="-cl-mad-enable -cl-kernel-arg-info -cl-fast-relaxed-math -cl-std=CL1.2 -DTRNG=%i -DTYPE=%i" % (Marsaglia[RNG],Computing[ValueType])
508 160 equemene
509 160 equemene
    if 'Intel' in PlatForm.name or 'Clover' in PlatForm.name or 'Portable' in PlatForm.name :
510 151 equemene
        MyRoutines = cl.Program(ctx, BlobOpenCL).build(options = BuildOptions)
511 151 equemene
    else:
512 151 equemene
        MyRoutines = cl.Program(ctx, BlobOpenCL).build(options = BuildOptions+" -cl-strict-aliasing")
513 160 equemene
514 116 equemene
    mf = cl.mem_flags
515 160 equemene
    # clDataX = cl.Buffer(ctx, mf.READ_WRITE, MyDataX.nbytes)
516 160 equemene
    # clDataV = cl.Buffer(ctx, mf.READ_WRITE, MyDataV.nbytes)
517 160 equemene
    # clPotential = cl.Buffer(ctx, mf.READ_WRITE, MyPotential.nbytes)
518 160 equemene
    # clKinetic = cl.Buffer(ctx, mf.READ_WRITE, MyKinetic.nbytes)
519 160 equemene
    # clCoM = cl.Buffer(ctx, mf.READ_WRITE, MyCoM.nbytes)
520 116 equemene
521 160 equemene
    clDataX = cl.Buffer(ctx, mf.WRITE_ONLY|mf.COPY_HOST_PTR,hostbuf=MyDataX)
522 160 equemene
    clDataV = cl.Buffer(ctx, mf.WRITE_ONLY|mf.COPY_HOST_PTR,hostbuf=MyDataV)
523 160 equemene
    clPotential = cl.Buffer(ctx, mf.WRITE_ONLY|mf.COPY_HOST_PTR,hostbuf=MyPotential)
524 160 equemene
    clKinetic = cl.Buffer(ctx, mf.WRITE_ONLY|mf.COPY_HOST_PTR,hostbuf=MyKinetic)
525 160 equemene
    clCoM = cl.Buffer(ctx, mf.WRITE_ONLY|mf.COPY_HOST_PTR,hostbuf=MyCoM)
526 160 equemene
527 134 equemene
    print('All particles superimposed.')
528 116 equemene
529 132 equemene
    print(SizeOfBox.dtype)
530 132 equemene
531 132 equemene
    # Set particles to RNG points
532 132 equemene
    if InitialRandom:
533 160 equemene
        MyRoutines.SplutterPoints(queue,(Number,1),None,clDataX,SizeOfBox,np.uint32(nprnd(2**32)),np.uint32(nprnd(2**32)))
534 132 equemene
    else:
535 160 equemene
        MyRoutines.SplutterPoints(queue,(Number,1),None,clDataX,SizeOfBox,np.uint32(110271),np.uint32(250173))
536 116 equemene
537 132 equemene
    print('All particules distributed')
538 139 equemene
539 160 equemene
    CLLaunch=MyRoutines.CenterOfMass(queue,(1,1),None,clDataX,clCoM,np.int32(Number))
540 160 equemene
    CLLaunch.wait()
541 142 equemene
    cl.enqueue_copy(queue,MyCoM,clCoM)
542 142 equemene
    print('Center Of Mass: (%s,%s,%s)' % (MyCoM[0][0],MyCoM[0][1],MyCoM[0][2]))
543 139 equemene
544 139 equemene
    if VirielStress:
545 160 equemene
        CLLaunch=MyRoutines.SplutterStress(queue,(Number,1),None,clDataX,clDataV,clCoM,MyFloat(0.),np.uint32(110271),np.uint32(250173))
546 139 equemene
    else:
547 160 equemene
        CLLaunch=MyRoutines.SplutterStress(queue,(Number,1),None,clDataX,clDataV,clCoM,Velocity,np.uint32(110271),np.uint32(250173))
548 160 equemene
    CLLaunch.wait()
549 139 equemene
550 139 equemene
    if GraphSamples:
551 160 equemene
        cl.enqueue_copy(queue, MyDataX, clDataX)
552 160 equemene
        t0=np.array([[MyDataX[0][0],MyDataX[0][1],MyDataX[0][2]]])
553 160 equemene
        t1=np.array([[MyDataX[1][0],MyDataX[1][1],MyDataX[1][2]]])
554 160 equemene
        tL=np.array([[MyDataX[-1][0],MyDataX[-1][1],MyDataX[-1][2]]])
555 139 equemene
556 160 equemene
    CLLaunch=MyRoutines.Potential(queue,(Number,1),None,clDataX,clPotential)
557 160 equemene
    CLLaunch=MyRoutines.Kinetic(queue,(Number,1),None,clDataV,clKinetic)
558 133 equemene
    CLLaunch.wait()
559 141 equemene
    cl.enqueue_copy(queue,MyPotential,clPotential)
560 141 equemene
    cl.enqueue_copy(queue,MyKinetic,clKinetic)
561 142 equemene
    print('Viriel=%s Potential=%s Kinetic=%s'% (np.sum(MyPotential)+2*np.sum(MyKinetic),np.sum(MyPotential),np.sum(MyKinetic)))
562 133 equemene
563 134 equemene
    if GraphSamples:
564 160 equemene
        cl.enqueue_copy(queue, MyDataX, clDataX)
565 160 equemene
        t0=np.array([[MyDataX[0][0],MyDataX[0][1],MyDataX[0][2]]])
566 160 equemene
        t1=np.array([[MyDataX[1][0],MyDataX[1][1],MyDataX[1][2]]])
567 160 equemene
        tL=np.array([[MyDataX[-1][0],MyDataX[-1][1],MyDataX[-1][2]]])
568 116 equemene
569 116 equemene
    time_start=time.time()
570 128 equemene
    for i in range(Iterations):
571 160 equemene
        if Method=="RungeKutta":
572 160 equemene
            CLLaunch=MyRoutines.RungeKutta(queue,(Number,1),None,clDataX,clDataV,Step)
573 140 equemene
        elif Method=="ExplicitEuler":
574 160 equemene
            CLLaunch=MyRoutines.ExplicitEuler(queue,(Number,1),None,clDataX,clDataV,Step)
575 141 equemene
        elif Method=="Heun":
576 160 equemene
            CLLaunch=MyRoutines.Heun(queue,(Number,1),None,clDataX,clDataV,Step)
577 140 equemene
        else:
578 160 equemene
            CLLaunch=MyRoutines.ImplicitEuler(queue,(Number,1),None,clDataX,clDataV,Step)
579 118 equemene
        CLLaunch.wait()
580 160 equemene
581 133 equemene
        if CheckEnergies:
582 160 equemene
            CLLaunch=MyRoutines.Potential(queue,(Number,1),None,clDataX,clPotential)
583 160 equemene
            CLLaunch=MyRoutines.Kinetic(queue,(Number,1),None,clDataV,clKinetic)
584 133 equemene
            CLLaunch.wait()
585 133 equemene
            cl.enqueue_copy(queue,MyPotential,clPotential)
586 133 equemene
            cl.enqueue_copy(queue,MyKinetic,clKinetic)
587 151 equemene
            print(np.sum(MyPotential)+2.*np.sum(MyKinetic),np.sum(MyPotential),np.sum(MyKinetic))
588 133 equemene
589 139 equemene
            print(MyPotential,MyKinetic)
590 139 equemene
591 134 equemene
        if GraphSamples:
592 160 equemene
            cl.enqueue_copy(queue, MyDataX, clDataX)
593 160 equemene
            t0=np.append(t0,[MyDataX[0][0],MyDataX[0][1],MyDataX[0][2]])
594 160 equemene
            t1=np.append(t1,[MyDataX[1][0],MyDataX[1][1],MyDataX[1][2]])
595 160 equemene
            tL=np.append(tL,[MyDataX[-1][0],MyDataX[-1][1],MyDataX[-1][2]])
596 160 equemene
    print("\nDuration on %s for each %s\n" % (Device,(time.time()-time_start)/Iterations))
597 135 equemene
598 160 equemene
    MyRoutines.CenterOfMass(queue,(1,1),None,clDataX,clCoM,np.int32(Number))
599 160 equemene
    CLLaunch=MyRoutines.Potential(queue,(Number,1),None,clDataX,clPotential)
600 160 equemene
    CLLaunch=MyRoutines.Kinetic(queue,(Number,1),None,clDataV,clKinetic)
601 141 equemene
    CLLaunch.wait()
602 160 equemene
    cl.enqueue_copy(queue,MyCoM,clCoM)
603 141 equemene
    cl.enqueue_copy(queue,MyPotential,clPotential)
604 141 equemene
    cl.enqueue_copy(queue,MyKinetic,clKinetic)
605 160 equemene
    print('Center Of Mass: (%s,%s,%s)' % (MyCoM[0][0],MyCoM[0][1],MyCoM[0][2]))
606 151 equemene
    print('Viriel=%s Potential=%s Kinetic=%s'% (np.sum(MyPotential)+2.*np.sum(MyKinetic),np.sum(MyPotential),np.sum(MyKinetic)))
607 141 equemene
608 135 equemene
    if GraphSamples:
609 135 equemene
        t0=np.transpose(np.reshape(t0,(Iterations+1,3)))
610 135 equemene
        t1=np.transpose(np.reshape(t1,(Iterations+1,3)))
611 135 equemene
        tL=np.transpose(np.reshape(tL,(Iterations+1,3)))
612 118 equemene
613 135 equemene
        import matplotlib.pyplot as plt
614 135 equemene
        from mpl_toolkits.mplot3d import Axes3D
615 119 equemene
616 135 equemene
        fig = plt.figure()
617 135 equemene
        ax = fig.gca(projection='3d')
618 135 equemene
        ax.scatter(t0[0],t0[1],t0[2], marker='^',color='blue')
619 135 equemene
        ax.scatter(t1[0],t1[1],t1[2], marker='o',color='red')
620 135 equemene
        ax.scatter(tL[0],tL[1],tL[2], marker='D',color='green')
621 135 equemene
622 135 equemene
        ax.set_xlabel('X Label')
623 135 equemene
        ax.set_ylabel('Y Label')
624 135 equemene
        ax.set_zlabel('Z Label')
625 135 equemene
626 135 equemene
        plt.show()
627 119 equemene
628 160 equemene
    clDataX.release()
629 160 equemene
    clDataV.release()
630 133 equemene
    clKinetic.release()
631 133 equemene
    clPotential.release()