Statistiques
| Révision :

root / NBody / NBody.py @ 154

Historique | Voir | Annoter | Télécharger (19,03 ko)

1 128 equemene
#!/usr/bin/env python3
2 136 equemene
# -*- coding: utf-8 -*-
3 116 equemene
"""
4 116 equemene
Demonstrateur OpenCL d'interaction NCorps
5 116 equemene

6 116 equemene
Emmanuel QUEMENER <emmanuel.quemener@ens-lyon.fr> CeCILLv2
7 116 equemene
"""
8 116 equemene
import getopt
9 116 equemene
import sys
10 116 equemene
import time
11 116 equemene
import numpy as np
12 116 equemene
import pyopencl as cl
13 116 equemene
import pyopencl.array as cl_array
14 116 equemene
from numpy.random import randint as nprnd
15 116 equemene
16 132 equemene
def DictionariesAPI():
17 132 equemene
    Marsaglia={'CONG':0,'SHR3':1,'MWC':2,'KISS':3}
18 132 equemene
    Computing={'FP32':0,'FP64':1}
19 132 equemene
    return(Marsaglia,Computing)
20 132 equemene
21 142 equemene
BlobOpenCL= """
22 116 equemene
#define znew  ((z=36969*(z&65535)+(z>>16))<<16)
23 116 equemene
#define wnew  ((w=18000*(w&65535)+(w>>16))&65535)
24 116 equemene
#define MWC   (znew+wnew)
25 116 equemene
#define SHR3  (jsr=(jsr=(jsr=jsr^(jsr<<17))^(jsr>>13))^(jsr<<5))
26 116 equemene
#define CONG  (jcong=69069*jcong+1234567)
27 116 equemene
#define KISS  ((MWC^CONG)+SHR3)
28 116 equemene

29 151 equemene
#define MWCfp MWC * 2.3283064365386963e-10f
30 151 equemene
#define KISSfp KISS * 2.3283064365386963e-10f
31 151 equemene
#define SHR3fp SHR3 * 2.3283064365386963e-10f
32 151 equemene
#define CONGfp CONG * 2.3283064365386963e-10f
33 116 equemene

34 132 equemene
#define TFP32 0
35 132 equemene
#define TFP64 1
36 132 equemene

37 151 equemene
#define LENGTH 1.e0f
38 116 equemene

39 151 equemene
#define PI 3.141592653589793238462643197169399375105820974944592307816406286e0f
40 151 equemene
// #define PI M_PI
41 116 equemene

42 151 equemene
#define SMALL_NUM 1.e-9f
43 116 equemene

44 132 equemene
#if TYPE == TFP32
45 132 equemene
#define MYFLOAT4 float4
46 132 equemene
#define MYFLOAT8 float8
47 132 equemene
#define MYFLOAT float
48 151 equemene
#define DISTANCE fast_distance
49 132 equemene
#else
50 135 equemene
#pragma OPENCL EXTENSION cl_khr_fp64: enable
51 132 equemene
#define MYFLOAT4 double4
52 132 equemene
#define MYFLOAT8 double8
53 132 equemene
#define MYFLOAT double
54 151 equemene
#define DISTANCE distance
55 132 equemene
#endif
56 132 equemene

57 132 equemene
MYFLOAT4 Interaction(MYFLOAT4 m,MYFLOAT4 n)
58 116 equemene
{
59 151 equemene
    return((n-m)/pow(DISTANCE(n,m),3));
60 116 equemene
}
61 116 equemene

62 141 equemene
MYFLOAT4 InteractionCore(MYFLOAT4 m,MYFLOAT4 n)
63 141 equemene
{
64 151 equemene
    MYFLOAT core=1.e5f;
65 151 equemene
    MYFLOAT r=DISTANCE(n,m);
66 141 equemene

67 141 equemene
    return(core*(n-m)/(MYFLOAT)(pow(r*r+core*core,2)));
68 141 equemene
}
69 141 equemene

70 133 equemene
MYFLOAT PairPotential(MYFLOAT4 m,MYFLOAT4 n)
71 133 equemene
{
72 151 equemene
    return((MYFLOAT)(-1.e0f)/(DISTANCE(n,m)));
73 133 equemene
}
74 133 equemene

75 139 equemene
MYFLOAT AtomicPotential(__global MYFLOAT8* clData,int gid)
76 139 equemene
{
77 151 equemene
    MYFLOAT potential=0.e0f;
78 139 equemene
    MYFLOAT4 x=clData[gid].lo;
79 139 equemene

80 139 equemene
    for (int i=0;i<get_global_size(0);i++)
81 139 equemene
    {
82 139 equemene
        if (gid != i)
83 139 equemene
        potential+=PairPotential(x,clData[i].lo);
84 139 equemene
    }
85 133 equemene

86 139 equemene
    barrier(CLK_GLOBAL_MEM_FENCE);
87 141 equemene
    return(potential);
88 139 equemene
}
89 139 equemene

90 142 equemene
MYFLOAT AtomicPotentialCoM(__global MYFLOAT8* clData,__global MYFLOAT4* clCoM,int gid)
91 139 equemene
{
92 142 equemene
    return(PairPotential(clData[gid].lo,clCoM[0]));
93 139 equemene
}
94 139 equemene

95 132 equemene
MYFLOAT8 AtomicRungeKutta(__global MYFLOAT8* clDataIn,int gid,MYFLOAT dt)
96 116 equemene
{
97 151 equemene
    MYFLOAT4 a0=(MYFLOAT4)(0.e0f,0.e0f,0.e0f,0.e0f);
98 141 equemene
    MYFLOAT4 v0=(MYFLOAT4)clDataIn[gid].hi;
99 133 equemene
    MYFLOAT4 x0=(MYFLOAT4)clDataIn[gid].lo;
100 133 equemene
    int N = get_global_size(0);
101 133 equemene

102 133 equemene
    for (int i=0;i<N;i++)
103 121 equemene
    {
104 121 equemene
        if (gid != i)
105 121 equemene
        a0+=Interaction(x0,clDataIn[i].lo);
106 121 equemene
    }
107 121 equemene

108 151 equemene
    MYFLOAT4 a1=(MYFLOAT4)(0.e0f,0.e0f,0.e0f,0.e0f);
109 141 equemene
    MYFLOAT4 v1=v0+a0*dt;
110 141 equemene
    MYFLOAT4 x1=x0+v0*dt;
111 133 equemene
    for (int i=0;i<N;i++)
112 121 equemene
    {
113 121 equemene
        if (gid != i)
114 121 equemene
        a1+=Interaction(x1,clDataIn[i].lo);
115 121 equemene
    }
116 121 equemene

117 151 equemene
    MYFLOAT4 a2=(MYFLOAT4)(0.e0f,0.e0f,0.e0f,0.e0f);
118 151 equemene
    MYFLOAT4 v2=v0+a1*dt*(MYFLOAT)5.e-1f;
119 151 equemene
    MYFLOAT4 x2=x0+v1*dt*(MYFLOAT)5.e-1f;
120 133 equemene
    for (int i=0;i<N;i++)
121 121 equemene
    {
122 121 equemene
        if (gid != i)
123 121 equemene
        a2+=Interaction(x2,clDataIn[i].lo);
124 121 equemene
    }
125 121 equemene

126 151 equemene
    MYFLOAT4 a3=(MYFLOAT4)(0.e0f,0.e0f,0.e0f,0.e0f);
127 151 equemene
    MYFLOAT4 v3=v0+a2*dt*(MYFLOAT)5.e-1f;
128 151 equemene
    MYFLOAT4 x3=x0+v2*dt*(MYFLOAT)5.e-1f;
129 133 equemene
    for (int i=0;i<N;i++)
130 121 equemene
    {
131 121 equemene
        if (gid != i)
132 121 equemene
        a3+=Interaction(x3,clDataIn[i].lo);
133 121 equemene
    }
134 121 equemene

135 151 equemene
    MYFLOAT4 a4=(MYFLOAT4)(0.e0f,0.e0f,0.e0f,0.e0f);
136 141 equemene
    MYFLOAT4 v4=v0+a3*dt;
137 141 equemene
    MYFLOAT4 x4=x0+v3*dt;
138 141 equemene
    for (int i=0;i<N;i++)
139 141 equemene
    {
140 141 equemene
        if (gid != i)
141 141 equemene
        a4+=Interaction(x4,clDataIn[i].lo);
142 141 equemene
    }
143 141 equemene

144 151 equemene
    MYFLOAT4 xf=x0+dt*(v1+(MYFLOAT)2.e0f*(v2+v3)+v4)/(MYFLOAT)6.e0f;
145 151 equemene
    MYFLOAT4 vf=v0+dt*(a1+(MYFLOAT)2.e0f*(a2+a3)+a4)/(MYFLOAT)6.e0f;
146 121 equemene

147 151 equemene
    return((MYFLOAT8)(xf.s0,xf.s1,xf.s2,0.e0f,vf.s0,vf.s1,vf.s2,0.e0f));
148 121 equemene
}
149 121 equemene

150 121 equemene
// Elements from : http://doswa.com/2009/01/02/fourth-order-runge-kutta-numerical-integration.html
151 121 equemene

152 141 equemene
MYFLOAT8 AtomicHeun(__global MYFLOAT8* clDataIn,int gid,MYFLOAT dt)
153 121 equemene
{
154 141 equemene
    MYFLOAT4 x,v,a,xi,vi,ai,xf,vf;
155 116 equemene

156 141 equemene
    x=(MYFLOAT4)clDataIn[gid].lo;
157 141 equemene
    v=(MYFLOAT4)clDataIn[gid].hi;
158 151 equemene
    a=(MYFLOAT4)(0.e0f,0.e0f,0.e0f,0.e0f);
159 141 equemene

160 141 equemene
    for (int i=0;i<get_global_size(0);i++)
161 116 equemene
    {
162 116 equemene
        if (gid != i)
163 141 equemene
        a+=Interaction(x,clDataIn[i].lo);
164 116 equemene
    }
165 141 equemene

166 141 equemene
    vi=v+dt*a;
167 141 equemene
    xi=x+dt*vi;
168 151 equemene
    ai=(MYFLOAT4)(0.e0f,0.e0f,0.e0f,0.e0f);
169 141 equemene

170 141 equemene
    for (int i=0;i<get_global_size(0);i++)
171 116 equemene
    {
172 116 equemene
        if (gid != i)
173 141 equemene
        ai+=Interaction(xi,clDataIn[i].lo);
174 116 equemene
    }
175 141 equemene

176 151 equemene
    vf=v+dt*(a+ai)*5.e-1f;
177 151 equemene
    xf=x+dt*(v+vi)*5.e-1f;
178 118 equemene

179 151 equemene
    return((MYFLOAT8)(xf.s0,xf.s1,xf.s2,0.e0f,vf.s0,vf.s1,vf.s2,0.e0f));
180 119 equemene
}
181 119 equemene

182 140 equemene
MYFLOAT8 AtomicImplicitEuler(__global MYFLOAT8* clDataIn,int gid,MYFLOAT dt)
183 119 equemene
{
184 132 equemene
    MYFLOAT4 x,v,a,xf,vf;
185 119 equemene

186 137 equemene
    x=(MYFLOAT4)clDataIn[gid].lo;
187 137 equemene
    v=(MYFLOAT4)clDataIn[gid].hi;
188 151 equemene
    a=(MYFLOAT4)(0.e0f,0.e0f,0.e0f,0.e0f);
189 119 equemene
    for (int i=0;i<get_global_size(0);i++)
190 119 equemene
    {
191 119 equemene
        if (gid != i)
192 119 equemene
        a+=Interaction(x,clDataIn[i].lo);
193 119 equemene
    }
194 133 equemene

195 119 equemene
    vf=v+dt*a;
196 133 equemene
    xf=x+dt*vf;
197 118 equemene

198 151 equemene
    return((MYFLOAT8)(xf.s0,xf.s1,xf.s2,0.e0f,vf.s0,vf.s1,vf.s2,0.e0f));
199 116 equemene
}
200 116 equemene

201 140 equemene
MYFLOAT8 AtomicExplicitEuler(__global MYFLOAT8* clDataIn,int gid,MYFLOAT dt)
202 140 equemene
{
203 140 equemene
    MYFLOAT4 x,v,a,xf,vf;
204 140 equemene

205 140 equemene
    x=(MYFLOAT4)clDataIn[gid].lo;
206 140 equemene
    v=(MYFLOAT4)clDataIn[gid].hi;
207 151 equemene
    a=(MYFLOAT4)(0.e0f,0.e0f,0.e0f,0.e0f);
208 140 equemene
    for (int i=0;i<get_global_size(0);i++)
209 140 equemene
    {
210 140 equemene
        if (gid != i)
211 140 equemene
        a+=Interaction(x,clDataIn[i].lo);
212 140 equemene
    }
213 140 equemene

214 140 equemene
    vf=v+dt*a;
215 140 equemene
    xf=x+dt*v;
216 140 equemene

217 151 equemene
    return((MYFLOAT8)(xf.s0,xf.s1,xf.s2,0.e0f,vf.s0,vf.s1,vf.s2,0.e0f));
218 140 equemene
}
219 140 equemene

220 139 equemene
__kernel void SplutterPoints(__global MYFLOAT8* clData, MYFLOAT box,
221 139 equemene
                             uint seed_z,uint seed_w)
222 116 equemene
{
223 116 equemene
    int gid = get_global_id(0);
224 116 equemene
    uint z=seed_z+(uint)gid;
225 116 equemene
    uint w=seed_w-(uint)gid;
226 133 equemene

227 151 equemene
    MYFLOAT x0=box*(MYFLOAT)(MWCfp-5.e-1f);
228 151 equemene
    MYFLOAT y0=box*(MYFLOAT)(MWCfp-5.e-1f);
229 151 equemene
    MYFLOAT z0=box*(MYFLOAT)(MWCfp-5.e-1f);
230 137 equemene

231 151 equemene
    clData[gid].s01234567 = (MYFLOAT8) (x0,y0,z0,0.e0f,0.e0f,0.e0f,0.e0f,0.e0f);
232 116 equemene
}
233 116 equemene

234 142 equemene
__kernel void SplutterStress(__global MYFLOAT8* clData,__global MYFLOAT4* clCoM, MYFLOAT velocity,uint seed_z,uint seed_w)
235 139 equemene
{
236 139 equemene
    int gid = get_global_id(0);
237 142 equemene
    MYFLOAT N = (MYFLOAT)get_global_size(0);
238 139 equemene
    uint z=seed_z+(uint)gid;
239 139 equemene
    uint w=seed_w-(uint)gid;
240 139 equemene

241 139 equemene
    if (velocity<SMALL_NUM) {
242 151 equemene
       MYFLOAT4 SpeedVector=(MYFLOAT4)normalize(cross(clData[gid].lo,clCoM[0]))*sqrt(-AtomicPotential(clData,gid)*5.e-1f);
243 139 equemene
       clData[gid].hi=SpeedVector;
244 139 equemene
    }
245 139 equemene
    else
246 139 equemene
    {
247 142 equemene
       MYFLOAT theta=MWCfp*PI;
248 151 equemene
       MYFLOAT phi=MWCfp*PI*(MYFLOAT)2.e0f;
249 142 equemene
       MYFLOAT sinTheta=sin(theta);
250 142 equemene

251 139 equemene
       clData[gid].s4=velocity*sinTheta*cos(phi);
252 139 equemene
       clData[gid].s5=velocity*sinTheta*sin(phi);
253 139 equemene
       clData[gid].s6=velocity*cos(theta);
254 139 equemene
    }
255 139 equemene
}
256 139 equemene

257 132 equemene
__kernel void RungeKutta(__global MYFLOAT8* clData,MYFLOAT h)
258 116 equemene
{
259 116 equemene
    int gid = get_global_id(0);
260 116 equemene

261 132 equemene
    MYFLOAT8 clDataGid=AtomicRungeKutta(clData,gid,h);
262 116 equemene
    barrier(CLK_GLOBAL_MEM_FENCE);
263 121 equemene
    clData[gid]=clDataGid;
264 116 equemene
}
265 116 equemene

266 140 equemene
__kernel void ImplicitEuler(__global MYFLOAT8* clData,MYFLOAT h)
267 116 equemene
{
268 116 equemene
    int gid = get_global_id(0);
269 116 equemene

270 140 equemene
    MYFLOAT8 clDataGid=AtomicImplicitEuler(clData,gid,h);
271 116 equemene
    barrier(CLK_GLOBAL_MEM_FENCE);
272 121 equemene
    clData[gid]=clDataGid;
273 116 equemene
}
274 133 equemene

275 141 equemene
__kernel void Heun(__global MYFLOAT8* clData,MYFLOAT h)
276 141 equemene
{
277 141 equemene
    int gid = get_global_id(0);
278 141 equemene

279 141 equemene
    MYFLOAT8 clDataGid=AtomicHeun(clData,gid,h);
280 141 equemene
    barrier(CLK_GLOBAL_MEM_FENCE);
281 141 equemene
    clData[gid]=clDataGid;
282 141 equemene
}
283 141 equemene

284 140 equemene
__kernel void ExplicitEuler(__global MYFLOAT8* clData,MYFLOAT h)
285 140 equemene
{
286 140 equemene
    int gid = get_global_id(0);
287 140 equemene

288 140 equemene
    MYFLOAT8 clDataGid=AtomicExplicitEuler(clData,gid,h);
289 140 equemene
    barrier(CLK_GLOBAL_MEM_FENCE);
290 140 equemene
    clData[gid]=clDataGid;
291 140 equemene
}
292 139 equemene

293 142 equemene
__kernel void CoMPotential(__global MYFLOAT8* clData,__global MYFLOAT4* clCoM,__global MYFLOAT* clPotential)
294 133 equemene
{
295 133 equemene
    int gid = get_global_id(0);
296 133 equemene

297 142 equemene
    clPotential[gid]=PairPotential(clData[gid].lo,clCoM[0]);
298 139 equemene
}
299 139 equemene

300 142 equemene
__kernel void Potential(__global MYFLOAT8* clData,__global MYFLOAT* clPotential)
301 139 equemene
{
302 139 equemene
    int gid = get_global_id(0);
303 139 equemene

304 151 equemene
    MYFLOAT potential=0.e0f;
305 133 equemene
    MYFLOAT4 x=clData[gid].lo;
306 133 equemene

307 133 equemene
    for (int i=0;i<get_global_size(0);i++)
308 133 equemene
    {
309 133 equemene
        if (gid != i)
310 133 equemene
        potential+=PairPotential(x,clData[i].lo);
311 133 equemene
    }
312 133 equemene

313 133 equemene
    barrier(CLK_GLOBAL_MEM_FENCE);
314 151 equemene
    clPotential[gid]=(MYFLOAT)5.e-1f*potential;
315 133 equemene
}
316 133 equemene

317 142 equemene
__kernel void CenterOfMass(__global MYFLOAT8* clData,__global MYFLOAT4* clCoM,int Size)
318 139 equemene
{
319 139 equemene
    MYFLOAT4 CoM=clData[0].lo;
320 142 equemene

321 139 equemene
    for (int i=1;i<Size;i++)
322 139 equemene
    {
323 139 equemene
        CoM+=clData[i].lo;
324 139 equemene
    }
325 142 equemene

326 139 equemene
    barrier(CLK_GLOBAL_MEM_FENCE);
327 151 equemene
    clCoM[0]=(MYFLOAT4)(CoM.s0,CoM.s1,CoM.s2,0.e0f)/(MYFLOAT)Size;
328 139 equemene
}
329 139 equemene

330 133 equemene
__kernel void Kinetic(__global MYFLOAT8* clData,__global MYFLOAT* clKinetic)
331 133 equemene
{
332 133 equemene
    int gid = get_global_id(0);
333 133 equemene

334 139 equemene
    barrier(CLK_GLOBAL_MEM_FENCE);
335 151 equemene
    clKinetic[gid]=(MYFLOAT)5.e-1f*(MYFLOAT)pow(length(clData[gid].hi),2);
336 133 equemene
}
337 116 equemene
"""
338 116 equemene
339 133 equemene
def Energy(MyData):
340 133 equemene
    return(sum(pow(MyData,2)))
341 133 equemene
342 116 equemene
if __name__=='__main__':
343 116 equemene
344 132 equemene
    # ValueType
345 132 equemene
    ValueType='FP32'
346 132 equemene
    class MyFloat(np.float32):pass
347 142 equemene
    clType8=cl_array.vec.float8
348 142 equemene
    clType4=cl_array.vec.float4
349 116 equemene
    # Set defaults values
350 118 equemene
    np.set_printoptions(precision=2)
351 116 equemene
    # Id of Device : 1 is for first find !
352 120 equemene
    Device=1
353 116 equemene
    # Iterations is integer
354 133 equemene
    Number=4
355 116 equemene
    # Size of box
356 132 equemene
    SizeOfBox=MyFloat(1.)
357 116 equemene
    # Initial velocity of particules
358 132 equemene
    Velocity=MyFloat(1.)
359 116 equemene
    # Redo the last process
360 133 equemene
    Iterations=100
361 116 equemene
    # Step
362 133 equemene
    Step=MyFloat(0.01)
363 121 equemene
    # Method of integration
364 150 equemene
    Method='ImplicitEuler'
365 132 equemene
    # InitialRandom
366 132 equemene
    InitialRandom=False
367 132 equemene
    # RNG Marsaglia Method
368 132 equemene
    RNG='MWC'
369 133 equemene
    # CheckEnergies
370 133 equemene
    CheckEnergies=False
371 134 equemene
    # Display samples in 3D
372 139 equemene
    GraphSamples=False
373 139 equemene
    # Viriel Distribution of stress
374 139 equemene
    VirielStress=True
375 132 equemene
376 142 equemene
    HowToUse='%s -h [Help] -r [InitialRandom] -e [VirielStress] -g [GraphSamples] -c [CheckEnergies] -d <DeviceId> -n <NumberOfParticules> -z <SizeOfBox> -v <Velocity> -s <Step> -i <Iterations> -m <RungeKutta|ImplicitEuler|ExplicitEuler|Heun> -t <FP32|FP64>'
377 116 equemene
378 116 equemene
    try:
379 139 equemene
        opts, args = getopt.getopt(sys.argv[1:],"rehgcd:n:z:v:i:s:m:t:",["random","viriel","graph","check","device=","number=","size=","velocity=","iterations=","step=","method=","valuetype="])
380 116 equemene
    except getopt.GetoptError:
381 128 equemene
        print(HowToUse % sys.argv[0])
382 116 equemene
        sys.exit(2)
383 116 equemene
384 116 equemene
    for opt, arg in opts:
385 116 equemene
        if opt == '-h':
386 128 equemene
            print(HowToUse % sys.argv[0])
387 116 equemene
388 128 equemene
            print("\nInformations about devices detected under OpenCL:")
389 116 equemene
            try:
390 132 equemene
                Id=0
391 116 equemene
                for platform in cl.get_platforms():
392 116 equemene
                    for device in platform.get_devices():
393 137 equemene
                        #deviceType=cl.device_type.to_string(device.type)
394 149 equemene
                        deviceType="xPU"
395 128 equemene
                        print("Device #%i from %s of type %s : %s" % (Id,platform.vendor.lstrip(),deviceType,device.name.lstrip()))
396 116 equemene
                        Id=Id+1
397 116 equemene
                sys.exit()
398 116 equemene
            except ImportError:
399 128 equemene
                print("Your platform does not seem to support OpenCL")
400 116 equemene
                sys.exit()
401 116 equemene
402 132 equemene
        elif opt in ("-t", "--valuetype"):
403 132 equemene
            if arg=='FP64':
404 132 equemene
                class MyFloat(np.float64): pass
405 142 equemene
                clType8=cl_array.vec.double8
406 142 equemene
                clType4=cl_array.vec.double4
407 132 equemene
            else:
408 132 equemene
                class MyFloat(np.float32):pass
409 142 equemene
                clType8=cl_array.vec.float8
410 142 equemene
                clType4=cl_array.vec.float4
411 132 equemene
            ValueType = arg
412 116 equemene
        elif opt in ("-d", "--device"):
413 116 equemene
            Device=int(arg)
414 121 equemene
        elif opt in ("-m", "--method"):
415 121 equemene
            Method=arg
416 116 equemene
        elif opt in ("-n", "--number"):
417 116 equemene
            Number=int(arg)
418 116 equemene
        elif opt in ("-z", "--size"):
419 132 equemene
            SizeOfBox=MyFloat(arg)
420 116 equemene
        elif opt in ("-v", "--velocity"):
421 132 equemene
            Velocity=MyFloat(arg)
422 139 equemene
            VirielStress=False
423 116 equemene
        elif opt in ("-s", "--step"):
424 132 equemene
            Step=MyFloat(arg)
425 120 equemene
        elif opt in ("-i", "--iterations"):
426 120 equemene
            Iterations=int(arg)
427 132 equemene
        elif opt in ("-r", "--random"):
428 132 equemene
            InitialRandom=True
429 133 equemene
        elif opt in ("-c", "--check"):
430 133 equemene
            CheckEnergies=True
431 134 equemene
        elif opt in ("-g", "--graph"):
432 134 equemene
            GraphSamples=True
433 139 equemene
        elif opt in ("-e", "--viriel"):
434 139 equemene
            VirielStress=True
435 134 equemene
436 141 equemene
    SizeOfBox=MyFloat(np.power(Number,1./3.)*SizeOfBox)
437 132 equemene
    Velocity=MyFloat(Velocity)
438 132 equemene
    Step=MyFloat(Step)
439 132 equemene
440 128 equemene
    print("Device choosed : %s" % Device)
441 128 equemene
    print("Number of particules : %s" % Number)
442 128 equemene
    print("Size of Box : %s" % SizeOfBox)
443 139 equemene
    print("Initial velocity %s" % Velocity)
444 139 equemene
    print("Number of iterations %s" % Iterations)
445 139 equemene
    print("Step of iteration %s" % Step)
446 139 equemene
    print("Method of resolution %s" % Method)
447 139 equemene
    print("Initial Random for RNG Seed %s" % InitialRandom)
448 139 equemene
    print("Check for Energies %s" % CheckEnergies)
449 139 equemene
    print("Graph for Samples %s" % GraphSamples)
450 139 equemene
    print("ValueType is %s" % ValueType)
451 139 equemene
    print("Viriel distribution of stress %s" % VirielStress)
452 116 equemene
453 132 equemene
    # Create Numpy array of CL vector with 8 FP32
454 142 equemene
    MyCoM = np.zeros(1,dtype=clType4)
455 142 equemene
    MyData = np.zeros(Number, dtype=clType8)
456 133 equemene
    MyPotential = np.zeros(Number, dtype=MyFloat)
457 133 equemene
    MyKinetic = np.zeros(Number, dtype=MyFloat)
458 132 equemene
459 132 equemene
    Marsaglia,Computing=DictionariesAPI()
460 132 equemene
461 132 equemene
    # Scan the OpenCL arrays
462 132 equemene
    Id=0
463 116 equemene
    HasXPU=False
464 116 equemene
    for platform in cl.get_platforms():
465 116 equemene
        for device in platform.get_devices():
466 116 equemene
            if Id==Device:
467 116 equemene
                PlatForm=platform
468 116 equemene
                XPU=device
469 128 equemene
                print("CPU/GPU selected: ",device.name.lstrip())
470 151 equemene
                print("Platform selected: ",platform.name)
471 116 equemene
                HasXPU=True
472 116 equemene
            Id+=1
473 116 equemene
474 116 equemene
    if HasXPU==False:
475 128 equemene
        print("No XPU #%i found in all of %i devices, sorry..." % (Device,Id-1))
476 116 equemene
        sys.exit()
477 116 equemene
478 132 equemene
    # Create Context
479 116 equemene
    try:
480 116 equemene
        ctx = cl.Context([XPU])
481 116 equemene
        queue = cl.CommandQueue(ctx,properties=cl.command_queue_properties.PROFILING_ENABLE)
482 116 equemene
    except:
483 128 equemene
        print("Crash during context creation")
484 116 equemene
485 132 equemene
    print(Marsaglia[RNG],Computing[ValueType])
486 132 equemene
    # Build all routines used for the computing
487 151 equemene
    #BuildOptions="-DTRNG=%i -DTYPE=%i" % (Marsaglia[RNG],Computing[ValueType])
488 151 equemene
    #BuildOptions="-cl-mad-enable -cl-fast-relaxed-math -DTRNG=%i -DTYPE=%i" % (Marsaglia[RNG],Computing[ValueType])
489 151 equemene
    BuildOptions="-cl-mad-enable -DTRNG=%i -DTYPE=%i" % (Marsaglia[RNG],Computing[ValueType])
490 151 equemene
    if 'Intel' in PlatForm.name:
491 151 equemene
        MyRoutines = cl.Program(ctx, BlobOpenCL).build(options = BuildOptions)
492 151 equemene
    else:
493 151 equemene
        MyRoutines = cl.Program(ctx, BlobOpenCL).build(options = BuildOptions+" -cl-strict-aliasing")
494 119 equemene
495 116 equemene
    mf = cl.mem_flags
496 119 equemene
    clData = cl.Buffer(ctx, mf.READ_WRITE, MyData.nbytes)
497 133 equemene
    clPotential = cl.Buffer(ctx, mf.READ_WRITE, MyPotential.nbytes)
498 133 equemene
    clKinetic = cl.Buffer(ctx, mf.READ_WRITE, MyKinetic.nbytes)
499 139 equemene
    clCoM = cl.Buffer(ctx, mf.READ_WRITE, MyCoM.nbytes)
500 119 equemene
    #clData = cl.Buffer(ctx, mf.WRITE_ONLY|mf.COPY_HOST_PTR,hostbuf=MyData)
501 116 equemene
502 134 equemene
    print('All particles superimposed.')
503 116 equemene
504 132 equemene
    print(SizeOfBox.dtype)
505 132 equemene
506 132 equemene
    # Set particles to RNG points
507 132 equemene
    if InitialRandom:
508 139 equemene
        MyRoutines.SplutterPoints(queue,(Number,1),None,clData,SizeOfBox,np.uint32(nprnd(2**32)),np.uint32(nprnd(2**32)))
509 132 equemene
    else:
510 139 equemene
        MyRoutines.SplutterPoints(queue,(Number,1),None,clData,SizeOfBox,np.uint32(110271),np.uint32(250173))
511 116 equemene
512 132 equemene
    print('All particules distributed')
513 139 equemene
514 139 equemene
    MyRoutines.CenterOfMass(queue,(1,1),None,clData,clCoM,np.int32(Number))
515 142 equemene
    cl.enqueue_copy(queue,MyCoM,clCoM)
516 142 equemene
    print('Center Of Mass: (%s,%s,%s)' % (MyCoM[0][0],MyCoM[0][1],MyCoM[0][2]))
517 139 equemene
518 139 equemene
    if VirielStress:
519 150 equemene
        MyRoutines.SplutterStress(queue,(Number,1),None,clData,clCoM,MyFloat(0.),np.uint32(110271),np.uint32(250173))
520 139 equemene
    else:
521 139 equemene
        MyRoutines.SplutterStress(queue,(Number,1),None,clData,clCoM,Velocity,np.uint32(110271),np.uint32(250173))
522 139 equemene
523 139 equemene
    if GraphSamples:
524 139 equemene
        cl.enqueue_copy(queue, MyData, clData)
525 139 equemene
        t0=np.array([[MyData[0][0],MyData[0][1],MyData[0][2]]])
526 139 equemene
        t1=np.array([[MyData[1][0],MyData[1][1],MyData[1][2]]])
527 139 equemene
        tL=np.array([[MyData[-1][0],MyData[-1][1],MyData[-1][2]]])
528 139 equemene
        s0=np.array([[MyData[0][4],MyData[0][5],MyData[0][6],MyData[0][7]]])
529 139 equemene
        s1=np.array([[MyData[1][4],MyData[1][5],MyData[1][6],MyData[1][7]]])
530 139 equemene
        sL=np.array([[MyData[-1][4],MyData[-1][5],MyData[-1][6],MyData[-1][7]]])
531 139 equemene
532 141 equemene
        #print(t0,t1,tL)
533 141 equemene
        #print(s0,s1,sL)
534 139 equemene
535 142 equemene
    CLLaunch=MyRoutines.Potential(queue,(Number,1),None,clData,clPotential)
536 141 equemene
    CLLaunch=MyRoutines.Kinetic(queue,(Number,1),None,clData,clKinetic)
537 133 equemene
    CLLaunch.wait()
538 141 equemene
    cl.enqueue_copy(queue,MyPotential,clPotential)
539 141 equemene
    cl.enqueue_copy(queue,MyKinetic,clKinetic)
540 142 equemene
    print('Viriel=%s Potential=%s Kinetic=%s'% (np.sum(MyPotential)+2*np.sum(MyKinetic),np.sum(MyPotential),np.sum(MyKinetic)))
541 133 equemene
542 134 equemene
    if GraphSamples:
543 134 equemene
        cl.enqueue_copy(queue, MyData, clData)
544 134 equemene
        t0=np.array([[MyData[0][0],MyData[0][1],MyData[0][2]]])
545 134 equemene
        t1=np.array([[MyData[1][0],MyData[1][1],MyData[1][2]]])
546 134 equemene
        tL=np.array([[MyData[-1][0],MyData[-1][1],MyData[-1][2]]])
547 116 equemene
548 116 equemene
    time_start=time.time()
549 128 equemene
    for i in range(Iterations):
550 140 equemene
        if Method=="ImplicitEuler":
551 140 equemene
            CLLaunch=MyRoutines.ImplicitEuler(queue,(Number,1),None,clData,Step)
552 140 equemene
        elif Method=="ExplicitEuler":
553 140 equemene
            CLLaunch=MyRoutines.ExplicitEuler(queue,(Number,1),None,clData,Step)
554 141 equemene
        elif Method=="Heun":
555 141 equemene
            CLLaunch=MyRoutines.Heun(queue,(Number,1),None,clData,Step)
556 140 equemene
        else:
557 132 equemene
            CLLaunch=MyRoutines.RungeKutta(queue,(Number,1),None,clData,Step)
558 118 equemene
        CLLaunch.wait()
559 133 equemene
        if CheckEnergies:
560 142 equemene
            CLLaunch=MyRoutines.Potential(queue,(Number,1),None,clData,clPotential)
561 141 equemene
            CLLaunch=MyRoutines.Kinetic(queue,(Number,1),None,clData,clKinetic)
562 133 equemene
            CLLaunch.wait()
563 133 equemene
            cl.enqueue_copy(queue,MyPotential,clPotential)
564 133 equemene
            cl.enqueue_copy(queue,MyKinetic,clKinetic)
565 151 equemene
            print(np.sum(MyPotential)+2.*np.sum(MyKinetic),np.sum(MyPotential),np.sum(MyKinetic))
566 133 equemene
567 139 equemene
            print(MyPotential,MyKinetic)
568 139 equemene
569 134 equemene
        if GraphSamples:
570 134 equemene
            cl.enqueue_copy(queue, MyData, clData)
571 134 equemene
            t0=np.append(t0,[MyData[0][0],MyData[0][1],MyData[0][2]])
572 134 equemene
            t1=np.append(t1,[MyData[1][0],MyData[1][1],MyData[1][2]])
573 134 equemene
            tL=np.append(tL,[MyData[-1][0],MyData[-1][1],MyData[-1][2]])
574 128 equemene
    print("\nDuration on %s for each %s" % (Device,(time.time()-time_start)/Iterations))
575 135 equemene
576 142 equemene
    CLLaunch=MyRoutines.Potential(queue,(Number,1),None,clData,clPotential)
577 141 equemene
    CLLaunch=MyRoutines.Kinetic(queue,(Number,1),None,clData,clKinetic)
578 141 equemene
    CLLaunch.wait()
579 141 equemene
    cl.enqueue_copy(queue,MyPotential,clPotential)
580 141 equemene
    cl.enqueue_copy(queue,MyKinetic,clKinetic)
581 151 equemene
    print('Viriel=%s Potential=%s Kinetic=%s'% (np.sum(MyPotential)+2.*np.sum(MyKinetic),np.sum(MyPotential),np.sum(MyKinetic)))
582 142 equemene
    MyRoutines.CenterOfMass(queue,(1,1),None,clData,clCoM,np.int32(Number))
583 142 equemene
    cl.enqueue_copy(queue,MyCoM,clCoM)
584 142 equemene
    print('Center Of Mass: (%s,%s,%s)' % (MyCoM[0][0],MyCoM[0][1],MyCoM[0][2]))
585 141 equemene
586 135 equemene
    if GraphSamples:
587 135 equemene
        t0=np.transpose(np.reshape(t0,(Iterations+1,3)))
588 135 equemene
        t1=np.transpose(np.reshape(t1,(Iterations+1,3)))
589 135 equemene
        tL=np.transpose(np.reshape(tL,(Iterations+1,3)))
590 118 equemene
591 135 equemene
        import matplotlib.pyplot as plt
592 135 equemene
        from mpl_toolkits.mplot3d import Axes3D
593 119 equemene
594 135 equemene
        fig = plt.figure()
595 135 equemene
        ax = fig.gca(projection='3d')
596 135 equemene
        ax.scatter(t0[0],t0[1],t0[2], marker='^',color='blue')
597 135 equemene
        ax.scatter(t1[0],t1[1],t1[2], marker='o',color='red')
598 135 equemene
        ax.scatter(tL[0],tL[1],tL[2], marker='D',color='green')
599 135 equemene
600 135 equemene
        ax.set_xlabel('X Label')
601 135 equemene
        ax.set_ylabel('Y Label')
602 135 equemene
        ax.set_zlabel('Z Label')
603 135 equemene
604 135 equemene
        plt.show()
605 119 equemene
606 119 equemene
    clData.release()
607 133 equemene
    clKinetic.release()
608 133 equemene
    clPotential.release()