Statistiques
| Révision :

root / NBody / NBody.py @ 149

Historique | Voir | Annoter | Télécharger (18,63 ko)

1 128 equemene
#!/usr/bin/env python3
2 136 equemene
# -*- coding: utf-8 -*-
3 116 equemene
"""
4 116 equemene
Demonstrateur OpenCL d'interaction NCorps
5 116 equemene

6 116 equemene
Emmanuel QUEMENER <emmanuel.quemener@ens-lyon.fr> CeCILLv2
7 116 equemene
"""
8 116 equemene
import getopt
9 116 equemene
import sys
10 116 equemene
import time
11 116 equemene
import numpy as np
12 116 equemene
import pyopencl as cl
13 116 equemene
import pyopencl.array as cl_array
14 116 equemene
from numpy.random import randint as nprnd
15 116 equemene
16 132 equemene
def DictionariesAPI():
17 132 equemene
    Marsaglia={'CONG':0,'SHR3':1,'MWC':2,'KISS':3}
18 132 equemene
    Computing={'FP32':0,'FP64':1}
19 132 equemene
    return(Marsaglia,Computing)
20 132 equemene
21 137 equemene
22 137 equemene
23 137 equemene
24 137 equemene
25 137 equemene
26 137 equemene
27 137 equemene
28 137 equemene
29 137 equemene
30 142 equemene
31 142 equemene
BlobOpenCL= """
32 116 equemene
#define znew  ((z=36969*(z&65535)+(z>>16))<<16)
33 116 equemene
#define wnew  ((w=18000*(w&65535)+(w>>16))&65535)
34 116 equemene
#define MWC   (znew+wnew)
35 116 equemene
#define SHR3  (jsr=(jsr=(jsr=jsr^(jsr<<17))^(jsr>>13))^(jsr<<5))
36 116 equemene
#define CONG  (jcong=69069*jcong+1234567)
37 116 equemene
#define KISS  ((MWC^CONG)+SHR3)
38 116 equemene

39 116 equemene
#define MWCfp MWC * 2.328306435454494e-10f
40 116 equemene
#define KISSfp KISS * 2.328306435454494e-10f
41 116 equemene
#define SHR3fp SHR3 * 2.328306435454494e-10f
42 116 equemene
#define CONGfp CONG * 2.328306435454494e-10f
43 116 equemene

44 132 equemene
#define TFP32 0
45 132 equemene
#define TFP64 1
46 132 equemene

47 137 equemene
#define LENGTH 1e0f
48 116 equemene

49 137 equemene
#define PI 3.14159265359e0f
50 116 equemene

51 137 equemene
#define SMALL_NUM 1e-9f
52 116 equemene

53 132 equemene
#if TYPE == TFP32
54 132 equemene
#define MYFLOAT4 float4
55 132 equemene
#define MYFLOAT8 float8
56 132 equemene
#define MYFLOAT float
57 132 equemene
#else
58 135 equemene
#pragma OPENCL EXTENSION cl_khr_fp64: enable
59 132 equemene
#define MYFLOAT4 double4
60 132 equemene
#define MYFLOAT8 double8
61 132 equemene
#define MYFLOAT double
62 132 equemene
#endif
63 132 equemene

64 132 equemene
MYFLOAT4 Interaction(MYFLOAT4 m,MYFLOAT4 n)
65 116 equemene
{
66 139 equemene
    return((n-m)/pow(distance(n,m),3));
67 116 equemene
}
68 116 equemene

69 141 equemene
MYFLOAT4 InteractionCore(MYFLOAT4 m,MYFLOAT4 n)
70 141 equemene
{
71 141 equemene
    MYFLOAT core=1e5f;
72 141 equemene
    MYFLOAT r=distance(n,m);
73 141 equemene

74 141 equemene
    return(core*(n-m)/(MYFLOAT)(pow(r*r+core*core,2)));
75 141 equemene
}
76 141 equemene

77 133 equemene
MYFLOAT PairPotential(MYFLOAT4 m,MYFLOAT4 n)
78 133 equemene
{
79 137 equemene
    return((MYFLOAT)(-1e0f)/(distance(n,m)));
80 133 equemene
}
81 133 equemene

82 139 equemene
MYFLOAT AtomicPotential(__global MYFLOAT8* clData,int gid)
83 139 equemene
{
84 139 equemene
    MYFLOAT potential=0e0f;
85 139 equemene
    MYFLOAT4 x=clData[gid].lo;
86 139 equemene

87 139 equemene
    for (int i=0;i<get_global_size(0);i++)
88 139 equemene
    {
89 139 equemene
        if (gid != i)
90 139 equemene
        potential+=PairPotential(x,clData[i].lo);
91 139 equemene
    }
92 133 equemene

93 139 equemene
    barrier(CLK_GLOBAL_MEM_FENCE);
94 142 equemene
    //return(5e-1f*potential);
95 141 equemene
    return(potential);
96 139 equemene
}
97 139 equemene

98 142 equemene
MYFLOAT AtomicPotentialCoM(__global MYFLOAT8* clData,__global MYFLOAT4* clCoM,int gid)
99 139 equemene
{
100 142 equemene
    return(PairPotential(clData[gid].lo,clCoM[0]));
101 139 equemene
}
102 139 equemene

103 132 equemene
MYFLOAT8 AtomicRungeKutta(__global MYFLOAT8* clDataIn,int gid,MYFLOAT dt)
104 116 equemene
{
105 141 equemene
    MYFLOAT4 a0=(MYFLOAT4)(0e0f,0e0f,0e0f,0e0f);
106 141 equemene
    MYFLOAT4 v0=(MYFLOAT4)clDataIn[gid].hi;
107 133 equemene
    MYFLOAT4 x0=(MYFLOAT4)clDataIn[gid].lo;
108 133 equemene
    int N = get_global_size(0);
109 133 equemene

110 133 equemene
    for (int i=0;i<N;i++)
111 121 equemene
    {
112 121 equemene
        if (gid != i)
113 121 equemene
        a0+=Interaction(x0,clDataIn[i].lo);
114 121 equemene
    }
115 121 equemene

116 139 equemene
    MYFLOAT4 a1=(MYFLOAT4)(0e0f,0e0f,0e0f,0e0f);
117 141 equemene
    MYFLOAT4 v1=v0+a0*dt;
118 141 equemene
    MYFLOAT4 x1=x0+v0*dt;
119 133 equemene
    for (int i=0;i<N;i++)
120 121 equemene
    {
121 121 equemene
        if (gid != i)
122 121 equemene
        a1+=Interaction(x1,clDataIn[i].lo);
123 121 equemene
    }
124 121 equemene

125 141 equemene
    MYFLOAT4 a2=(MYFLOAT4)(0e0f,0e0f,0e0f,0e0f);
126 141 equemene
    MYFLOAT4 v2=v0+a1*dt*(MYFLOAT)5e-1f;
127 137 equemene
    MYFLOAT4 x2=x0+v1*dt*(MYFLOAT)5e-1f;
128 133 equemene
    for (int i=0;i<N;i++)
129 121 equemene
    {
130 121 equemene
        if (gid != i)
131 121 equemene
        a2+=Interaction(x2,clDataIn[i].lo);
132 121 equemene
    }
133 121 equemene

134 139 equemene
    MYFLOAT4 a3=(MYFLOAT4)(0e0f,0e0f,0e0f,0e0f);
135 141 equemene
    MYFLOAT4 v3=v0+a2*dt*(MYFLOAT)5e-1f;
136 141 equemene
    MYFLOAT4 x3=x0+v2*dt*(MYFLOAT)5e-1f;
137 133 equemene
    for (int i=0;i<N;i++)
138 121 equemene
    {
139 121 equemene
        if (gid != i)
140 121 equemene
        a3+=Interaction(x3,clDataIn[i].lo);
141 121 equemene
    }
142 121 equemene

143 141 equemene
    MYFLOAT4 a4=(MYFLOAT4)(0e0f,0e0f,0e0f,0e0f);
144 141 equemene
    MYFLOAT4 v4=v0+a3*dt;
145 141 equemene
    MYFLOAT4 x4=x0+v3*dt;
146 141 equemene
    for (int i=0;i<N;i++)
147 141 equemene
    {
148 141 equemene
        if (gid != i)
149 141 equemene
        a4+=Interaction(x4,clDataIn[i].lo);
150 141 equemene
    }
151 141 equemene

152 141 equemene
    MYFLOAT4 xf=x0+dt*(v1+(MYFLOAT)2e0f*(v2+v3)+v4)/(MYFLOAT)6e0f;
153 141 equemene
    MYFLOAT4 vf=v0+dt*(a1+(MYFLOAT)2e0f*(a2+a3)+a4)/(MYFLOAT)6e0f;
154 121 equemene

155 139 equemene
    return((MYFLOAT8)(xf.s0,xf.s1,xf.s2,0e0f,vf.s0,vf.s1,vf.s2,0e0f));
156 121 equemene
}
157 121 equemene

158 121 equemene
// Elements from : http://doswa.com/2009/01/02/fourth-order-runge-kutta-numerical-integration.html
159 121 equemene

160 141 equemene
MYFLOAT8 AtomicHeun(__global MYFLOAT8* clDataIn,int gid,MYFLOAT dt)
161 121 equemene
{
162 141 equemene
    MYFLOAT4 x,v,a,xi,vi,ai,xf,vf;
163 116 equemene

164 141 equemene
    x=(MYFLOAT4)clDataIn[gid].lo;
165 141 equemene
    v=(MYFLOAT4)clDataIn[gid].hi;
166 141 equemene
    a=(MYFLOAT4)(0e0f,0e0f,0e0f,0e0f);
167 141 equemene

168 141 equemene
    for (int i=0;i<get_global_size(0);i++)
169 116 equemene
    {
170 116 equemene
        if (gid != i)
171 141 equemene
        a+=Interaction(x,clDataIn[i].lo);
172 116 equemene
    }
173 141 equemene

174 141 equemene
    vi=v+dt*a;
175 141 equemene
    xi=x+dt*vi;
176 141 equemene
    ai=(MYFLOAT4)(0e0f,0e0f,0e0f,0e0f);
177 141 equemene

178 141 equemene
    for (int i=0;i<get_global_size(0);i++)
179 116 equemene
    {
180 116 equemene
        if (gid != i)
181 141 equemene
        ai+=Interaction(xi,clDataIn[i].lo);
182 116 equemene
    }
183 141 equemene

184 141 equemene
    vf=v+dt*(a+ai)*5e-1f;
185 141 equemene
    xf=x+dt*(v+vi)*5e-1f;
186 118 equemene

187 139 equemene
    return((MYFLOAT8)(xf.s0,xf.s1,xf.s2,0e0f,vf.s0,vf.s1,vf.s2,0e0f));
188 119 equemene
}
189 119 equemene

190 140 equemene
MYFLOAT8 AtomicImplicitEuler(__global MYFLOAT8* clDataIn,int gid,MYFLOAT dt)
191 119 equemene
{
192 132 equemene
    MYFLOAT4 x,v,a,xf,vf;
193 119 equemene

194 137 equemene
    x=(MYFLOAT4)clDataIn[gid].lo;
195 137 equemene
    v=(MYFLOAT4)clDataIn[gid].hi;
196 139 equemene
    a=(MYFLOAT4)(0e0f,0e0f,0e0f,0e0f);
197 119 equemene
    for (int i=0;i<get_global_size(0);i++)
198 119 equemene
    {
199 119 equemene
        if (gid != i)
200 119 equemene
        a+=Interaction(x,clDataIn[i].lo);
201 141 equemene
        //a+=InteractionCore(x,clDataIn[i].lo);
202 119 equemene
    }
203 133 equemene

204 119 equemene
    vf=v+dt*a;
205 133 equemene
    xf=x+dt*vf;
206 118 equemene

207 139 equemene
    return((MYFLOAT8)(xf.s0,xf.s1,xf.s2,0e0f,vf.s0,vf.s1,vf.s2,0e0f));
208 116 equemene
}
209 116 equemene

210 140 equemene
MYFLOAT8 AtomicExplicitEuler(__global MYFLOAT8* clDataIn,int gid,MYFLOAT dt)
211 140 equemene
{
212 140 equemene
    MYFLOAT4 x,v,a,xf,vf;
213 140 equemene

214 140 equemene
    x=(MYFLOAT4)clDataIn[gid].lo;
215 140 equemene
    v=(MYFLOAT4)clDataIn[gid].hi;
216 140 equemene
    a=(MYFLOAT4)(0e0f,0e0f,0e0f,0e0f);
217 140 equemene
    for (int i=0;i<get_global_size(0);i++)
218 140 equemene
    {
219 140 equemene
        if (gid != i)
220 140 equemene
        a+=Interaction(x,clDataIn[i].lo);
221 140 equemene
    }
222 140 equemene

223 140 equemene
    vf=v+dt*a;
224 140 equemene
    xf=x+dt*v;
225 140 equemene

226 140 equemene
    return((MYFLOAT8)(xf.s0,xf.s1,xf.s2,0e0f,vf.s0,vf.s1,vf.s2,0e0f));
227 140 equemene
}
228 140 equemene

229 139 equemene
__kernel void SplutterPoints(__global MYFLOAT8* clData, MYFLOAT box,
230 139 equemene
                             uint seed_z,uint seed_w)
231 116 equemene
{
232 116 equemene
    int gid = get_global_id(0);
233 116 equemene
    uint z=seed_z+(uint)gid;
234 116 equemene
    uint w=seed_w-(uint)gid;
235 133 equemene

236 137 equemene
    MYFLOAT x0=box*(MYFLOAT)(MWCfp-5e-1f);
237 137 equemene
    MYFLOAT y0=box*(MYFLOAT)(MWCfp-5e-1f);
238 137 equemene
    MYFLOAT z0=box*(MYFLOAT)(MWCfp-5e-1f);
239 137 equemene

240 137 equemene
    clData[gid].s01234567 = (MYFLOAT8) (x0,y0,z0,0e0f,0e0f,0e0f,0e0f,0e0f);
241 116 equemene
}
242 116 equemene

243 142 equemene
__kernel void SplutterStress(__global MYFLOAT8* clData,__global MYFLOAT4* clCoM, MYFLOAT velocity,uint seed_z,uint seed_w)
244 139 equemene
{
245 139 equemene
    int gid = get_global_id(0);
246 142 equemene
    MYFLOAT N = (MYFLOAT)get_global_size(0);
247 139 equemene
    uint z=seed_z+(uint)gid;
248 139 equemene
    uint w=seed_w-(uint)gid;
249 139 equemene

250 139 equemene
    if (velocity<SMALL_NUM) {
251 142 equemene
       MYFLOAT4 SpeedVector=(MYFLOAT4)normalize(cross(clData[gid].lo,clCoM[0]))*sqrt(-AtomicPotential(clData,gid)*5e-1f);
252 139 equemene
       clData[gid].hi=SpeedVector;
253 139 equemene
    }
254 139 equemene
    else
255 139 equemene
    {
256 142 equemene
       MYFLOAT theta=MWCfp*PI;
257 142 equemene
       MYFLOAT phi=MWCfp*PI*(MYFLOAT)2e0f;
258 142 equemene
       MYFLOAT sinTheta=sin(theta);
259 142 equemene

260 139 equemene
       clData[gid].s4=velocity*sinTheta*cos(phi);
261 139 equemene
       clData[gid].s5=velocity*sinTheta*sin(phi);
262 139 equemene
       clData[gid].s6=velocity*cos(theta);
263 139 equemene
    }
264 139 equemene
}
265 139 equemene

266 132 equemene
__kernel void RungeKutta(__global MYFLOAT8* clData,MYFLOAT h)
267 116 equemene
{
268 116 equemene
    int gid = get_global_id(0);
269 116 equemene

270 132 equemene
    MYFLOAT8 clDataGid=AtomicRungeKutta(clData,gid,h);
271 116 equemene
    barrier(CLK_GLOBAL_MEM_FENCE);
272 121 equemene
    clData[gid]=clDataGid;
273 116 equemene
}
274 116 equemene

275 140 equemene
__kernel void ImplicitEuler(__global MYFLOAT8* clData,MYFLOAT h)
276 116 equemene
{
277 116 equemene
    int gid = get_global_id(0);
278 116 equemene

279 140 equemene
    MYFLOAT8 clDataGid=AtomicImplicitEuler(clData,gid,h);
280 116 equemene
    barrier(CLK_GLOBAL_MEM_FENCE);
281 121 equemene
    clData[gid]=clDataGid;
282 116 equemene
}
283 133 equemene

284 141 equemene
__kernel void Heun(__global MYFLOAT8* clData,MYFLOAT h)
285 141 equemene
{
286 141 equemene
    int gid = get_global_id(0);
287 141 equemene

288 141 equemene
    MYFLOAT8 clDataGid=AtomicHeun(clData,gid,h);
289 141 equemene
    barrier(CLK_GLOBAL_MEM_FENCE);
290 141 equemene
    clData[gid]=clDataGid;
291 141 equemene
}
292 141 equemene

293 140 equemene
__kernel void ExplicitEuler(__global MYFLOAT8* clData,MYFLOAT h)
294 140 equemene
{
295 140 equemene
    int gid = get_global_id(0);
296 140 equemene

297 140 equemene
    MYFLOAT8 clDataGid=AtomicExplicitEuler(clData,gid,h);
298 140 equemene
    barrier(CLK_GLOBAL_MEM_FENCE);
299 140 equemene
    clData[gid]=clDataGid;
300 140 equemene
}
301 139 equemene

302 142 equemene
__kernel void CoMPotential(__global MYFLOAT8* clData,__global MYFLOAT4* clCoM,__global MYFLOAT* clPotential)
303 133 equemene
{
304 133 equemene
    int gid = get_global_id(0);
305 133 equemene

306 142 equemene
    clPotential[gid]=PairPotential(clData[gid].lo,clCoM[0]);
307 139 equemene
}
308 139 equemene

309 142 equemene
__kernel void Potential(__global MYFLOAT8* clData,__global MYFLOAT* clPotential)
310 139 equemene
{
311 139 equemene
    int gid = get_global_id(0);
312 139 equemene

313 137 equemene
    MYFLOAT potential=0e0f;
314 133 equemene
    MYFLOAT4 x=clData[gid].lo;
315 133 equemene

316 133 equemene
    for (int i=0;i<get_global_size(0);i++)
317 133 equemene
    {
318 133 equemene
        if (gid != i)
319 133 equemene
        potential+=PairPotential(x,clData[i].lo);
320 133 equemene
    }
321 133 equemene

322 133 equemene
    barrier(CLK_GLOBAL_MEM_FENCE);
323 137 equemene
    clPotential[gid]=(MYFLOAT)5e-1f*potential;
324 133 equemene
}
325 133 equemene

326 142 equemene
__kernel void CenterOfMass(__global MYFLOAT8* clData,__global MYFLOAT4* clCoM,int Size)
327 139 equemene
{
328 139 equemene
    MYFLOAT4 CoM=clData[0].lo;
329 142 equemene

330 139 equemene
    for (int i=1;i<Size;i++)
331 139 equemene
    {
332 139 equemene
        CoM+=clData[i].lo;
333 139 equemene
    }
334 142 equemene

335 139 equemene
    barrier(CLK_GLOBAL_MEM_FENCE);
336 142 equemene
    clCoM[0]=(MYFLOAT4)(CoM.s0,CoM.s1,CoM.s2,0e0f)/(MYFLOAT)Size;
337 139 equemene
}
338 139 equemene

339 133 equemene
__kernel void Kinetic(__global MYFLOAT8* clData,__global MYFLOAT* clKinetic)
340 133 equemene
{
341 133 equemene
    int gid = get_global_id(0);
342 133 equemene

343 139 equemene
    barrier(CLK_GLOBAL_MEM_FENCE);
344 142 equemene
    clKinetic[gid]=(MYFLOAT)5e-1f*(MYFLOAT)pow(length(clData[gid].hi),2);
345 133 equemene
}
346 116 equemene
"""
347 116 equemene
348 133 equemene
def Energy(MyData):
349 133 equemene
    return(sum(pow(MyData,2)))
350 133 equemene
351 116 equemene
if __name__=='__main__':
352 116 equemene
353 132 equemene
    # ValueType
354 132 equemene
    ValueType='FP32'
355 132 equemene
    class MyFloat(np.float32):pass
356 142 equemene
    clType8=cl_array.vec.float8
357 142 equemene
    clType4=cl_array.vec.float4
358 116 equemene
    # Set defaults values
359 118 equemene
    np.set_printoptions(precision=2)
360 116 equemene
    # Id of Device : 1 is for first find !
361 120 equemene
    Device=1
362 116 equemene
    # Iterations is integer
363 133 equemene
    Number=4
364 116 equemene
    # Size of box
365 132 equemene
    SizeOfBox=MyFloat(1.)
366 116 equemene
    # Initial velocity of particules
367 132 equemene
    Velocity=MyFloat(1.)
368 116 equemene
    # Redo the last process
369 133 equemene
    Iterations=100
370 116 equemene
    # Step
371 133 equemene
    Step=MyFloat(0.01)
372 121 equemene
    # Method of integration
373 121 equemene
    Method='RungeKutta'
374 132 equemene
    # InitialRandom
375 132 equemene
    InitialRandom=False
376 132 equemene
    # RNG Marsaglia Method
377 132 equemene
    RNG='MWC'
378 133 equemene
    # CheckEnergies
379 133 equemene
    CheckEnergies=False
380 134 equemene
    # Display samples in 3D
381 139 equemene
    GraphSamples=False
382 139 equemene
    # Viriel Distribution of stress
383 139 equemene
    VirielStress=True
384 132 equemene
385 142 equemene
    HowToUse='%s -h [Help] -r [InitialRandom] -e [VirielStress] -g [GraphSamples] -c [CheckEnergies] -d <DeviceId> -n <NumberOfParticules> -z <SizeOfBox> -v <Velocity> -s <Step> -i <Iterations> -m <RungeKutta|ImplicitEuler|ExplicitEuler|Heun> -t <FP32|FP64>'
386 116 equemene
387 116 equemene
    try:
388 139 equemene
        opts, args = getopt.getopt(sys.argv[1:],"rehgcd:n:z:v:i:s:m:t:",["random","viriel","graph","check","device=","number=","size=","velocity=","iterations=","step=","method=","valuetype="])
389 116 equemene
    except getopt.GetoptError:
390 128 equemene
        print(HowToUse % sys.argv[0])
391 116 equemene
        sys.exit(2)
392 116 equemene
393 116 equemene
    for opt, arg in opts:
394 116 equemene
        if opt == '-h':
395 128 equemene
            print(HowToUse % sys.argv[0])
396 116 equemene
397 128 equemene
            print("\nInformations about devices detected under OpenCL:")
398 116 equemene
            try:
399 132 equemene
                Id=0
400 116 equemene
                for platform in cl.get_platforms():
401 116 equemene
                    for device in platform.get_devices():
402 137 equemene
                        #deviceType=cl.device_type.to_string(device.type)
403 149 equemene
                        deviceType="xPU"
404 128 equemene
                        print("Device #%i from %s of type %s : %s" % (Id,platform.vendor.lstrip(),deviceType,device.name.lstrip()))
405 116 equemene
                        Id=Id+1
406 116 equemene
                sys.exit()
407 116 equemene
            except ImportError:
408 128 equemene
                print("Your platform does not seem to support OpenCL")
409 116 equemene
                sys.exit()
410 116 equemene
411 132 equemene
        elif opt in ("-t", "--valuetype"):
412 132 equemene
            if arg=='FP64':
413 132 equemene
                class MyFloat(np.float64): pass
414 142 equemene
                clType8=cl_array.vec.double8
415 142 equemene
                clType4=cl_array.vec.double4
416 132 equemene
            else:
417 132 equemene
                class MyFloat(np.float32):pass
418 142 equemene
                clType8=cl_array.vec.float8
419 142 equemene
                clType4=cl_array.vec.float4
420 132 equemene
            ValueType = arg
421 116 equemene
        elif opt in ("-d", "--device"):
422 116 equemene
            Device=int(arg)
423 121 equemene
        elif opt in ("-m", "--method"):
424 121 equemene
            Method=arg
425 116 equemene
        elif opt in ("-n", "--number"):
426 116 equemene
            Number=int(arg)
427 116 equemene
        elif opt in ("-z", "--size"):
428 132 equemene
            SizeOfBox=MyFloat(arg)
429 116 equemene
        elif opt in ("-v", "--velocity"):
430 132 equemene
            Velocity=MyFloat(arg)
431 139 equemene
            VirielStress=False
432 116 equemene
        elif opt in ("-s", "--step"):
433 132 equemene
            Step=MyFloat(arg)
434 120 equemene
        elif opt in ("-i", "--iterations"):
435 120 equemene
            Iterations=int(arg)
436 132 equemene
        elif opt in ("-r", "--random"):
437 132 equemene
            InitialRandom=True
438 133 equemene
        elif opt in ("-c", "--check"):
439 133 equemene
            CheckEnergies=True
440 134 equemene
        elif opt in ("-g", "--graph"):
441 134 equemene
            GraphSamples=True
442 139 equemene
        elif opt in ("-e", "--viriel"):
443 139 equemene
            VirielStress=True
444 134 equemene
445 141 equemene
    SizeOfBox=MyFloat(np.power(Number,1./3.)*SizeOfBox)
446 132 equemene
    Velocity=MyFloat(Velocity)
447 132 equemene
    Step=MyFloat(Step)
448 132 equemene
449 128 equemene
    print("Device choosed : %s" % Device)
450 128 equemene
    print("Number of particules : %s" % Number)
451 128 equemene
    print("Size of Box : %s" % SizeOfBox)
452 139 equemene
    print("Initial velocity %s" % Velocity)
453 139 equemene
    print("Number of iterations %s" % Iterations)
454 139 equemene
    print("Step of iteration %s" % Step)
455 139 equemene
    print("Method of resolution %s" % Method)
456 139 equemene
    print("Initial Random for RNG Seed %s" % InitialRandom)
457 139 equemene
    print("Check for Energies %s" % CheckEnergies)
458 139 equemene
    print("Graph for Samples %s" % GraphSamples)
459 139 equemene
    print("ValueType is %s" % ValueType)
460 139 equemene
    print("Viriel distribution of stress %s" % VirielStress)
461 116 equemene
462 132 equemene
    # Create Numpy array of CL vector with 8 FP32
463 142 equemene
    MyCoM = np.zeros(1,dtype=clType4)
464 142 equemene
    MyData = np.zeros(Number, dtype=clType8)
465 133 equemene
    MyPotential = np.zeros(Number, dtype=MyFloat)
466 133 equemene
    MyKinetic = np.zeros(Number, dtype=MyFloat)
467 132 equemene
468 132 equemene
    Marsaglia,Computing=DictionariesAPI()
469 132 equemene
470 132 equemene
    # Scan the OpenCL arrays
471 132 equemene
    Id=0
472 116 equemene
    HasXPU=False
473 116 equemene
    for platform in cl.get_platforms():
474 116 equemene
        for device in platform.get_devices():
475 116 equemene
            if Id==Device:
476 116 equemene
                PlatForm=platform
477 116 equemene
                XPU=device
478 128 equemene
                print("CPU/GPU selected: ",device.name.lstrip())
479 116 equemene
                HasXPU=True
480 116 equemene
            Id+=1
481 116 equemene
482 116 equemene
    if HasXPU==False:
483 128 equemene
        print("No XPU #%i found in all of %i devices, sorry..." % (Device,Id-1))
484 116 equemene
        sys.exit()
485 116 equemene
486 132 equemene
    # Create Context
487 116 equemene
    try:
488 116 equemene
        ctx = cl.Context([XPU])
489 116 equemene
        queue = cl.CommandQueue(ctx,properties=cl.command_queue_properties.PROFILING_ENABLE)
490 116 equemene
    except:
491 128 equemene
        print("Crash during context creation")
492 116 equemene
493 132 equemene
    print(Marsaglia[RNG],Computing[ValueType])
494 132 equemene
    # Build all routines used for the computing
495 137 equemene
#    MyRoutines = cl.Program(ctx, BlobOpenCL).build(options = "-cl-mad-enable -cl-fast-relaxed-math -DTRNG=%i -DTYPE=%i" % (Marsaglia[RNG],Computing[ValueType]))
496 137 equemene
    MyRoutines = cl.Program(ctx, BlobOpenCL).build(options = "-DTRNG=%i -DTYPE=%i" % (Marsaglia[RNG],Computing[ValueType]))
497 119 equemene
498 116 equemene
    mf = cl.mem_flags
499 119 equemene
    clData = cl.Buffer(ctx, mf.READ_WRITE, MyData.nbytes)
500 133 equemene
    clPotential = cl.Buffer(ctx, mf.READ_WRITE, MyPotential.nbytes)
501 133 equemene
    clKinetic = cl.Buffer(ctx, mf.READ_WRITE, MyKinetic.nbytes)
502 139 equemene
    clCoM = cl.Buffer(ctx, mf.READ_WRITE, MyCoM.nbytes)
503 119 equemene
    #clData = cl.Buffer(ctx, mf.WRITE_ONLY|mf.COPY_HOST_PTR,hostbuf=MyData)
504 116 equemene
505 134 equemene
    print('All particles superimposed.')
506 116 equemene
507 132 equemene
    print(SizeOfBox.dtype)
508 132 equemene
509 132 equemene
    # Set particles to RNG points
510 132 equemene
    if InitialRandom:
511 139 equemene
        MyRoutines.SplutterPoints(queue,(Number,1),None,clData,SizeOfBox,np.uint32(nprnd(2**32)),np.uint32(nprnd(2**32)))
512 132 equemene
    else:
513 139 equemene
        MyRoutines.SplutterPoints(queue,(Number,1),None,clData,SizeOfBox,np.uint32(110271),np.uint32(250173))
514 116 equemene
515 132 equemene
    print('All particules distributed')
516 139 equemene
517 139 equemene
    MyRoutines.CenterOfMass(queue,(1,1),None,clData,clCoM,np.int32(Number))
518 142 equemene
    cl.enqueue_copy(queue,MyCoM,clCoM)
519 142 equemene
    print('Center Of Mass: (%s,%s,%s)' % (MyCoM[0][0],MyCoM[0][1],MyCoM[0][2]))
520 139 equemene
521 139 equemene
    if VirielStress:
522 139 equemene
        MyRoutines.SplutterStress(queue,(Number,1),None,clData,clCoM,np.float32(0.),np.uint32(110271),np.uint32(250173))
523 139 equemene
    else:
524 139 equemene
        MyRoutines.SplutterStress(queue,(Number,1),None,clData,clCoM,Velocity,np.uint32(110271),np.uint32(250173))
525 139 equemene
526 139 equemene
    if GraphSamples:
527 139 equemene
        cl.enqueue_copy(queue, MyData, clData)
528 139 equemene
        t0=np.array([[MyData[0][0],MyData[0][1],MyData[0][2]]])
529 139 equemene
        t1=np.array([[MyData[1][0],MyData[1][1],MyData[1][2]]])
530 139 equemene
        tL=np.array([[MyData[-1][0],MyData[-1][1],MyData[-1][2]]])
531 139 equemene
        s0=np.array([[MyData[0][4],MyData[0][5],MyData[0][6],MyData[0][7]]])
532 139 equemene
        s1=np.array([[MyData[1][4],MyData[1][5],MyData[1][6],MyData[1][7]]])
533 139 equemene
        sL=np.array([[MyData[-1][4],MyData[-1][5],MyData[-1][6],MyData[-1][7]]])
534 139 equemene
535 141 equemene
        #print(t0,t1,tL)
536 141 equemene
        #print(s0,s1,sL)
537 139 equemene
538 142 equemene
    CLLaunch=MyRoutines.Potential(queue,(Number,1),None,clData,clPotential)
539 141 equemene
    CLLaunch=MyRoutines.Kinetic(queue,(Number,1),None,clData,clKinetic)
540 133 equemene
    CLLaunch.wait()
541 141 equemene
    cl.enqueue_copy(queue,MyPotential,clPotential)
542 141 equemene
    cl.enqueue_copy(queue,MyKinetic,clKinetic)
543 142 equemene
    print('Viriel=%s Potential=%s Kinetic=%s'% (np.sum(MyPotential)+2*np.sum(MyKinetic),np.sum(MyPotential),np.sum(MyKinetic)))
544 133 equemene
545 134 equemene
    if GraphSamples:
546 134 equemene
        cl.enqueue_copy(queue, MyData, clData)
547 134 equemene
        t0=np.array([[MyData[0][0],MyData[0][1],MyData[0][2]]])
548 134 equemene
        t1=np.array([[MyData[1][0],MyData[1][1],MyData[1][2]]])
549 134 equemene
        tL=np.array([[MyData[-1][0],MyData[-1][1],MyData[-1][2]]])
550 116 equemene
551 116 equemene
    time_start=time.time()
552 128 equemene
    for i in range(Iterations):
553 140 equemene
        if Method=="ImplicitEuler":
554 140 equemene
            CLLaunch=MyRoutines.ImplicitEuler(queue,(Number,1),None,clData,Step)
555 140 equemene
        elif Method=="ExplicitEuler":
556 140 equemene
            CLLaunch=MyRoutines.ExplicitEuler(queue,(Number,1),None,clData,Step)
557 141 equemene
        elif Method=="Heun":
558 141 equemene
            CLLaunch=MyRoutines.Heun(queue,(Number,1),None,clData,Step)
559 140 equemene
        else:
560 132 equemene
            CLLaunch=MyRoutines.RungeKutta(queue,(Number,1),None,clData,Step)
561 118 equemene
        CLLaunch.wait()
562 133 equemene
        if CheckEnergies:
563 142 equemene
            CLLaunch=MyRoutines.Potential(queue,(Number,1),None,clData,clPotential)
564 141 equemene
            CLLaunch=MyRoutines.Kinetic(queue,(Number,1),None,clData,clKinetic)
565 133 equemene
            CLLaunch.wait()
566 133 equemene
            cl.enqueue_copy(queue,MyPotential,clPotential)
567 133 equemene
            cl.enqueue_copy(queue,MyKinetic,clKinetic)
568 133 equemene
            print(np.sum(MyPotential)+2*np.sum(MyKinetic),np.sum(MyPotential),np.sum(MyKinetic))
569 133 equemene
570 139 equemene
            print(MyPotential,MyKinetic)
571 139 equemene
572 134 equemene
        if GraphSamples:
573 134 equemene
            cl.enqueue_copy(queue, MyData, clData)
574 134 equemene
            t0=np.append(t0,[MyData[0][0],MyData[0][1],MyData[0][2]])
575 134 equemene
            t1=np.append(t1,[MyData[1][0],MyData[1][1],MyData[1][2]])
576 134 equemene
            tL=np.append(tL,[MyData[-1][0],MyData[-1][1],MyData[-1][2]])
577 128 equemene
    print("\nDuration on %s for each %s" % (Device,(time.time()-time_start)/Iterations))
578 135 equemene
579 142 equemene
    CLLaunch=MyRoutines.Potential(queue,(Number,1),None,clData,clPotential)
580 141 equemene
    CLLaunch=MyRoutines.Kinetic(queue,(Number,1),None,clData,clKinetic)
581 141 equemene
    CLLaunch.wait()
582 141 equemene
    cl.enqueue_copy(queue,MyPotential,clPotential)
583 141 equemene
    cl.enqueue_copy(queue,MyKinetic,clKinetic)
584 142 equemene
    print('Viriel=%s Potential=%s Kinetic=%s'% (np.sum(MyPotential)+2*np.sum(MyKinetic),np.sum(MyPotential),np.sum(MyKinetic)))
585 142 equemene
    MyRoutines.CenterOfMass(queue,(1,1),None,clData,clCoM,np.int32(Number))
586 142 equemene
    cl.enqueue_copy(queue,MyCoM,clCoM)
587 142 equemene
    print('Center Of Mass: (%s,%s,%s)' % (MyCoM[0][0],MyCoM[0][1],MyCoM[0][2]))
588 141 equemene
589 135 equemene
    if GraphSamples:
590 135 equemene
        t0=np.transpose(np.reshape(t0,(Iterations+1,3)))
591 135 equemene
        t1=np.transpose(np.reshape(t1,(Iterations+1,3)))
592 135 equemene
        tL=np.transpose(np.reshape(tL,(Iterations+1,3)))
593 118 equemene
594 135 equemene
        import matplotlib.pyplot as plt
595 135 equemene
        from mpl_toolkits.mplot3d import Axes3D
596 119 equemene
597 135 equemene
        fig = plt.figure()
598 135 equemene
        ax = fig.gca(projection='3d')
599 135 equemene
        ax.scatter(t0[0],t0[1],t0[2], marker='^',color='blue')
600 135 equemene
        ax.scatter(t1[0],t1[1],t1[2], marker='o',color='red')
601 135 equemene
        ax.scatter(tL[0],tL[1],tL[2], marker='D',color='green')
602 135 equemene
603 135 equemene
        ax.set_xlabel('X Label')
604 135 equemene
        ax.set_ylabel('Y Label')
605 135 equemene
        ax.set_zlabel('Z Label')
606 135 equemene
607 135 equemene
        plt.show()
608 119 equemene
609 119 equemene
    clData.release()
610 133 equemene
    clKinetic.release()
611 133 equemene
    clPotential.release()