Statistiques
| Révision :

root / NBody / NBody.py @ 279

Historique | Voir | Annoter | Télécharger (29,1 ko)

1 128 equemene
#!/usr/bin/env python3
2 136 equemene
# -*- coding: utf-8 -*-
3 116 equemene
"""
4 171 equemene
NBody Demonstrator implemented in OpenCL, rendering OpenGL
5 116 equemene

6 260 equemene
By default, rendering in OpenGL is disabled. Add -g option to activate.
7 174 equemene

8 260 equemene
Part of matrix programs from: https://forge.cbp.ens-lyon.fr/svn/bench4gpu/
9 260 equemene

10 260 equemene
CC BY-NC-SA 2011 : Emmanuel QUEMENER <emmanuel.quemener@ens-lyon.fr>
11 260 equemene

12 174 equemene
Thanks to Andreas Klockner for PyOpenCL:
13 174 equemene
http://mathema.tician.de/software/pyopencl
14 174 equemene

15 116 equemene
"""
16 116 equemene
import getopt
17 116 equemene
import sys
18 116 equemene
import time
19 116 equemene
import numpy as np
20 116 equemene
import pyopencl as cl
21 116 equemene
import pyopencl.array as cl_array
22 116 equemene
from numpy.random import randint as nprnd
23 170 equemene
import string, sys
24 170 equemene
from OpenGL.GL import *
25 170 equemene
from OpenGL.GLUT import *
26 116 equemene
27 132 equemene
def DictionariesAPI():
28 132 equemene
    Marsaglia={'CONG':0,'SHR3':1,'MWC':2,'KISS':3}
29 132 equemene
    Computing={'FP32':0,'FP64':1}
30 170 equemene
    Interaction={'Force':0,'Potential':1}
31 175 equemene
    Artevasion={'None':0,'NegExp':1,'CorRad':2}
32 171 equemene
    return(Marsaglia,Computing,Interaction,Artevasion)
33 132 equemene
34 142 equemene
BlobOpenCL= """
35 132 equemene
#define TFP32 0
36 132 equemene
#define TFP64 1
37 132 equemene

38 170 equemene
#define TFORCE 0
39 170 equemene
#define TPOTENTIAL 1
40 116 equemene

41 171 equemene
#define NONE 0
42 171 equemene
#define NEGEXP 1
43 175 equemene
#define CORRAD 2
44 171 equemene

45 132 equemene
#if TYPE == TFP32
46 132 equemene
#define MYFLOAT4 float4
47 132 equemene
#define MYFLOAT8 float8
48 132 equemene
#define MYFLOAT float
49 151 equemene
#define DISTANCE fast_distance
50 132 equemene
#else
51 132 equemene
#define MYFLOAT4 double4
52 132 equemene
#define MYFLOAT8 double8
53 132 equemene
#define MYFLOAT double
54 151 equemene
#define DISTANCE distance
55 170 equemene
#if defined(cl_khr_fp64)  // Khronos extension available?
56 170 equemene
#pragma OPENCL EXTENSION cl_khr_fp64 : enable
57 132 equemene
#endif
58 170 equemene
#endif
59 132 equemene

60 170 equemene
#define znew  ((zmwc=36969*(zmwc&65535)+(zmwc>>16))<<16)
61 170 equemene
#define wnew  ((wmwc=18000*(wmwc&65535)+(wmwc>>16))&65535)
62 170 equemene
#define MWC   (znew+wnew)
63 170 equemene
#define SHR3  (jsr=(jsr=(jsr=jsr^(jsr<<17))^(jsr>>13))^(jsr<<5))
64 170 equemene
#define CONG  (jcong=69069*jcong+1234567)
65 170 equemene
#define KISS  ((MWC^CONG)+SHR3)
66 170 equemene

67 160 equemene
#define MWCfp (MYFLOAT)(MWC * 2.3283064365386963e-10f)
68 160 equemene
#define KISSfp (MYFLOAT)(KISS * 2.3283064365386963e-10f)
69 160 equemene
#define SHR3fp (MYFLOAT)(SHR3 * 2.3283064365386963e-10f)
70 160 equemene
#define CONGfp (MYFLOAT)(CONG * 2.3283064365386963e-10f)
71 160 equemene

72 171 equemene
#define PI (MYFLOAT)3.141592653589793238e0f
73 160 equemene

74 170 equemene
#define SMALL_NUM (MYFLOAT)1.e-9f
75 160 equemene

76 175 equemene
#define CoreRadius (MYFLOAT)(1.e0f)
77 175 equemene

78 170 equemene
// Create my own Distance implementation: distance buggy on Oland AMD chipset
79 116 equemene

80 170 equemene
MYFLOAT MyDistance(MYFLOAT4 n,MYFLOAT4 m)
81 141 equemene
{
82 170 equemene
    private MYFLOAT x2,y2,z2;
83 170 equemene
    x2=n.s0-m.s0;
84 170 equemene
    x2*=x2;
85 170 equemene
    y2=n.s1-m.s1;
86 170 equemene
    y2*=y2;
87 170 equemene
    z2=n.s2-m.s2;
88 170 equemene
    z2*=z2;
89 170 equemene
    return(sqrt(x2+y2+z2));
90 141 equemene
}
91 141 equemene

92 171 equemene
// Potential between 2 m,n bodies
93 133 equemene
MYFLOAT PairPotential(MYFLOAT4 m,MYFLOAT4 n)
94 171 equemene
#if ARTEVASION == NEGEXP
95 171 equemene
// Add exp(-r) to numerator to avoid divergence for low distances
96 133 equemene
{
97 171 equemene
    MYFLOAT r=DISTANCE(n,m);
98 171 equemene
    return((-1.e0f+exp(-r))/r);
99 171 equemene
}
100 175 equemene
#elif ARTEVASION == CORRAD
101 175 equemene
// Add Core Radius to avoid divergence for low distances
102 175 equemene
{
103 175 equemene
    MYFLOAT r=DISTANCE(n,m);
104 175 equemene
    return(-1.e0f/sqrt(r*r+CoreRadius*CoreRadius));
105 175 equemene
}
106 171 equemene
#else
107 171 equemene
// Classical potential in 1/r
108 171 equemene
{
109 170 equemene
//    return((MYFLOAT)(-1.e0f)/(MyDistance(m,n)));
110 151 equemene
    return((MYFLOAT)(-1.e0f)/(DISTANCE(n,m)));
111 133 equemene
}
112 171 equemene
#endif
113 133 equemene

114 176 equemene
// Interaction based of Force as gradient of Potential
115 170 equemene
MYFLOAT4 Interaction(MYFLOAT4 m,MYFLOAT4 n)
116 170 equemene
#if INTERACTION == TFORCE
117 175 equemene
#if ARTEVASION == NEGEXP
118 175 equemene
// Force gradient of potential, set as (1-exp(-r))/r
119 175 equemene
{
120 175 equemene
    private MYFLOAT r=MyDistance(n,m);
121 175 equemene
    private MYFLOAT num=1.e0f+exp(-r)*(r-1.e0f);
122 175 equemene
    return((n-m)*num/(MYFLOAT)(r*r*r));
123 175 equemene
}
124 175 equemene
#elif ARTEVASION == CORRAD
125 175 equemene
// Force gradient of potential, (Core Radius) set as 1/sqrt(r**2+CoreRadius**2)
126 175 equemene
{
127 175 equemene
    private MYFLOAT r=MyDistance(n,m);
128 175 equemene
    private MYFLOAT den=sqrt(r*r+CoreRadius*CoreRadius);
129 175 equemene
    return((n-m)/(MYFLOAT)(den*den*den));
130 175 equemene
}
131 175 equemene
#else
132 170 equemene
// Simplest implementation of force (equals to acceleration)
133 170 equemene
// seems to bo bad (numerous artevasions)
134 170 equemene
// MYFLOAT4 InteractionForce(MYFLOAT4 m,MYFLOAT4 n)
135 170 equemene
{
136 170 equemene
    private MYFLOAT r=MyDistance(n,m);
137 170 equemene
    return((n-m)/(MYFLOAT)(r*r*r));
138 170 equemene
}
139 175 equemene
#endif
140 170 equemene
#else
141 170 equemene
// Force definited as gradient of potential
142 170 equemene
// Estimate potential and proximate potential to estimate force
143 170 equemene
{
144 171 equemene
    // 1/1024 seems to be a good factor: larger one provides bad results
145 171 equemene
    private MYFLOAT epsilon=(MYFLOAT)(1.e0f/1024);
146 170 equemene
    private MYFLOAT4 er=normalize(n-m);
147 170 equemene
    private MYFLOAT4 dr=er*(MYFLOAT)epsilon;
148 170 equemene

149 171 equemene
    return(er/epsilon*(PairPotential(m,n)-PairPotential(m+dr,n)));
150 170 equemene
}
151 170 equemene
#endif
152 170 equemene

153 160 equemene
MYFLOAT AtomicPotential(__global MYFLOAT4* clDataX,int gid)
154 139 equemene
{
155 160 equemene
    private MYFLOAT potential=(MYFLOAT)0.e0f;
156 160 equemene
    private MYFLOAT4 x=clDataX[gid];
157 139 equemene

158 139 equemene
    for (int i=0;i<get_global_size(0);i++)
159 139 equemene
    {
160 139 equemene
        if (gid != i)
161 160 equemene
        potential+=PairPotential(x,clDataX[i]);
162 139 equemene
    }
163 133 equemene

164 139 equemene
    barrier(CLK_GLOBAL_MEM_FENCE);
165 141 equemene
    return(potential);
166 139 equemene
}
167 139 equemene

168 160 equemene
MYFLOAT AtomicPotentialCoM(__global MYFLOAT4* clDataX,__global MYFLOAT4* clCoM,int gid)
169 139 equemene
{
170 160 equemene
    return(PairPotential(clDataX[gid],clCoM[0]));
171 139 equemene
}
172 139 equemene

173 170 equemene
// Elements from : http://doswa.com/2009/01/02/fourth-order-runge-kutta-numerical-integration.html
174 170 equemene

175 160 equemene
MYFLOAT8 AtomicRungeKutta(__global MYFLOAT4* clDataInX,__global MYFLOAT4* clDataInV,int gid,MYFLOAT dt)
176 116 equemene
{
177 160 equemene
    private MYFLOAT4 a0,v0,x0,a1,v1,x1,a2,v2,x2,a3,v3,x3,a4,v4,x4,xf,vf;
178 170 equemene
    MYFLOAT4 DT=dt*(MYFLOAT4)(1.e0f,1.e0f,1.e0f,1.e0f);
179 160 equemene

180 160 equemene
    a0=(MYFLOAT4)(0.e0f,0.e0f,0.e0f,0.e0f);
181 160 equemene
    v0=(MYFLOAT4)clDataInV[gid];
182 160 equemene
    x0=(MYFLOAT4)clDataInX[gid];
183 133 equemene
    int N = get_global_size(0);
184 133 equemene

185 170 equemene
    for (private int i=0;i<N;i++)
186 121 equemene
    {
187 121 equemene
        if (gid != i)
188 160 equemene
        a0+=Interaction(x0,clDataInX[i]);
189 121 equemene
    }
190 121 equemene

191 160 equemene
    a1=(MYFLOAT4)(0.e0f,0.e0f,0.e0f,0.e0f);
192 170 equemene
    v1=a0*dt+v0;
193 170 equemene
    x1=v0*dt+x0;
194 170 equemene
    for (private int j=0;j<N;j++)
195 121 equemene
    {
196 170 equemene
        if (gid != j)
197 170 equemene
        a1+=Interaction(x1,clDataInX[j]);
198 121 equemene
    }
199 121 equemene

200 160 equemene
    a2=(MYFLOAT4)(0.e0f,0.e0f,0.e0f,0.e0f);
201 170 equemene
    v2=a1*(MYFLOAT)(dt/2.e0f)+v0;
202 170 equemene
    x2=v1*(MYFLOAT)(dt/2.e0f)+x0;
203 170 equemene
    for (private int k=0;k<N;k++)
204 121 equemene
    {
205 170 equemene
        if (gid != k)
206 170 equemene
        a2+=Interaction(x2,clDataInX[k]);
207 121 equemene
    }
208 121 equemene

209 160 equemene
    a3=(MYFLOAT4)(0.e0f,0.e0f,0.e0f,0.e0f);
210 170 equemene
    v3=a2*(MYFLOAT)(dt/2.e0f)+v0;
211 170 equemene
    x3=v2*(MYFLOAT)(dt/2.e0f)+x0;
212 170 equemene
    for (private int l=0;l<N;l++)
213 121 equemene
    {
214 170 equemene
        if (gid != l)
215 170 equemene
        a3+=Interaction(x3,clDataInX[l]);
216 121 equemene
    }
217 121 equemene

218 160 equemene
    a4=(MYFLOAT4)(0.e0f,0.e0f,0.e0f,0.e0f);
219 170 equemene
    v4=a3*dt+v0;
220 170 equemene
    x4=v3*dt+x0;
221 170 equemene
    for (private int m=0;m<N;m++)
222 141 equemene
    {
223 170 equemene
        if (gid != m)
224 170 equemene
        a4+=Interaction(x4,clDataInX[m]);
225 141 equemene
    }
226 141 equemene

227 160 equemene
    xf=x0+dt*(v1+(MYFLOAT)2.e0f*(v2+v3)+v4)/(MYFLOAT)6.e0f;
228 160 equemene
    vf=v0+dt*(a1+(MYFLOAT)2.e0f*(a2+a3)+a4)/(MYFLOAT)6.e0f;
229 121 equemene

230 170 equemene
    return((MYFLOAT8)(xf.s0,xf.s1,xf.s2,0.e0f,vf.s0,vf.s1,vf.s2,0.e0f));
231 121 equemene
}
232 121 equemene

233 160 equemene
MYFLOAT8 AtomicHeun(__global MYFLOAT4* clDataInX,__global MYFLOAT4* clDataInV,int gid,MYFLOAT dt)
234 121 equemene
{
235 170 equemene
    private MYFLOAT4 x0,v0,a0,x1,v1,a1,xf,vf;
236 170 equemene
    MYFLOAT4 Dt=dt*(MYFLOAT4)(1.e0f,1.e0f,1.e0f,1.e0f);
237 116 equemene

238 170 equemene
    x0=(MYFLOAT4)clDataInX[gid];
239 170 equemene
    v0=(MYFLOAT4)clDataInV[gid];
240 170 equemene
    a0=(MYFLOAT4)(0.e0f,0.e0f,0.e0f,0.e0f);
241 141 equemene

242 170 equemene
    for (private int i=0;i<get_global_size(0);i++)
243 116 equemene
    {
244 116 equemene
        if (gid != i)
245 170 equemene
        a0+=Interaction(x0,clDataInX[i]);
246 116 equemene
    }
247 141 equemene

248 170 equemene
    a1=(MYFLOAT4)(0.e0f,0.e0f,0.e0f,0.e0f);
249 170 equemene
    //v1=v0+dt*a0;
250 170 equemene
    //x1=x0+dt*v0;
251 170 equemene
    v1=dt*a0+v0;
252 170 equemene
    x1=dt*v0+x0;
253 141 equemene

254 170 equemene
    for (private int j=0;j<get_global_size(0);j++)
255 116 equemene
    {
256 170 equemene
        if (gid != j)
257 170 equemene
        a1+=Interaction(x1,clDataInX[j]);
258 116 equemene
    }
259 118 equemene

260 170 equemene
    vf=v0+dt*(a0+a1)/(MYFLOAT)2.e0f;
261 170 equemene
    xf=x0+dt*(v0+v1)/(MYFLOAT)2.e0f;
262 170 equemene

263 170 equemene
    return((MYFLOAT8)(xf.s0,xf.s1,xf.s2,0.e0f,vf.s0,vf.s1,vf.s2,0.e0f));
264 119 equemene
}
265 119 equemene

266 160 equemene
MYFLOAT8 AtomicImplicitEuler(__global MYFLOAT4* clDataInX,__global MYFLOAT4* clDataInV,int gid,MYFLOAT dt)
267 119 equemene
{
268 170 equemene
    MYFLOAT4 x0,v0,a,xf,vf;
269 119 equemene

270 170 equemene
    x0=(MYFLOAT4)clDataInX[gid];
271 170 equemene
    v0=(MYFLOAT4)clDataInV[gid];
272 151 equemene
    a=(MYFLOAT4)(0.e0f,0.e0f,0.e0f,0.e0f);
273 160 equemene

274 170 equemene
    for (private int i=0;i<get_global_size(0);i++)
275 119 equemene
    {
276 119 equemene
        if (gid != i)
277 170 equemene
          a+=Interaction(x0,clDataInX[i]);
278 119 equemene
    }
279 133 equemene

280 170 equemene
    vf=v0+dt*a;
281 170 equemene
    xf=x0+dt*vf;
282 170 equemene

283 170 equemene
    return((MYFLOAT8)(xf.s0,xf.s1,xf.s2,0.e0f,vf.s0,vf.s1,vf.s2,0.e0f));
284 116 equemene
}
285 116 equemene

286 160 equemene
MYFLOAT8 AtomicExplicitEuler(__global MYFLOAT4* clDataInX,__global MYFLOAT4* clDataInV,int gid,MYFLOAT dt)
287 140 equemene
{
288 170 equemene
    MYFLOAT4 x0,v0,a,xf,vf;
289 140 equemene

290 170 equemene
    x0=(MYFLOAT4)clDataInX[gid];
291 170 equemene
    v0=(MYFLOAT4)clDataInV[gid];
292 151 equemene
    a=(MYFLOAT4)(0.e0f,0.e0f,0.e0f,0.e0f);
293 160 equemene

294 170 equemene
    for (private int i=0;i<get_global_size(0);i++)
295 140 equemene
    {
296 140 equemene
        if (gid != i)
297 170 equemene
        a+=Interaction(x0,clDataInX[i]);
298 140 equemene
    }
299 140 equemene

300 170 equemene
    vf=v0+dt*a;
301 170 equemene
    xf=x0+dt*v0;
302 140 equemene

303 170 equemene
    return((MYFLOAT8)(xf.s0,xf.s1,xf.s2,0.e0f,vf.s0,vf.s1,vf.s2,0.e0f));
304 140 equemene
}
305 140 equemene

306 170 equemene
__kernel void InBallSplutterPoints(__global MYFLOAT4* clDataX,
307 170 equemene
                                   MYFLOAT diameter,uint seed_z,uint seed_w)
308 170 equemene
{
309 170 equemene
    private int gid=get_global_id(0);
310 170 equemene
    private uint zmwc=seed_z+gid;
311 170 equemene
    private uint wmwc=seed_w+(gid+1)%2;
312 170 equemene
    private MYFLOAT Heat;
313 170 equemene

314 170 equemene
    for (int i=0;i<gid;i++)
315 170 equemene
    {
316 170 equemene
        Heat=MWCfp;
317 170 equemene
    }
318 170 equemene

319 170 equemene
// More accurate distribution based on spherical coordonates
320 170 equemene
// Disactivated because of AMD Oland GPU crash on launch
321 170 equemene
//     private MYFLOAT Radius,Theta,Phi,PosX,PosY,PosZ,SinTheta;
322 170 equemene
//     Radius=MWCfp*diameter/2.e0f;
323 170 equemene
//     Theta=(MYFLOAT)acos((float)(-2.e0f*MWCfp+1.0e0f));
324 170 equemene
//     Phi=(MYFLOAT)(2.e0f*PI*MWCfp);
325 170 equemene
//     SinTheta=sin((float)Theta);
326 170 equemene
//     PosX=cos((float)Phi)*Radius*SinTheta;
327 170 equemene
//     PosY=sin((float)Phi)*Radius*SinTheta;
328 170 equemene
//     PosZ=cos((float)Theta)*Radius;
329 170 equemene
//     clDataX[gid]=(MYFLOAT4)(PosX,PosY,PosZ,0.e0f);
330 170 equemene

331 170 equemene
    private MYFLOAT Radius=diameter/2.e0f;
332 170 equemene
    private MYFLOAT Length=diameter;
333 170 equemene
    private MYFLOAT4 Position;
334 170 equemene
    while (Length>Radius) {
335 170 equemene
       Position=(MYFLOAT4)((MWCfp-0.5e0f)*diameter,(MWCfp-0.5e0f)*diameter,(MWCfp-0.5e0f)*diameter,0.e0f);
336 170 equemene
       Length=(MYFLOAT)length((MYFLOAT4)Position);
337 170 equemene
    }
338 170 equemene

339 170 equemene
    clDataX[gid]=Position;
340 170 equemene

341 170 equemene
    barrier(CLK_GLOBAL_MEM_FENCE);
342 170 equemene
}
343 170 equemene

344 170 equemene
__kernel void InBoxSplutterPoints(__global MYFLOAT4* clDataX, MYFLOAT box,
345 139 equemene
                             uint seed_z,uint seed_w)
346 116 equemene
{
347 170 equemene
    int gid=get_global_id(0);
348 170 equemene
    uint zmwc=seed_z+gid;
349 170 equemene
    uint wmwc=seed_w-gid;
350 170 equemene
    private MYFLOAT Heat;
351 170 equemene

352 170 equemene
    for (int i=0;i<gid;i++)
353 170 equemene
    {
354 170 equemene
        Heat=MWCfp;
355 170 equemene
    }
356 137 equemene

357 170 equemene
    clDataX[gid]=(MYFLOAT4)((MWCfp-0.5e0f)*box,(MWCfp-0.5e0f)*box,(MWCfp-0.5e0f)*box,0.e0f);
358 170 equemene

359 170 equemene
    barrier(CLK_GLOBAL_MEM_FENCE);
360 116 equemene
}
361 116 equemene

362 160 equemene
__kernel void SplutterStress(__global MYFLOAT4* clDataX,__global MYFLOAT4* clDataV,__global MYFLOAT4* clCoM, MYFLOAT velocity,uint seed_z,uint seed_w)
363 139 equemene
{
364 139 equemene
    int gid = get_global_id(0);
365 142 equemene
    MYFLOAT N = (MYFLOAT)get_global_size(0);
366 170 equemene
    uint zmwc=seed_z+(uint)gid;
367 170 equemene
    uint wmwc=seed_w-(uint)gid;
368 178 equemene
    MYFLOAT4 CrossVector,SpeedVector,FromCoM;
369 178 equemene
    MYFLOAT Heat,ThetaA,PhiA,ThetaB,PhiB,Length,tA,tB,Polar;
370 139 equemene

371 178 equemene
    for (int i=0;i<gid;i++)
372 178 equemene
    {
373 178 equemene
        Heat=MWCfp;
374 178 equemene
    }
375 178 equemene

376 178 equemene
    // cast to float for sin,cos are NEEDED by Mesa FP64 implementation!
377 178 equemene
    // Implemention on AMD Oland are probably broken in float
378 178 equemene

379 178 equemene
    FromCoM=(MYFLOAT4)(clDataX[gid]-clCoM[0]);
380 178 equemene
    Length=length(FromCoM);
381 178 equemene
    //Theta=acos(FromCoM.z/Length);
382 178 equemene
    //Phi=atan(FromCoM.y/FromCoM.x);
383 178 equemene
    // First tangential vector to sphere of length radius
384 178 equemene
    ThetaA=acos(FromCoM.x/Length)+5.e-1f*PI;
385 178 equemene
    PhiA=atan(FromCoM.y/FromCoM.z);
386 178 equemene
    // Second tangential vector to sphere of length radius
387 179 equemene
    ThetaB=acos((float)(FromCoM.x/Length));
388 179 equemene
    PhiB=atan((float)(FromCoM.y/FromCoM.z))+5.e-1f*PI;
389 178 equemene
    // (x,y) random coordonates to plane tangential to sphere
390 178 equemene
    Polar=MWCfp*2.e0f*PI;
391 179 equemene
    tA=cos((float)Polar);
392 179 equemene
    tB=sin((float)Polar);
393 178 equemene

394 178 equemene
    // Exception for 2 particules to ovoid shifting
395 171 equemene
    if (get_global_size(0)==2) {
396 171 equemene
       CrossVector=(MYFLOAT4)(1.e0f,1.e0f,1.e0f,0.e0f);
397 171 equemene
    } else {
398 179 equemene
       CrossVector.s0=tA*cos((float)ThetaA)+tB*cos((float)ThetaB);
399 179 equemene
       CrossVector.s1=tA*sin((float)ThetaA)*sin((float)PhiA)+tB*sin((float)ThetaB)*sin((float)PhiB);
400 179 equemene
       CrossVector.s2=tA*sin((float)ThetaA)*cos((float)PhiA)+tB*sin((float)ThetaB)*cos((float)PhiB);
401 178 equemene
       CrossVector.s3=0.e0f;
402 171 equemene
    }
403 171 equemene

404 139 equemene
    if (velocity<SMALL_NUM) {
405 178 equemene
       SpeedVector=(MYFLOAT4)normalize(cross(FromCoM,CrossVector))*sqrt((-AtomicPotential(clDataX,gid)/(MYFLOAT)2.e0f));
406 139 equemene
    }
407 139 equemene
    else
408 178 equemene
    {
409 170 equemene

410 176 equemene
       SpeedVector=(MYFLOAT4)((MWCfp-5e-1f)*velocity,(MWCfp-5e-1f)*velocity,
411 176 equemene
                              (MWCfp-5e-1f)*velocity,0.e0f);
412 139 equemene
    }
413 170 equemene
    clDataV[gid]=SpeedVector;
414 170 equemene
    barrier(CLK_GLOBAL_MEM_FENCE);
415 139 equemene
}
416 139 equemene

417 160 equemene
__kernel void RungeKutta(__global MYFLOAT4* clDataX,__global MYFLOAT4* clDataV,MYFLOAT h)
418 116 equemene
{
419 170 equemene
    private int gid = get_global_id(0);
420 170 equemene
    private MYFLOAT8 clDataGid;
421 116 equemene

422 170 equemene
    clDataGid=AtomicRungeKutta(clDataX,clDataV,gid,h);
423 116 equemene
    barrier(CLK_GLOBAL_MEM_FENCE);
424 170 equemene
    clDataX[gid]=clDataGid.s0123;
425 170 equemene
    clDataV[gid]=clDataGid.s4567;
426 116 equemene
}
427 116 equemene

428 170 equemene
__kernel void Heun(__global MYFLOAT4* clDataX,__global MYFLOAT4* clDataV,MYFLOAT h)
429 116 equemene
{
430 170 equemene
    private int gid = get_global_id(0);
431 170 equemene
    private MYFLOAT8 clDataGid;
432 116 equemene

433 170 equemene
    clDataGid=AtomicHeun(clDataX,clDataV,gid,h);
434 116 equemene
    barrier(CLK_GLOBAL_MEM_FENCE);
435 170 equemene
    clDataX[gid]=clDataGid.s0123;
436 170 equemene
    clDataV[gid]=clDataGid.s4567;
437 116 equemene
}
438 133 equemene

439 170 equemene
__kernel void ImplicitEuler(__global MYFLOAT4* clDataX,__global MYFLOAT4* clDataV,MYFLOAT h)
440 141 equemene
{
441 170 equemene
    private int gid = get_global_id(0);
442 170 equemene
    private MYFLOAT8 clDataGid;
443 141 equemene

444 170 equemene
    clDataGid=AtomicImplicitEuler(clDataX,clDataV,gid,h);
445 141 equemene
    barrier(CLK_GLOBAL_MEM_FENCE);
446 170 equemene
    clDataX[gid]=clDataGid.s0123;
447 170 equemene
    clDataV[gid]=clDataGid.s4567;
448 141 equemene
}
449 141 equemene

450 160 equemene
__kernel void ExplicitEuler(__global MYFLOAT4* clDataX,__global MYFLOAT4* clDataV,MYFLOAT h)
451 140 equemene
{
452 170 equemene
    private int gid = get_global_id(0);
453 170 equemene
    private MYFLOAT8 clDataGid;
454 170 equemene

455 170 equemene
    clDataGid=AtomicExplicitEuler(clDataX,clDataV,gid,h);
456 140 equemene
    barrier(CLK_GLOBAL_MEM_FENCE);
457 170 equemene
    clDataX[gid]=clDataGid.s0123;
458 170 equemene
    clDataV[gid]=clDataGid.s4567;
459 140 equemene
}
460 139 equemene

461 160 equemene
__kernel void CoMPotential(__global MYFLOAT4* clDataX,__global MYFLOAT4* clCoM,__global MYFLOAT* clPotential)
462 133 equemene
{
463 133 equemene
    int gid = get_global_id(0);
464 133 equemene

465 160 equemene
    clPotential[gid]=PairPotential(clDataX[gid],clCoM[0]);
466 139 equemene
}
467 139 equemene

468 160 equemene
__kernel void Potential(__global MYFLOAT4* clDataX,__global MYFLOAT* clPotential)
469 139 equemene
{
470 139 equemene
    int gid = get_global_id(0);
471 139 equemene

472 155 equemene
    MYFLOAT potential=(MYFLOAT)0.e0f;
473 160 equemene
    MYFLOAT4 x=clDataX[gid];
474 133 equemene

475 133 equemene
    for (int i=0;i<get_global_size(0);i++)
476 133 equemene
    {
477 133 equemene
        if (gid != i)
478 160 equemene
        potential+=PairPotential(x,clDataX[i]);
479 133 equemene
    }
480 133 equemene

481 133 equemene
    barrier(CLK_GLOBAL_MEM_FENCE);
482 155 equemene
    clPotential[gid]=potential*(MYFLOAT)5.e-1f;
483 133 equemene
}
484 133 equemene

485 160 equemene
__kernel void CenterOfMass(__global MYFLOAT4* clDataX,__global MYFLOAT4* clCoM,int Size)
486 139 equemene
{
487 160 equemene
    MYFLOAT4 CoM=clDataX[0];
488 142 equemene

489 139 equemene
    for (int i=1;i<Size;i++)
490 139 equemene
    {
491 160 equemene
        CoM+=clDataX[i];
492 139 equemene
    }
493 142 equemene

494 139 equemene
    barrier(CLK_GLOBAL_MEM_FENCE);
495 170 equemene
    clCoM[0]=(MYFLOAT4)(CoM.s0,CoM.s1,CoM.s2,0.e0f)/(MYFLOAT)Size;
496 139 equemene
}
497 139 equemene

498 160 equemene
__kernel void Kinetic(__global MYFLOAT4* clDataV,__global MYFLOAT* clKinetic)
499 133 equemene
{
500 133 equemene
    int gid = get_global_id(0);
501 133 equemene

502 139 equemene
    barrier(CLK_GLOBAL_MEM_FENCE);
503 160 equemene
    MYFLOAT d=(MYFLOAT)length(clDataV[gid]);
504 155 equemene
    clKinetic[gid]=(MYFLOAT)5.e-1f*(MYFLOAT)(d*d);
505 133 equemene
}
506 170 equemene

507 116 equemene
"""
508 116 equemene
509 170 equemene
def MainOpenCL(clDataX,clDataV,Step,Method):
510 170 equemene
    time_start=time.time()
511 170 equemene
    if Method=="RungeKutta":
512 170 equemene
        CLLaunch=MyRoutines.RungeKutta(queue,(Number,1),None,clDataX,clDataV,Step)
513 170 equemene
    elif Method=="ExplicitEuler":
514 170 equemene
        CLLaunch=MyRoutines.ExplicitEuler(queue,(Number,1),None,clDataX,clDataV,Step)
515 170 equemene
    elif Method=="Heun":
516 170 equemene
        CLLaunch=MyRoutines.Heun(queue,(Number,1),None,clDataX,clDataV,Step)
517 170 equemene
    else:
518 170 equemene
        CLLaunch=MyRoutines.ImplicitEuler(queue,(Number,1),None,clDataX,clDataV,Step)
519 170 equemene
    CLLaunch.wait()
520 170 equemene
    Elapsed=time.time()-time_start
521 170 equemene
    return(Elapsed)
522 170 equemene
523 170 equemene
def display(*args):
524 133 equemene
525 170 equemene
    global MyDataX,MyDataV,clDataX,clDataV,Step,Method,Number,Iterations,Durations,Verbose,SpeedRendering
526 170 equemene
527 170 equemene
    glClearColor(0.0, 0.0, 0.0, 0.0)
528 170 equemene
    glClear(GL_COLOR_BUFFER_BIT)
529 170 equemene
    glColor3f(1.0,1.0,1.0)
530 170 equemene
531 170 equemene
    Elapsed=MainOpenCL(clDataX,clDataV,Step,Method)
532 170 equemene
    if SpeedRendering:
533 170 equemene
        cl.enqueue_copy(queue, MyDataV, clDataV)
534 170 equemene
        MyDataV.reshape(Number,4)[:,3]=1
535 170 equemene
        glVertexPointerf(MyDataV.reshape(Number,4))
536 170 equemene
    else:
537 170 equemene
        cl.enqueue_copy(queue, MyDataX, clDataX)
538 170 equemene
        MyDataX.reshape(Number,4)[:,3]=1
539 170 equemene
        glVertexPointerf(MyDataX.reshape(Number,4))
540 170 equemene
541 170 equemene
    if Verbose:
542 170 equemene
        print("Positions for #%s iteration: %s" % (Iterations,MyDataX))
543 170 equemene
    else:
544 170 equemene
        sys.stdout.write('.')
545 170 equemene
        sys.stdout.flush()
546 170 equemene
    Durations=np.append(Durations,MainOpenCL(clDataX,clDataV,Step,Method))
547 170 equemene
    glEnableClientState(GL_VERTEX_ARRAY)
548 170 equemene
    glDrawArrays(GL_POINTS, 0, Number)
549 170 equemene
    glDisableClientState(GL_VERTEX_ARRAY)
550 170 equemene
    glFlush()
551 170 equemene
    Iterations+=1
552 170 equemene
    glutSwapBuffers()
553 170 equemene
554 170 equemene
def halt():
555 170 equemene
    pass
556 170 equemene
557 170 equemene
def keyboard(k,x,y):
558 174 equemene
    global ViewRZ,SpeedRendering
559 170 equemene
    LC_Z = as_8_bit( 'z' )
560 170 equemene
    UC_Z = as_8_bit( 'Z' )
561 170 equemene
    Plus = as_8_bit( '+' )
562 170 equemene
    Minus = as_8_bit( '-' )
563 170 equemene
    Switch = as_8_bit( 's' )
564 170 equemene
565 170 equemene
    Zoom=1
566 175 equemene
    if k == LC_Z:
567 170 equemene
        ViewRZ += 1.0
568 170 equemene
    elif k == UC_Z:
569 170 equemene
        ViewRZ -= 1.0
570 170 equemene
    elif k == Plus:
571 170 equemene
        Zoom *= 2.0
572 170 equemene
    elif k == Minus:
573 170 equemene
        Zoom /= 2.0
574 170 equemene
    elif k == Switch:
575 170 equemene
        if SpeedRendering:
576 170 equemene
            SpeedRendering=False
577 170 equemene
        else:
578 170 equemene
            SpeedRendering=True
579 170 equemene
    elif ord(k) == 27: # Escape
580 170 equemene
        glutLeaveMainLoop()
581 170 equemene
        return(False)
582 170 equemene
    else:
583 170 equemene
        return
584 170 equemene
    glRotatef(ViewRZ, 0.0, 0.0, 1.0)
585 170 equemene
    glScalef(Zoom,Zoom,Zoom)
586 170 equemene
    glutPostRedisplay()
587 170 equemene
588 170 equemene
def special(k,x,y):
589 174 equemene
    global ViewRX, ViewRY
590 170 equemene
591 178 equemene
    Step=1.
592 170 equemene
    if k == GLUT_KEY_UP:
593 178 equemene
        ViewRX += Step
594 170 equemene
    elif k == GLUT_KEY_DOWN:
595 178 equemene
        ViewRX -= Step
596 170 equemene
    elif k == GLUT_KEY_LEFT:
597 178 equemene
        ViewRY += Step
598 170 equemene
    elif k == GLUT_KEY_RIGHT:
599 178 equemene
        ViewRY -= Step
600 170 equemene
    else:
601 170 equemene
        return
602 170 equemene
    glRotatef(ViewRX, 1.0, 0.0, 0.0)
603 170 equemene
    glRotatef(ViewRY, 0.0, 1.0, 0.0)
604 170 equemene
    glutPostRedisplay()
605 170 equemene
606 170 equemene
def setup_viewport():
607 170 equemene
    global SizeOfBox
608 170 equemene
    glMatrixMode(GL_PROJECTION)
609 170 equemene
    glLoadIdentity()
610 170 equemene
    glOrtho(-SizeOfBox, SizeOfBox, -SizeOfBox, SizeOfBox, -SizeOfBox, SizeOfBox)
611 170 equemene
    glutPostRedisplay()
612 170 equemene
613 170 equemene
def reshape(w, h):
614 170 equemene
    glViewport(0, 0, w, h)
615 170 equemene
    setup_viewport()
616 170 equemene
617 116 equemene
if __name__=='__main__':
618 170 equemene
619 170 equemene
    global Number,Step,clDataX,clDataV,MyDataX,MyDataV,Method,SizeOfBox,Iterations,Verbose,Durations
620 116 equemene
621 132 equemene
    # ValueType
622 132 equemene
    ValueType='FP32'
623 132 equemene
    class MyFloat(np.float32):pass
624 160 equemene
    #    clType8=cl_array.vec.float8
625 116 equemene
    # Set defaults values
626 118 equemene
    np.set_printoptions(precision=2)
627 116 equemene
    # Id of Device : 1 is for first find !
628 160 equemene
    Device=0
629 170 equemene
    # Number of bodies is integer
630 160 equemene
    Number=2
631 170 equemene
    # Number of iterations (for standalone execution)
632 170 equemene
    Iterations=10
633 170 equemene
    # Size of shape
634 170 equemene
    SizeOfShape=MyFloat(1.)
635 116 equemene
    # Initial velocity of particules
636 132 equemene
    Velocity=MyFloat(1.)
637 116 equemene
    # Step
638 170 equemene
    Step=MyFloat(1./32)
639 121 equemene
    # Method of integration
640 150 equemene
    Method='ImplicitEuler'
641 132 equemene
    # InitialRandom
642 132 equemene
    InitialRandom=False
643 132 equemene
    # RNG Marsaglia Method
644 132 equemene
    RNG='MWC'
645 139 equemene
    # Viriel Distribution of stress
646 139 equemene
    VirielStress=True
647 170 equemene
    # Verbose
648 170 equemene
    Verbose=False
649 170 equemene
    # OpenGL real time rendering
650 170 equemene
    OpenGL=False
651 170 equemene
    # Speed rendering
652 170 equemene
    SpeedRendering=False
653 171 equemene
    # Counter ArtEvasions Measures (artefact evasion)
654 171 equemene
    CoArEv='None'
655 170 equemene
    # Shape to distribute
656 170 equemene
    Shape='Ball'
657 170 equemene
    # Type of Interaction
658 172 equemene
    InterType='Force'
659 132 equemene
660 175 equemene
    HowToUse='%s -h [Help] -r [InitialRandom] -g [OpenGL] -e [VirielStress] -o [Verbose] -p [Potential] -x <None|NegExp|CorRad> -d <DeviceId> -n <NumberOfParticules> -i <Iterations> -z <SizeOfBoxOrBall> -v <Velocity> -s <Step> -b <Ball|Box> -m <ImplicitEuler|RungeKutta|ExplicitEuler|Heun> -t <FP32|FP64>'
661 116 equemene
662 116 equemene
    try:
663 175 equemene
        opts, args = getopt.getopt(sys.argv[1:],"rpgehod:n:i:z:v:s:m:t:b:x:",["random","potential","coarev","opengl","viriel","verbose","device=","number=","iterations=","size=","velocity=","step=","method=","valuetype=","shape="])
664 116 equemene
    except getopt.GetoptError:
665 128 equemene
        print(HowToUse % sys.argv[0])
666 116 equemene
        sys.exit(2)
667 116 equemene
668 116 equemene
    for opt, arg in opts:
669 116 equemene
        if opt == '-h':
670 128 equemene
            print(HowToUse % sys.argv[0])
671 116 equemene
672 128 equemene
            print("\nInformations about devices detected under OpenCL:")
673 116 equemene
            try:
674 132 equemene
                Id=0
675 116 equemene
                for platform in cl.get_platforms():
676 116 equemene
                    for device in platform.get_devices():
677 170 equemene
                        # Failed now because of POCL implementation
678 137 equemene
                        #deviceType=cl.device_type.to_string(device.type)
679 149 equemene
                        deviceType="xPU"
680 128 equemene
                        print("Device #%i from %s of type %s : %s" % (Id,platform.vendor.lstrip(),deviceType,device.name.lstrip()))
681 116 equemene
                        Id=Id+1
682 116 equemene
                sys.exit()
683 116 equemene
            except ImportError:
684 128 equemene
                print("Your platform does not seem to support OpenCL")
685 116 equemene
                sys.exit()
686 116 equemene
687 132 equemene
        elif opt in ("-t", "--valuetype"):
688 132 equemene
            if arg=='FP64':
689 132 equemene
                class MyFloat(np.float64): pass
690 132 equemene
            else:
691 132 equemene
                class MyFloat(np.float32):pass
692 132 equemene
            ValueType = arg
693 116 equemene
        elif opt in ("-d", "--device"):
694 116 equemene
            Device=int(arg)
695 121 equemene
        elif opt in ("-m", "--method"):
696 121 equemene
            Method=arg
697 170 equemene
        elif opt in ("-b", "--shape"):
698 170 equemene
            Shape=arg
699 175 equemene
            if Shape!='Ball' or Shape!='Box':
700 175 equemene
                print('Wrong argument: set to Ball')
701 116 equemene
        elif opt in ("-n", "--number"):
702 116 equemene
            Number=int(arg)
703 170 equemene
        elif opt in ("-i", "--iterations"):
704 170 equemene
            Iterations=int(arg)
705 116 equemene
        elif opt in ("-z", "--size"):
706 170 equemene
            SizeOfShape=MyFloat(arg)
707 116 equemene
        elif opt in ("-v", "--velocity"):
708 132 equemene
            Velocity=MyFloat(arg)
709 139 equemene
            VirielStress=False
710 116 equemene
        elif opt in ("-s", "--step"):
711 132 equemene
            Step=MyFloat(arg)
712 132 equemene
        elif opt in ("-r", "--random"):
713 132 equemene
            InitialRandom=True
714 133 equemene
        elif opt in ("-c", "--check"):
715 133 equemene
            CheckEnergies=True
716 139 equemene
        elif opt in ("-e", "--viriel"):
717 139 equemene
            VirielStress=True
718 170 equemene
        elif opt in ("-g", "--opengl"):
719 170 equemene
            OpenGL=True
720 172 equemene
        elif opt in ("-p", "--potential"):
721 172 equemene
            InterType='Potential'
722 175 equemene
        elif opt in ("-x", "--coarev"):
723 175 equemene
            CoArEv=arg
724 170 equemene
        elif opt in ("-o", "--verbose"):
725 170 equemene
            Verbose=True
726 170 equemene
727 175 equemene
    SizeOfShape=np.sqrt(MyFloat(SizeOfShape*Number))
728 132 equemene
    Velocity=MyFloat(Velocity)
729 132 equemene
    Step=MyFloat(Step)
730 132 equemene
731 128 equemene
    print("Device choosed : %s" % Device)
732 128 equemene
    print("Number of particules : %s" % Number)
733 170 equemene
    print("Size of Shape : %s" % SizeOfShape)
734 160 equemene
    print("Initial velocity : %s" % Velocity)
735 170 equemene
    print("Step of iteration : %s" % Step)
736 160 equemene
    print("Number of iterations : %s" % Iterations)
737 160 equemene
    print("Method of resolution : %s" % Method)
738 160 equemene
    print("Initial Random for RNG Seed : %s" % InitialRandom)
739 160 equemene
    print("ValueType is : %s" % ValueType)
740 170 equemene
    print("Viriel distribution of stress : %s" % VirielStress)
741 170 equemene
    print("OpenGL real time rendering : %s" % OpenGL)
742 170 equemene
    print("Speed rendering : %s" % SpeedRendering)
743 170 equemene
    print("Interaction type : %s" % InterType)
744 171 equemene
    print("Counter Artevasion type : %s" % CoArEv)
745 116 equemene
746 132 equemene
    # Create Numpy array of CL vector with 8 FP32
747 170 equemene
    MyCoM = np.zeros(4,dtype=MyFloat)
748 170 equemene
    MyDataX = np.zeros(Number*4, dtype=MyFloat)
749 170 equemene
    MyDataV = np.zeros(Number*4, dtype=MyFloat)
750 133 equemene
    MyPotential = np.zeros(Number, dtype=MyFloat)
751 133 equemene
    MyKinetic = np.zeros(Number, dtype=MyFloat)
752 132 equemene
753 171 equemene
    Marsaglia,Computing,Interaction,Artevasion=DictionariesAPI()
754 132 equemene
755 132 equemene
    # Scan the OpenCL arrays
756 132 equemene
    Id=0
757 116 equemene
    HasXPU=False
758 116 equemene
    for platform in cl.get_platforms():
759 116 equemene
        for device in platform.get_devices():
760 116 equemene
            if Id==Device:
761 116 equemene
                PlatForm=platform
762 116 equemene
                XPU=device
763 128 equemene
                print("CPU/GPU selected: ",device.name.lstrip())
764 151 equemene
                print("Platform selected: ",platform.name)
765 116 equemene
                HasXPU=True
766 116 equemene
            Id+=1
767 116 equemene
768 116 equemene
    if HasXPU==False:
769 128 equemene
        print("No XPU #%i found in all of %i devices, sorry..." % (Device,Id-1))
770 116 equemene
        sys.exit()
771 116 equemene
772 132 equemene
    # Create Context
773 116 equemene
    try:
774 116 equemene
        ctx = cl.Context([XPU])
775 116 equemene
        queue = cl.CommandQueue(ctx,properties=cl.command_queue_properties.PROFILING_ENABLE)
776 116 equemene
    except:
777 128 equemene
        print("Crash during context creation")
778 116 equemene
779 132 equemene
    # Build all routines used for the computing
780 170 equemene
781 170 equemene
    #BuildOptions="-cl-mad-enable -cl-kernel-arg-info -cl-fast-relaxed-math -cl-std=CL1.2 -DTRNG=%i -DTYPE=%i" % (Marsaglia[RNG],Computing[ValueType])
782 171 equemene
    BuildOptions="-cl-mad-enable -cl-fast-relaxed-math -DTRNG=%i -DTYPE=%i -DINTERACTION=%i -DARTEVASION=%i" % (Marsaglia[RNG],Computing[ValueType],Interaction[InterType],Artevasion[CoArEv])
783 170 equemene
784 170 equemene
    if 'Intel' in PlatForm.name or 'Experimental' in PlatForm.name or 'Clover' in PlatForm.name or 'Portable' in PlatForm.name :
785 151 equemene
        MyRoutines = cl.Program(ctx, BlobOpenCL).build(options = BuildOptions)
786 151 equemene
    else:
787 151 equemene
        MyRoutines = cl.Program(ctx, BlobOpenCL).build(options = BuildOptions+" -cl-strict-aliasing")
788 170 equemene
789 170 equemene
    mf = cl.mem_flags
790 170 equemene
    # Read/Write approach for buffering
791 170 equemene
    clDataX = cl.Buffer(ctx, mf.READ_WRITE, MyDataX.nbytes)
792 170 equemene
    clDataV = cl.Buffer(ctx, mf.READ_WRITE, MyDataV.nbytes)
793 170 equemene
    clPotential = cl.Buffer(ctx, mf.READ_WRITE, MyPotential.nbytes)
794 170 equemene
    clKinetic = cl.Buffer(ctx, mf.READ_WRITE, MyKinetic.nbytes)
795 170 equemene
    clCoM = cl.Buffer(ctx, mf.READ_WRITE, MyCoM.nbytes)
796 160 equemene
797 170 equemene
    # Write/HostPointer approach for buffering
798 170 equemene
    # clDataX = cl.Buffer(ctx, mf.WRITE_ONLY|mf.COPY_HOST_PTR,hostbuf=MyDataX)
799 170 equemene
    # clDataV = cl.Buffer(ctx, mf.WRITE_ONLY|mf.COPY_HOST_PTR,hostbuf=MyDataV)
800 170 equemene
    # clPotential = cl.Buffer(ctx, mf.WRITE_ONLY|mf.COPY_HOST_PTR,hostbuf=MyPotential)
801 170 equemene
    # clKinetic = cl.Buffer(ctx, mf.WRITE_ONLY|mf.COPY_HOST_PTR,hostbuf=MyKinetic)
802 170 equemene
    # clCoM = cl.Buffer(ctx, mf.WRITE_ONLY|mf.COPY_HOST_PTR,hostbuf=MyCoM)
803 116 equemene
804 134 equemene
    print('All particles superimposed.')
805 116 equemene
806 132 equemene
    # Set particles to RNG points
807 132 equemene
    if InitialRandom:
808 170 equemene
        seed_w=np.uint32(nprnd(2**32))
809 170 equemene
        seed_z=np.uint32(nprnd(2**32))
810 132 equemene
    else:
811 170 equemene
        seed_w=np.uint32(19710211)
812 170 equemene
        seed_z=np.uint32(20081010)
813 170 equemene
814 170 equemene
    if Shape=='Ball':
815 170 equemene
        MyRoutines.InBallSplutterPoints(queue,(Number,1),None,clDataX,SizeOfShape,seed_w,seed_z)
816 170 equemene
    else:
817 170 equemene
        MyRoutines.InBoxSplutterPoints(queue,(Number,1),None,clDataX,SizeOfShape,seed_w,seed_z)
818 116 equemene
819 132 equemene
    print('All particules distributed')
820 139 equemene
821 160 equemene
    CLLaunch=MyRoutines.CenterOfMass(queue,(1,1),None,clDataX,clCoM,np.int32(Number))
822 160 equemene
    CLLaunch.wait()
823 142 equemene
    cl.enqueue_copy(queue,MyCoM,clCoM)
824 170 equemene
    print('Center Of Mass estimated: (%s,%s,%s)' % (MyCoM[0],MyCoM[1],MyCoM[2]))
825 139 equemene
826 139 equemene
    if VirielStress:
827 160 equemene
        CLLaunch=MyRoutines.SplutterStress(queue,(Number,1),None,clDataX,clDataV,clCoM,MyFloat(0.),np.uint32(110271),np.uint32(250173))
828 139 equemene
    else:
829 160 equemene
        CLLaunch=MyRoutines.SplutterStress(queue,(Number,1),None,clDataX,clDataV,clCoM,Velocity,np.uint32(110271),np.uint32(250173))
830 160 equemene
    CLLaunch.wait()
831 139 equemene
832 170 equemene
    print('All particules stressed')
833 170 equemene
834 160 equemene
    CLLaunch=MyRoutines.Potential(queue,(Number,1),None,clDataX,clPotential)
835 160 equemene
    CLLaunch=MyRoutines.Kinetic(queue,(Number,1),None,clDataV,clKinetic)
836 133 equemene
    CLLaunch.wait()
837 141 equemene
    cl.enqueue_copy(queue,MyPotential,clPotential)
838 141 equemene
    cl.enqueue_copy(queue,MyKinetic,clKinetic)
839 170 equemene
    print('Energy estimated: Viriel=%s Potential=%s Kinetic=%s\n'% (np.sum(MyPotential)+2*np.sum(MyKinetic),np.sum(MyPotential),np.sum(MyKinetic)))
840 116 equemene
841 170 equemene
    if SpeedRendering:
842 170 equemene
        SizeOfBox=max(2*MyKinetic)
843 170 equemene
    else:
844 170 equemene
        SizeOfBox=SizeOfShape
845 170 equemene
846 174 equemene
    if OpenGL:
847 174 equemene
        print('\tTiny documentation to interact OpenGL rendering:\n')
848 174 equemene
        print('\t<Left|Right> Rotate around X axis')
849 174 equemene
        print('\t  <Up|Down>  Rotate around Y axis')
850 174 equemene
        print('\t   <z|Z>     Rotate around Z axis')
851 174 equemene
        print('\t   <-|+>     Unzoom/Zoom')
852 174 equemene
        print('\t    <s>      Toggle to display Positions or Velocities')
853 174 equemene
        print('\t   <Esc>     Quit\n')
854 174 equemene
855 170 equemene
    wall_time_start=time.time()
856 160 equemene
857 170 equemene
    Durations=np.array([],dtype=MyFloat)
858 170 equemene
    print('Starting!')
859 170 equemene
    if OpenGL:
860 170 equemene
        global ViewRX,ViewRY,ViewRZ
861 170 equemene
        Iterations=0
862 170 equemene
        ViewRX,ViewRY,ViewRZ = 0.,0.,0.
863 170 equemene
        # Launch OpenGL Loop
864 170 equemene
        glutInit(sys.argv)
865 170 equemene
        glutInitDisplayMode(GLUT_DOUBLE | GLUT_RGB)
866 170 equemene
        glutSetOption(GLUT_ACTION_ON_WINDOW_CLOSE,GLUT_ACTION_CONTINUE_EXECUTION)
867 170 equemene
        glutInitWindowSize(512,512)
868 170 equemene
        glutCreateWindow(b'NBodyGL')
869 170 equemene
        setup_viewport()
870 170 equemene
        glutReshapeFunc(reshape)
871 170 equemene
        glutDisplayFunc(display)
872 170 equemene
        glutIdleFunc(display)
873 170 equemene
        #   glutMouseFunc(mouse)
874 170 equemene
        glutSpecialFunc(special)
875 170 equemene
        glutKeyboardFunc(keyboard)
876 170 equemene
        glutMainLoop()
877 170 equemene
    else:
878 170 equemene
        for iteration in range(Iterations):
879 170 equemene
            Elapsed=MainOpenCL(clDataX,clDataV,Step,Method)
880 170 equemene
            if Verbose:
881 170 equemene
                # print("Duration of #%s iteration: %s" % (iteration,Elapsed))
882 170 equemene
                cl.enqueue_copy(queue, MyDataX, clDataX)
883 170 equemene
                print("Positions for #%s iteration: %s" % (iteration,MyDataX))
884 170 equemene
            else:
885 170 equemene
                sys.stdout.write('.')
886 170 equemene
                sys.stdout.flush()
887 170 equemene
            Durations=np.append(Durations,Elapsed)
888 133 equemene
889 170 equemene
    print('\nEnding!')
890 139 equemene
891 160 equemene
    MyRoutines.CenterOfMass(queue,(1,1),None,clDataX,clCoM,np.int32(Number))
892 160 equemene
    CLLaunch=MyRoutines.Potential(queue,(Number,1),None,clDataX,clPotential)
893 160 equemene
    CLLaunch=MyRoutines.Kinetic(queue,(Number,1),None,clDataV,clKinetic)
894 141 equemene
    CLLaunch.wait()
895 160 equemene
    cl.enqueue_copy(queue,MyCoM,clCoM)
896 141 equemene
    cl.enqueue_copy(queue,MyPotential,clPotential)
897 141 equemene
    cl.enqueue_copy(queue,MyKinetic,clKinetic)
898 170 equemene
    print('\nCenter Of Mass estimated: (%s,%s,%s)' % (MyCoM[0],MyCoM[1],MyCoM[2]))
899 170 equemene
    print('Energy estimated: Viriel=%s Potential=%s Kinetic=%s\n'% (np.sum(MyPotential)+2.*np.sum(MyKinetic),np.sum(MyPotential),np.sum(MyKinetic)))
900 135 equemene
901 170 equemene
    print("Duration stats on device %s with %s iterations :\n\tMean:\t%s\n\tMedian:\t%s\n\tStddev:\t%s\n\tMin:\t%s\n\tMax:\t%s\n\n\tVariability:\t%s\n" % (Device,Iterations,np.mean(Durations),np.median(Durations),np.std(Durations),np.min(Durations),np.max(Durations),np.std(Durations)/np.median(Durations)))
902 170 equemene
903 171 equemene
    # FPS: 1/Elapsed
904 171 equemene
    FPS=np.ones(len(Durations))
905 171 equemene
    FPS/=Durations
906 171 equemene
907 171 equemene
    print("FPS stats on device %s with %s iterations :\n\tMean:\t%s\n\tMedian:\t%s\n\tStddev:\t%s\n\tMin:\t%s\n\tMax:\t%s\n" % (Device,Iterations,np.mean(FPS),np.median(FPS),np.std(FPS),np.min(FPS),np.max(FPS)))
908 171 equemene
909 170 equemene
    # Contraction of Square*Size*Hertz: Size*Size/Elapsed
910 170 equemene
    Squertz=np.ones(len(Durations))
911 170 equemene
    Squertz*=Number*Number
912 170 equemene
    Squertz/=Durations
913 170 equemene
914 172 equemene
    print("Squertz in log10 & complete stats on device %s with %s iterations :\n\tMean:\t%s\t%s\n\tMedian:\t%s\t%s\n\tStddev:\t%s\t%s\n\tMin:\t%s\t%s\n\tMax:\t%s\t%s\n" % (Device,Iterations,np.log10(np.mean(Squertz)),np.mean(Squertz),np.log10(np.median(Squertz)),np.median(Squertz),np.log10(np.std(Squertz)),np.std(Squertz),np.log10(np.min(Squertz)),np.min(Squertz),np.log10(np.max(Squertz)),np.max(Squertz)))
915 170 equemene
916 160 equemene
    clDataX.release()
917 160 equemene
    clDataV.release()
918 133 equemene
    clKinetic.release()
919 133 equemene
    clPotential.release()