Statistiques
| Révision :

root / NBody / NBodyGL.py @ 171

Historique | Voir | Annoter | Télécharger (24,98 ko)

1 161 equemene
#!/usr/bin/env python3
2 161 equemene
# -*- coding: utf-8 -*-
3 161 equemene
"""
4 161 equemene
Demonstrateur OpenCL d'interaction NCorps
5 161 equemene

6 161 equemene
Emmanuel QUEMENER <emmanuel.quemener@ens-lyon.fr> CeCILLv2
7 161 equemene
"""
8 161 equemene
import getopt
9 161 equemene
import sys
10 161 equemene
import time
11 161 equemene
import numpy as np
12 161 equemene
import pyopencl as cl
13 161 equemene
import pyopencl.array as cl_array
14 161 equemene
from numpy.random import randint as nprnd
15 161 equemene
import string, sys
16 161 equemene
from OpenGL.GL import *
17 161 equemene
from OpenGL.GLUT import *
18 161 equemene
19 161 equemene
def DictionariesAPI():
20 161 equemene
    Marsaglia={'CONG':0,'SHR3':1,'MWC':2,'KISS':3}
21 161 equemene
    Computing={'FP32':0,'FP64':1}
22 161 equemene
    return(Marsaglia,Computing)
23 161 equemene
24 166 equemene
25 167 equemene
26 167 equemene
27 167 equemene
28 167 equemene
29 167 equemene
30 167 equemene
31 167 equemene
32 167 equemene
33 167 equemene
34 167 equemene
35 167 equemene
36 167 equemene
37 167 equemene
38 167 equemene
39 167 equemene
40 167 equemene
41 167 equemene
42 167 equemene
43 167 equemene
44 167 equemene
45 167 equemene
46 167 equemene
47 167 equemene
48 167 equemene
49 167 equemene
50 167 equemene
51 167 equemene
52 167 equemene
53 167 equemene
54 167 equemene
55 167 equemene
56 167 equemene
57 167 equemene
58 167 equemene
59 167 equemene
60 167 equemene
61 167 equemene
62 167 equemene
63 167 equemene
64 167 equemene
65 167 equemene
66 167 equemene
67 167 equemene
68 167 equemene
69 167 equemene
70 167 equemene
71 167 equemene
72 167 equemene
73 167 equemene
74 167 equemene
75 167 equemene
76 167 equemene
77 167 equemene
78 167 equemene
79 167 equemene
80 167 equemene
81 167 equemene
82 167 equemene
83 167 equemene
84 167 equemene
85 167 equemene
86 167 equemene
87 161 equemene
BlobOpenCL= """
88 167 equemene
#define TFP32 0
89 167 equemene
#define TFP64 1
90 167 equemene

91 166 equemene
#if TYPE == TFP32
92 166 equemene
#define MYFLOAT4 float4
93 166 equemene
#define MYFLOAT8 float8
94 166 equemene
#define MYFLOAT float
95 166 equemene
#define DISTANCE fast_distance
96 166 equemene
#else
97 166 equemene
#define MYFLOAT4 double4
98 166 equemene
#define MYFLOAT8 double8
99 166 equemene
#define MYFLOAT double
100 166 equemene
#define DISTANCE distance
101 166 equemene
#if defined(cl_khr_fp64)  // Khronos extension available?
102 166 equemene
#pragma OPENCL EXTENSION cl_khr_fp64 : enable
103 166 equemene
#endif
104 166 equemene
#endif
105 166 equemene

106 166 equemene

107 166 equemene
#define znew  ((zmwc=36969*(zmwc&65535)+(zmwc>>16))<<16)
108 166 equemene
#define wnew  ((wmwc=18000*(wmwc&65535)+(wmwc>>16))&65535)
109 161 equemene
#define MWC   (znew+wnew)
110 161 equemene
#define SHR3  (jsr=(jsr=(jsr=jsr^(jsr<<17))^(jsr>>13))^(jsr<<5))
111 161 equemene
#define CONG  (jcong=69069*jcong+1234567)
112 161 equemene
#define KISS  ((MWC^CONG)+SHR3)
113 161 equemene

114 161 equemene

115 166 equemene
#define MWCfp (MYFLOAT)(MWC * 2.3283064365386963e-10f)
116 166 equemene
#define KISSfp (MYFLOAT)(KISS * 2.3283064365386963e-10f)
117 166 equemene
#define SHR3fp (MYFLOAT)(SHR3 * 2.3283064365386963e-10f)
118 166 equemene
#define CONGfp (MYFLOAT)(CONG * 2.3283064365386963e-10f)
119 161 equemene

120 166 equemene
#define PI (MYFLOAT)3.141592653589793238462643197169399375105820974944592307816406286e0f
121 161 equemene

122 166 equemene
#define SMALL_NUM (MYFLOAT)1.e-9f
123 161 equemene

124 161 equemene
#define LENGTH 1.e0f
125 161 equemene

126 166 equemene
// Create my own Distance implementation: distance buggy on Oland AMD chipset
127 161 equemene

128 166 equemene
MYFLOAT MyDistance(MYFLOAT4 n,MYFLOAT4 m)
129 166 equemene
{
130 166 equemene
    private MYFLOAT x2,y2,z2;
131 166 equemene
    x2=n.s0-m.s0;
132 166 equemene
    x2*=x2;
133 166 equemene
    y2=n.s1-m.s1;
134 166 equemene
    y2*=y2;
135 166 equemene
    z2=n.s2-m.s2;
136 166 equemene
    z2*=z2;
137 166 equemene
    return(sqrt(x2+y2+z2));
138 166 equemene
}
139 161 equemene

140 170 equemene
// MYFLOAT4 Interaction(MYFLOAT4 m,MYFLOAT4 n)
141 170 equemene
// {
142 170 equemene
//     private MYFLOAT r=MyDistance((MYFLOAT4)n,(MYFLOAT4)m);
143 170 equemene
//
144 170 equemene
//     return(((MYFLOAT4)n-(MYFLOAT4)m)/(MYFLOAT)(r*r*r));
145 170 equemene
// }
146 170 equemene

147 161 equemene
MYFLOAT4 Interaction(MYFLOAT4 m,MYFLOAT4 n)
148 161 equemene
{
149 166 equemene
    private MYFLOAT r=MyDistance((MYFLOAT4)n,(MYFLOAT4)m);
150 170 equemene
    private MYFLOAT r2=r*r;
151 170 equemene
    private MYFLOAT c=1.e0f/(MYFLOAT)get_global_size(0);
152 161 equemene

153 170 equemene
    return(((MYFLOAT4)n-(MYFLOAT4)m)*(MYFLOAT)(1.e0f-exp(-c*r2))/(MYFLOAT)(r*r2));
154 161 equemene
}
155 161 equemene

156 161 equemene
MYFLOAT PairPotential(MYFLOAT4 m,MYFLOAT4 n)
157 161 equemene
{
158 161 equemene
    return((MYFLOAT)(-1.e0f)/(DISTANCE(n,m)));
159 167 equemene

160 167 equemene
//    return((MYFLOAT)(-1.e0f)/(MyDistance(n,m)));
161 161 equemene
}
162 161 equemene

163 161 equemene
MYFLOAT AtomicPotential(__global MYFLOAT4* clDataX,int gid)
164 161 equemene
{
165 161 equemene
    private MYFLOAT potential=(MYFLOAT)0.e0f;
166 161 equemene
    private MYFLOAT4 x=clDataX[gid];
167 161 equemene

168 161 equemene
    for (int i=0;i<get_global_size(0);i++)
169 161 equemene
    {
170 161 equemene
        if (gid != i)
171 161 equemene
        potential+=PairPotential(x,clDataX[i]);
172 161 equemene
    }
173 161 equemene

174 161 equemene
    barrier(CLK_GLOBAL_MEM_FENCE);
175 161 equemene
    return(potential);
176 161 equemene
}
177 161 equemene

178 161 equemene
MYFLOAT AtomicPotentialCoM(__global MYFLOAT4* clDataX,__global MYFLOAT4* clCoM,int gid)
179 161 equemene
{
180 161 equemene
    return(PairPotential(clDataX[gid],clCoM[0]));
181 161 equemene
}
182 161 equemene

183 166 equemene
// Elements from : http://doswa.com/2009/01/02/fourth-order-runge-kutta-numerical-integration.html
184 166 equemene

185 161 equemene
MYFLOAT8 AtomicRungeKutta(__global MYFLOAT4* clDataInX,__global MYFLOAT4* clDataInV,int gid,MYFLOAT dt)
186 161 equemene
{
187 161 equemene
    private MYFLOAT4 a0,v0,x0,a1,v1,x1,a2,v2,x2,a3,v3,x3,a4,v4,x4,xf,vf;
188 166 equemene
    MYFLOAT4 DT=dt*(MYFLOAT4)(1.e0f,1.e0f,1.e0f,1.e0f);
189 161 equemene

190 161 equemene
    a0=(MYFLOAT4)(0.e0f,0.e0f,0.e0f,0.e0f);
191 161 equemene
    v0=(MYFLOAT4)clDataInV[gid];
192 161 equemene
    x0=(MYFLOAT4)clDataInX[gid];
193 161 equemene
    int N = get_global_size(0);
194 161 equemene

195 166 equemene
    for (private int i=0;i<N;i++)
196 161 equemene
    {
197 161 equemene
        if (gid != i)
198 161 equemene
        a0+=Interaction(x0,clDataInX[i]);
199 161 equemene
    }
200 161 equemene

201 161 equemene
    a1=(MYFLOAT4)(0.e0f,0.e0f,0.e0f,0.e0f);
202 166 equemene
    v1=a0*dt+v0;
203 166 equemene
    x1=v0*dt+x0;
204 166 equemene
    for (private int j=0;j<N;j++)
205 161 equemene
    {
206 166 equemene
        if (gid != j)
207 166 equemene
        a1+=Interaction(x1,clDataInX[j]);
208 161 equemene
    }
209 161 equemene

210 161 equemene
    a2=(MYFLOAT4)(0.e0f,0.e0f,0.e0f,0.e0f);
211 166 equemene
    v2=a1*(MYFLOAT)(dt/2.e0f)+v0;
212 166 equemene
    x2=v1*(MYFLOAT)(dt/2.e0f)+x0;
213 166 equemene
    for (private int k=0;k<N;k++)
214 161 equemene
    {
215 166 equemene
        if (gid != k)
216 166 equemene
        a2+=Interaction(x2,clDataInX[k]);
217 161 equemene
    }
218 161 equemene

219 161 equemene
    a3=(MYFLOAT4)(0.e0f,0.e0f,0.e0f,0.e0f);
220 166 equemene
    v3=a2*(MYFLOAT)(dt/2.e0f)+v0;
221 166 equemene
    x3=v2*(MYFLOAT)(dt/2.e0f)+x0;
222 166 equemene
    for (private int l=0;l<N;l++)
223 161 equemene
    {
224 166 equemene
        if (gid != l)
225 166 equemene
        a3+=Interaction(x3,clDataInX[l]);
226 161 equemene
    }
227 161 equemene

228 161 equemene
    a4=(MYFLOAT4)(0.e0f,0.e0f,0.e0f,0.e0f);
229 166 equemene
    v4=a3*dt+v0;
230 166 equemene
    x4=v3*dt+x0;
231 166 equemene
    for (private int m=0;m<N;m++)
232 161 equemene
    {
233 166 equemene
        if (gid != m)
234 166 equemene
        a4+=Interaction(x4,clDataInX[m]);
235 161 equemene
    }
236 161 equemene

237 161 equemene
    xf=x0+dt*(v1+(MYFLOAT)2.e0f*(v2+v3)+v4)/(MYFLOAT)6.e0f;
238 161 equemene
    vf=v0+dt*(a1+(MYFLOAT)2.e0f*(a2+a3)+a4)/(MYFLOAT)6.e0f;
239 161 equemene

240 166 equemene
    return((MYFLOAT8)(xf.s0,xf.s1,xf.s2,0.e0f,vf.s0,vf.s1,vf.s2,0.e0f));
241 161 equemene
}
242 161 equemene

243 161 equemene
MYFLOAT8 AtomicHeun(__global MYFLOAT4* clDataInX,__global MYFLOAT4* clDataInV,int gid,MYFLOAT dt)
244 161 equemene
{
245 166 equemene
    private MYFLOAT4 x0,v0,a0,x1,v1,a1,xf,vf;
246 167 equemene
    MYFLOAT4 Dt=dt*(MYFLOAT4)(1.e0f,1.e0f,1.e0f,1.e0f);
247 161 equemene

248 166 equemene
    x0=(MYFLOAT4)clDataInX[gid];
249 166 equemene
    v0=(MYFLOAT4)clDataInV[gid];
250 166 equemene
    a0=(MYFLOAT4)(0.e0f,0.e0f,0.e0f,0.e0f);
251 161 equemene

252 166 equemene
    for (private int i=0;i<get_global_size(0);i++)
253 161 equemene
    {
254 161 equemene
        if (gid != i)
255 166 equemene
        a0+=Interaction(x0,clDataInX[i]);
256 161 equemene
    }
257 161 equemene

258 166 equemene
    a1=(MYFLOAT4)(0.e0f,0.e0f,0.e0f,0.e0f);
259 167 equemene
    //v1=v0+dt*a0;
260 167 equemene
    //x1=x0+dt*v0;
261 166 equemene
    v1=dt*a0+v0;
262 166 equemene
    x1=dt*v0+x0;
263 161 equemene

264 166 equemene
    for (private int j=0;j<get_global_size(0);j++)
265 161 equemene
    {
266 166 equemene
        if (gid != j)
267 166 equemene
        a1+=Interaction(x1,clDataInX[j]);
268 161 equemene
    }
269 161 equemene

270 166 equemene
    vf=v0+dt*(a0+a1)/(MYFLOAT)2.e0f;
271 166 equemene
    xf=x0+dt*(v0+v1)/(MYFLOAT)2.e0f;
272 161 equemene

273 166 equemene
    return((MYFLOAT8)(xf.s0,xf.s1,xf.s2,0.e0f,vf.s0,vf.s1,vf.s2,0.e0f));
274 161 equemene
}
275 161 equemene

276 161 equemene
MYFLOAT8 AtomicImplicitEuler(__global MYFLOAT4* clDataInX,__global MYFLOAT4* clDataInV,int gid,MYFLOAT dt)
277 161 equemene
{
278 166 equemene
    MYFLOAT4 x0,v0,a,xf,vf;
279 161 equemene

280 166 equemene
    x0=(MYFLOAT4)clDataInX[gid];
281 166 equemene
    v0=(MYFLOAT4)clDataInV[gid];
282 161 equemene
    a=(MYFLOAT4)(0.e0f,0.e0f,0.e0f,0.e0f);
283 161 equemene

284 166 equemene
    for (private int i=0;i<get_global_size(0);i++)
285 161 equemene
    {
286 161 equemene
        if (gid != i)
287 166 equemene
          a+=Interaction(x0,clDataInX[i]);
288 161 equemene
    }
289 161 equemene

290 166 equemene
    vf=v0+dt*a;
291 166 equemene
    xf=x0+dt*vf;
292 166 equemene

293 166 equemene
    return((MYFLOAT8)(xf.s0,xf.s1,xf.s2,0.e0f,vf.s0,vf.s1,vf.s2,0.e0f));
294 161 equemene
}
295 161 equemene

296 161 equemene
MYFLOAT8 AtomicExplicitEuler(__global MYFLOAT4* clDataInX,__global MYFLOAT4* clDataInV,int gid,MYFLOAT dt)
297 161 equemene
{
298 166 equemene
    MYFLOAT4 x0,v0,a,xf,vf;
299 161 equemene

300 166 equemene
    x0=(MYFLOAT4)clDataInX[gid];
301 166 equemene
    v0=(MYFLOAT4)clDataInV[gid];
302 161 equemene
    a=(MYFLOAT4)(0.e0f,0.e0f,0.e0f,0.e0f);
303 161 equemene

304 166 equemene
    for (private int i=0;i<get_global_size(0);i++)
305 161 equemene
    {
306 161 equemene
        if (gid != i)
307 166 equemene
        a+=Interaction(x0,clDataInX[i]);
308 161 equemene
    }
309 161 equemene

310 166 equemene
    vf=v0+dt*a;
311 166 equemene
    xf=x0+dt*v0;
312 161 equemene

313 166 equemene
    return((MYFLOAT8)(xf.s0,xf.s1,xf.s2,0.e0f,vf.s0,vf.s1,vf.s2,0.e0f));
314 161 equemene
}
315 161 equemene

316 168 equemene
__kernel void InBallSplutterPoints(__global MYFLOAT4* clDataX, MYFLOAT radius,
317 161 equemene
                             uint seed_z,uint seed_w)
318 161 equemene
{
319 168 equemene
    private int gid=get_global_id(0);
320 168 equemene
    private uint zmwc=seed_z+gid;
321 168 equemene
    private uint wmwc=seed_w+(gid+1)%2;
322 168 equemene
    private MYFLOAT Heat,Radius,Theta,Phi,PosX,PosY,PosZ,SinTheta;
323 166 equemene

324 162 equemene
    for (int i=0;i<gid;i++)
325 162 equemene
    {
326 167 equemene
        Heat=MWCfp;
327 162 equemene
    }
328 166 equemene

329 167 equemene
// More accurate distribution based on spherical coordonates
330 168 equemene
// Disactivated because of AMD Oland GPU crash on launch
331 168 equemene
//     Radius=MWCfp*radius;
332 168 equemene
//     Theta=(MYFLOAT)acos((float)(-2.e0f*MWCfp+1.0e0f));
333 168 equemene
//     Phi=(MYFLOAT)(2.e0f*PI*MWCfp);
334 168 equemene
//     SinTheta=sin((float)Theta);
335 168 equemene
//     PosX=cos((float)Phi)*Radius*SinTheta;
336 168 equemene
//     PosY=sin((float)Phi)*Radius*SinTheta;
337 168 equemene
//     PosZ=cos((float)Theta)*Radius;
338 167 equemene
//     clDataX[gid]=(MYFLOAT4)(PosX,PosY,PosZ,0.e0f);
339 167 equemene

340 168 equemene
    MYFLOAT4 Position=(MYFLOAT4)((MWCfp-0.5e0f)*radius,(MWCfp-0.5e0f)*radius,(MWCfp-0.5e0f)*radius,0.e0f);
341 167 equemene
    MYFLOAT Length=(MYFLOAT)length((MYFLOAT4)Position);
342 168 equemene
    clDataX[gid]=Position/Length*fmod(radius,Length);
343 167 equemene

344 161 equemene
    barrier(CLK_GLOBAL_MEM_FENCE);
345 161 equemene
}
346 161 equemene

347 168 equemene
__kernel void InBoxSplutterPoints(__global MYFLOAT4* clDataX, MYFLOAT box,
348 168 equemene
                             uint seed_z,uint seed_w)
349 168 equemene
{
350 168 equemene
    int gid=get_global_id(0);
351 168 equemene
    uint zmwc=seed_z+gid;
352 168 equemene
    uint wmwc=seed_w-gid;
353 168 equemene
    private MYFLOAT Heat;
354 168 equemene

355 168 equemene
    for (int i=0;i<gid;i++)
356 168 equemene
    {
357 168 equemene
        Heat=MWCfp;
358 168 equemene
    }
359 168 equemene

360 168 equemene
    clDataX[gid]=(MYFLOAT4)((MWCfp-0.5e0f)*box,(MWCfp-0.5e0f)*box,(MWCfp-0.5e0f)*box,0.e0f);
361 168 equemene

362 168 equemene
    barrier(CLK_GLOBAL_MEM_FENCE);
363 168 equemene
}
364 168 equemene

365 161 equemene
__kernel void SplutterStress(__global MYFLOAT4* clDataX,__global MYFLOAT4* clDataV,__global MYFLOAT4* clCoM, MYFLOAT velocity,uint seed_z,uint seed_w)
366 161 equemene
{
367 161 equemene
    int gid = get_global_id(0);
368 161 equemene
    MYFLOAT N = (MYFLOAT)get_global_size(0);
369 166 equemene
    uint zmwc=seed_z+(uint)gid;
370 166 equemene
    uint wmwc=seed_w-(uint)gid;
371 166 equemene
    MYFLOAT4 SpeedVector;
372 161 equemene

373 161 equemene
    if (velocity<SMALL_NUM) {
374 167 equemene
       SpeedVector=(MYFLOAT4)normalize(cross(clDataX[gid],clCoM[0]))*sqrt((-AtomicPotential(clDataX,gid)/(MYFLOAT)2.e0f));
375 161 equemene
    }
376 161 equemene
    else
377 161 equemene
    {
378 161 equemene
       // cast to float for sin,cos are NEEDED by Mesa FP64 implementation!
379 166 equemene
       // Implemention on AMD Oland are probably broken in float
380 166 equemene

381 166 equemene
       MYFLOAT theta=acos((float)(1.0e0f-2.e0f*MWCfp));
382 161 equemene
       MYFLOAT phi=MWCfp*PI*(MYFLOAT)2.e0f;
383 161 equemene
       MYFLOAT sinTheta=sin((float)theta);
384 166 equemene
       MYFLOAT sinPhi=sin((float)phi);
385 161 equemene

386 166 equemene
       SpeedVector=(MYFLOAT4)((MWCfp-0.5e0f)*velocity,(MWCfp-0.5e0f)*velocity,
387 166 equemene
                              (MWCfp-0.5e0f)*velocity,0.e0f);
388 161 equemene
    }
389 166 equemene
    clDataV[gid]=SpeedVector;
390 161 equemene
    barrier(CLK_GLOBAL_MEM_FENCE);
391 161 equemene
}
392 161 equemene

393 161 equemene
__kernel void RungeKutta(__global MYFLOAT4* clDataX,__global MYFLOAT4* clDataV,MYFLOAT h)
394 161 equemene
{
395 166 equemene
    private int gid = get_global_id(0);
396 166 equemene
    private MYFLOAT8 clDataGid;
397 161 equemene

398 166 equemene
    clDataGid=AtomicRungeKutta(clDataX,clDataV,gid,h);
399 161 equemene
    barrier(CLK_GLOBAL_MEM_FENCE);
400 166 equemene
    clDataX[gid]=clDataGid.s0123;
401 166 equemene
    clDataV[gid]=clDataGid.s4567;
402 161 equemene
}
403 161 equemene

404 166 equemene
__kernel void Heun(__global MYFLOAT4* clDataX,__global MYFLOAT4* clDataV,MYFLOAT h)
405 161 equemene
{
406 166 equemene
    private int gid = get_global_id(0);
407 166 equemene
    private MYFLOAT8 clDataGid;
408 161 equemene

409 166 equemene
    clDataGid=AtomicHeun(clDataX,clDataV,gid,h);
410 161 equemene
    barrier(CLK_GLOBAL_MEM_FENCE);
411 166 equemene
    clDataX[gid]=clDataGid.s0123;
412 166 equemene
    clDataV[gid]=clDataGid.s4567;
413 161 equemene
}
414 161 equemene

415 166 equemene
__kernel void ImplicitEuler(__global MYFLOAT4* clDataX,__global MYFLOAT4* clDataV,MYFLOAT h)
416 161 equemene
{
417 166 equemene
    private int gid = get_global_id(0);
418 166 equemene
    private MYFLOAT8 clDataGid;
419 161 equemene

420 166 equemene
    clDataGid=AtomicImplicitEuler(clDataX,clDataV,gid,h);
421 161 equemene
    barrier(CLK_GLOBAL_MEM_FENCE);
422 166 equemene
    clDataX[gid]=clDataGid.s0123;
423 166 equemene
    clDataV[gid]=clDataGid.s4567;
424 161 equemene
}
425 161 equemene

426 161 equemene
__kernel void ExplicitEuler(__global MYFLOAT4* clDataX,__global MYFLOAT4* clDataV,MYFLOAT h)
427 161 equemene
{
428 166 equemene
    private int gid = get_global_id(0);
429 166 equemene
    private MYFLOAT8 clDataGid;
430 166 equemene

431 166 equemene
    clDataGid=AtomicExplicitEuler(clDataX,clDataV,gid,h);
432 161 equemene
    barrier(CLK_GLOBAL_MEM_FENCE);
433 166 equemene
    clDataX[gid]=clDataGid.s0123;
434 166 equemene
    clDataV[gid]=clDataGid.s4567;
435 161 equemene
}
436 161 equemene

437 161 equemene
__kernel void CoMPotential(__global MYFLOAT4* clDataX,__global MYFLOAT4* clCoM,__global MYFLOAT* clPotential)
438 161 equemene
{
439 161 equemene
    int gid = get_global_id(0);
440 161 equemene

441 161 equemene
    clPotential[gid]=PairPotential(clDataX[gid],clCoM[0]);
442 161 equemene
}
443 161 equemene

444 161 equemene
__kernel void Potential(__global MYFLOAT4* clDataX,__global MYFLOAT* clPotential)
445 161 equemene
{
446 161 equemene
    int gid = get_global_id(0);
447 161 equemene

448 161 equemene
    MYFLOAT potential=(MYFLOAT)0.e0f;
449 161 equemene
    MYFLOAT4 x=clDataX[gid];
450 161 equemene

451 161 equemene
    for (int i=0;i<get_global_size(0);i++)
452 161 equemene
    {
453 161 equemene
        if (gid != i)
454 161 equemene
        potential+=PairPotential(x,clDataX[i]);
455 161 equemene
    }
456 161 equemene

457 161 equemene
    barrier(CLK_GLOBAL_MEM_FENCE);
458 161 equemene
    clPotential[gid]=potential*(MYFLOAT)5.e-1f;
459 161 equemene
}
460 161 equemene

461 161 equemene
__kernel void CenterOfMass(__global MYFLOAT4* clDataX,__global MYFLOAT4* clCoM,int Size)
462 161 equemene
{
463 161 equemene
    MYFLOAT4 CoM=clDataX[0];
464 161 equemene

465 161 equemene
    for (int i=1;i<Size;i++)
466 161 equemene
    {
467 161 equemene
        CoM+=clDataX[i];
468 161 equemene
    }
469 161 equemene

470 161 equemene
    barrier(CLK_GLOBAL_MEM_FENCE);
471 166 equemene
    clCoM[0]=(MYFLOAT4)(CoM.s0,CoM.s1,CoM.s2,0.e0f)/(MYFLOAT)Size;
472 161 equemene
}
473 161 equemene

474 161 equemene
__kernel void Kinetic(__global MYFLOAT4* clDataV,__global MYFLOAT* clKinetic)
475 161 equemene
{
476 161 equemene
    int gid = get_global_id(0);
477 161 equemene

478 161 equemene
    barrier(CLK_GLOBAL_MEM_FENCE);
479 161 equemene
    MYFLOAT d=(MYFLOAT)length(clDataV[gid]);
480 161 equemene
    clKinetic[gid]=(MYFLOAT)5.e-1f*(MYFLOAT)(d*d);
481 161 equemene
}
482 166 equemene

483 161 equemene
"""
484 161 equemene
485 169 equemene
def MainOpenCL(clDataX,clDataV,Step,Method):
486 161 equemene
    time_start=time.time()
487 166 equemene
    if Method=="RungeKutta":
488 161 equemene
        CLLaunch=MyRoutines.RungeKutta(queue,(Number,1),None,clDataX,clDataV,Step)
489 161 equemene
    elif Method=="ExplicitEuler":
490 161 equemene
        CLLaunch=MyRoutines.ExplicitEuler(queue,(Number,1),None,clDataX,clDataV,Step)
491 161 equemene
    elif Method=="Heun":
492 161 equemene
        CLLaunch=MyRoutines.Heun(queue,(Number,1),None,clDataX,clDataV,Step)
493 161 equemene
    else:
494 161 equemene
        CLLaunch=MyRoutines.ImplicitEuler(queue,(Number,1),None,clDataX,clDataV,Step)
495 161 equemene
    CLLaunch.wait()
496 164 equemene
    Elapsed=time.time()-time_start
497 169 equemene
    return(Elapsed)
498 169 equemene
499 169 equemene
def display(*args):
500 169 equemene
501 169 equemene
    global MyDataX,MyDataV,clDataX,clDataV,Step,Method,Number,Iterations,Durations,Verbose,SpeedRendering
502 169 equemene
503 169 equemene
    glClearColor(0.0, 0.0, 0.0, 0.0)
504 169 equemene
    glClear(GL_COLOR_BUFFER_BIT)
505 169 equemene
    glColor3f(1.0,1.0,1.0)
506 169 equemene
507 169 equemene
    Elapsed=MainOpenCL(clDataX,clDataV,Step,Method)
508 169 equemene
    if SpeedRendering:
509 169 equemene
        cl.enqueue_copy(queue, MyDataV, clDataV)
510 169 equemene
        MyDataV.reshape(Number,4)[:,3]=1
511 169 equemene
        glVertexPointerf(MyDataV.reshape(Number,4))
512 169 equemene
    else:
513 169 equemene
        cl.enqueue_copy(queue, MyDataX, clDataX)
514 169 equemene
        MyDataX.reshape(Number,4)[:,3]=1
515 169 equemene
        glVertexPointerf(MyDataX.reshape(Number,4))
516 169 equemene
517 164 equemene
    if Verbose:
518 164 equemene
        print("Duration of #%s iteration: %s" % (Iterations,Elapsed))
519 164 equemene
    else:
520 164 equemene
        sys.stdout.write('.')
521 164 equemene
        sys.stdout.flush()
522 169 equemene
    Durations=np.append(Durations,MainOpenCL(clDataX,clDataV,Step,Method))
523 161 equemene
    glEnableClientState(GL_VERTEX_ARRAY)
524 161 equemene
    glDrawArrays(GL_POINTS, 0, Number)
525 161 equemene
    glDisableClientState(GL_VERTEX_ARRAY)
526 161 equemene
    glFlush()
527 162 equemene
    Iterations+=1
528 161 equemene
    glutSwapBuffers()
529 161 equemene
530 161 equemene
def halt():
531 161 equemene
    pass
532 161 equemene
533 164 equemene
def keyboard(k,x,y):
534 169 equemene
    global ViewRZ,SizeOfBox,SpeedRendering
535 162 equemene
    LC_Z = as_8_bit( 'z' )
536 162 equemene
    UC_Z = as_8_bit( 'Z' )
537 164 equemene
    Plus = as_8_bit( '+' )
538 164 equemene
    Minus = as_8_bit( '-' )
539 169 equemene
    Switch = as_8_bit( 's' )
540 161 equemene
541 164 equemene
    Zoom=1
542 162 equemene
    if k == LC_Z:
543 163 equemene
        ViewRZ += 1.0
544 162 equemene
    elif k == UC_Z:
545 163 equemene
        ViewRZ -= 1.0
546 164 equemene
    elif k == Plus:
547 164 equemene
        Zoom *= 2.0
548 164 equemene
    elif k == Minus:
549 164 equemene
        Zoom /= 2.0
550 169 equemene
    elif k == Switch:
551 169 equemene
        if SpeedRendering:
552 169 equemene
            SpeedRendering=False
553 169 equemene
        else:
554 169 equemene
            SpeedRendering=True
555 162 equemene
    elif ord(k) == 27: # Escape
556 162 equemene
        glutLeaveMainLoop()
557 162 equemene
        return(False)
558 162 equemene
    else:
559 162 equemene
        return
560 163 equemene
    glRotatef(ViewRZ, 0.0, 0.0, 1.0)
561 164 equemene
    glScalef(Zoom,Zoom,Zoom)
562 162 equemene
    glutPostRedisplay()
563 162 equemene
564 164 equemene
def special(k,x,y):
565 163 equemene
    global ViewRX, ViewRY, ViewRZ
566 162 equemene
567 162 equemene
    if k == GLUT_KEY_UP:
568 163 equemene
        ViewRX += 1.0
569 162 equemene
    elif k == GLUT_KEY_DOWN:
570 163 equemene
        ViewRX -= 1.0
571 162 equemene
    elif k == GLUT_KEY_LEFT:
572 163 equemene
        ViewRY += 1.0
573 162 equemene
    elif k == GLUT_KEY_RIGHT:
574 163 equemene
        ViewRY -= 1.0
575 162 equemene
    else:
576 162 equemene
        return
577 163 equemene
    glRotatef(ViewRX, 1.0, 0.0, 0.0)
578 163 equemene
    glRotatef(ViewRY, 0.0, 1.0, 0.0)
579 162 equemene
    glutPostRedisplay()
580 162 equemene
581 161 equemene
def setup_viewport():
582 161 equemene
    global SizeOfBox
583 161 equemene
    glMatrixMode(GL_PROJECTION)
584 161 equemene
    glLoadIdentity()
585 161 equemene
    glOrtho(-SizeOfBox, SizeOfBox, -SizeOfBox, SizeOfBox, -SizeOfBox, SizeOfBox)
586 164 equemene
    glutPostRedisplay()
587 162 equemene
588 161 equemene
def reshape(w, h):
589 161 equemene
    glViewport(0, 0, w, h)
590 161 equemene
    setup_viewport()
591 161 equemene
592 161 equemene
if __name__=='__main__':
593 161 equemene
594 168 equemene
    global Number,Step,clDataX,clDataV,MyDataX,MyDataV,Method,SizeOfBox,Iterations,Verbose,Durations
595 161 equemene
596 161 equemene
    # ValueType
597 161 equemene
    ValueType='FP32'
598 161 equemene
    class MyFloat(np.float32):pass
599 161 equemene
    #    clType8=cl_array.vec.float8
600 161 equemene
    # Set defaults values
601 161 equemene
    np.set_printoptions(precision=2)
602 161 equemene
    # Id of Device : 1 is for first find !
603 161 equemene
    Device=0
604 164 equemene
    # Number of bodies is integer
605 161 equemene
    Number=2
606 168 equemene
    # Number of iterations (for standalone execution)
607 168 equemene
    Iterations=100
608 168 equemene
    # Size of shape
609 168 equemene
    SizeOfShape=MyFloat(1.)
610 161 equemene
    # Initial velocity of particules
611 161 equemene
    Velocity=MyFloat(1.)
612 161 equemene
    # Step
613 168 equemene
    Step=MyFloat(1./32)
614 161 equemene
    # Method of integration
615 161 equemene
    Method='ImplicitEuler'
616 161 equemene
    # InitialRandom
617 161 equemene
    InitialRandom=False
618 161 equemene
    # RNG Marsaglia Method
619 161 equemene
    RNG='MWC'
620 161 equemene
    # Viriel Distribution of stress
621 161 equemene
    VirielStress=True
622 164 equemene
    # Verbose
623 164 equemene
    Verbose=False
624 169 equemene
    # OpenGL real time rendering
625 167 equemene
    OpenGL=False
626 169 equemene
    # Speed rendering
627 169 equemene
    SpeedRendering=False
628 168 equemene
    # Shape to distribute
629 168 equemene
    Shape='Box'
630 161 equemene
631 169 equemene
    HowToUse='%s -h [Help] -r [InitialRandom] -g [OpenGL] -e [VirielStress] -d <DeviceId> -p [SpeedRendering] -n <NumberOfParticules> -i <Iterations> -z <SizeOfBoxOrBall> -v <Velocity> -s <Step> -b <Ball|Box> -m <ImplicitEuler|RungeKutta|ExplicitEuler|Heun> -t <FP32|FP64>'
632 161 equemene
633 161 equemene
    try:
634 169 equemene
        opts, args = getopt.getopt(sys.argv[1:],"rpgehd:n:i:z:v:s:m:t:b:",["random","rendering","opengl","viriel","device=","number=","iterations=","size=","velocity=","step=","method=","valuetype=","shape="])
635 161 equemene
    except getopt.GetoptError:
636 161 equemene
        print(HowToUse % sys.argv[0])
637 161 equemene
        sys.exit(2)
638 161 equemene
639 161 equemene
    for opt, arg in opts:
640 161 equemene
        if opt == '-h':
641 161 equemene
            print(HowToUse % sys.argv[0])
642 161 equemene
643 161 equemene
            print("\nInformations about devices detected under OpenCL:")
644 161 equemene
            try:
645 161 equemene
                Id=0
646 161 equemene
                for platform in cl.get_platforms():
647 161 equemene
                    for device in platform.get_devices():
648 168 equemene
                        # Failed now because of POCL implementation
649 161 equemene
                        #deviceType=cl.device_type.to_string(device.type)
650 161 equemene
                        deviceType="xPU"
651 161 equemene
                        print("Device #%i from %s of type %s : %s" % (Id,platform.vendor.lstrip(),deviceType,device.name.lstrip()))
652 161 equemene
                        Id=Id+1
653 161 equemene
                sys.exit()
654 161 equemene
            except ImportError:
655 161 equemene
                print("Your platform does not seem to support OpenCL")
656 161 equemene
                sys.exit()
657 161 equemene
658 161 equemene
        elif opt in ("-t", "--valuetype"):
659 161 equemene
            if arg=='FP64':
660 161 equemene
                class MyFloat(np.float64): pass
661 161 equemene
            else:
662 161 equemene
                class MyFloat(np.float32):pass
663 161 equemene
            ValueType = arg
664 161 equemene
        elif opt in ("-d", "--device"):
665 161 equemene
            Device=int(arg)
666 161 equemene
        elif opt in ("-m", "--method"):
667 161 equemene
            Method=arg
668 168 equemene
        elif opt in ("-b", "--shape"):
669 168 equemene
            Shape=arg
670 161 equemene
        elif opt in ("-n", "--number"):
671 161 equemene
            Number=int(arg)
672 168 equemene
        elif opt in ("-i", "--iterations"):
673 168 equemene
            Iterations=int(arg)
674 161 equemene
        elif opt in ("-z", "--size"):
675 168 equemene
            SizeOfShape=MyFloat(arg)
676 161 equemene
        elif opt in ("-v", "--velocity"):
677 161 equemene
            Velocity=MyFloat(arg)
678 161 equemene
            VirielStress=False
679 161 equemene
        elif opt in ("-s", "--step"):
680 161 equemene
            Step=MyFloat(arg)
681 161 equemene
        elif opt in ("-r", "--random"):
682 161 equemene
            InitialRandom=True
683 161 equemene
        elif opt in ("-c", "--check"):
684 161 equemene
            CheckEnergies=True
685 161 equemene
        elif opt in ("-e", "--viriel"):
686 161 equemene
            VirielStress=True
687 167 equemene
        elif opt in ("-g", "--opengl"):
688 167 equemene
            OpenGL=True
689 169 equemene
        elif opt in ("-p", "--rendering"):
690 169 equemene
            SpeedRendering=True
691 161 equemene
692 168 equemene
    SizeOfShape=MyFloat(SizeOfShape*Number)
693 161 equemene
    Velocity=MyFloat(Velocity)
694 161 equemene
    Step=MyFloat(Step)
695 161 equemene
696 161 equemene
    print("Device choosed : %s" % Device)
697 161 equemene
    print("Number of particules : %s" % Number)
698 168 equemene
    print("Size of Shape : %s" % SizeOfShape)
699 161 equemene
    print("Initial velocity : %s" % Velocity)
700 161 equemene
    print("Step of iteration : %s" % Step)
701 168 equemene
    print("Number of iterations : %s" % Iterations)
702 161 equemene
    print("Method of resolution : %s" % Method)
703 161 equemene
    print("Initial Random for RNG Seed : %s" % InitialRandom)
704 161 equemene
    print("ValueType is : %s" % ValueType)
705 168 equemene
    print("Viriel distribution of stress : %s" % VirielStress)
706 169 equemene
    print("OpenGL real time rendering : %s" % OpenGL)
707 169 equemene
    print("Speed rendering : %s" % SpeedRendering)
708 161 equemene
709 161 equemene
    # Create Numpy array of CL vector with 8 FP32
710 161 equemene
    MyCoM = np.zeros(4,dtype=MyFloat)
711 161 equemene
    MyDataX = np.zeros(Number*4, dtype=MyFloat)
712 161 equemene
    MyDataV = np.zeros(Number*4, dtype=MyFloat)
713 161 equemene
    MyPotential = np.zeros(Number, dtype=MyFloat)
714 161 equemene
    MyKinetic = np.zeros(Number, dtype=MyFloat)
715 161 equemene
716 161 equemene
    Marsaglia,Computing=DictionariesAPI()
717 161 equemene
718 161 equemene
    # Scan the OpenCL arrays
719 161 equemene
    Id=0
720 161 equemene
    HasXPU=False
721 161 equemene
    for platform in cl.get_platforms():
722 161 equemene
        for device in platform.get_devices():
723 161 equemene
            if Id==Device:
724 161 equemene
                PlatForm=platform
725 161 equemene
                XPU=device
726 161 equemene
                print("CPU/GPU selected: ",device.name.lstrip())
727 161 equemene
                print("Platform selected: ",platform.name)
728 161 equemene
                HasXPU=True
729 161 equemene
            Id+=1
730 161 equemene
731 161 equemene
    if HasXPU==False:
732 161 equemene
        print("No XPU #%i found in all of %i devices, sorry..." % (Device,Id-1))
733 161 equemene
        sys.exit()
734 161 equemene
735 161 equemene
    # Create Context
736 161 equemene
    try:
737 161 equemene
        ctx = cl.Context([XPU])
738 161 equemene
        queue = cl.CommandQueue(ctx,properties=cl.command_queue_properties.PROFILING_ENABLE)
739 161 equemene
    except:
740 161 equemene
        print("Crash during context creation")
741 161 equemene
742 161 equemene
    # Build all routines used for the computing
743 166 equemene
744 166 equemene
    #BuildOptions="-cl-mad-enable -cl-kernel-arg-info -cl-fast-relaxed-math -cl-std=CL1.2 -DTRNG=%i -DTYPE=%i" % (Marsaglia[RNG],Computing[ValueType])
745 166 equemene
    BuildOptions="-cl-mad-enable -cl-fast-relaxed-math -DTRNG=%i -DTYPE=%i" % (Marsaglia[RNG],Computing[ValueType])
746 166 equemene
747 162 equemene
    if 'Intel' in PlatForm.name or 'Experimental' in PlatForm.name or 'Clover' in PlatForm.name or 'Portable' in PlatForm.name :
748 161 equemene
        MyRoutines = cl.Program(ctx, BlobOpenCL).build(options = BuildOptions)
749 161 equemene
    else:
750 161 equemene
        MyRoutines = cl.Program(ctx, BlobOpenCL).build(options = BuildOptions+" -cl-strict-aliasing")
751 166 equemene
752 161 equemene
    mf = cl.mem_flags
753 162 equemene
    # Read/Write approach for buffering
754 168 equemene
    clDataX = cl.Buffer(ctx, mf.READ_WRITE, MyDataX.nbytes)
755 168 equemene
    clDataV = cl.Buffer(ctx, mf.READ_WRITE, MyDataV.nbytes)
756 168 equemene
    clPotential = cl.Buffer(ctx, mf.READ_WRITE, MyPotential.nbytes)
757 168 equemene
    clKinetic = cl.Buffer(ctx, mf.READ_WRITE, MyKinetic.nbytes)
758 168 equemene
    clCoM = cl.Buffer(ctx, mf.READ_WRITE, MyCoM.nbytes)
759 166 equemene
760 166 equemene
    # Write/HostPointer approach for buffering
761 168 equemene
    # clDataX = cl.Buffer(ctx, mf.WRITE_ONLY|mf.COPY_HOST_PTR,hostbuf=MyDataX)
762 168 equemene
    # clDataV = cl.Buffer(ctx, mf.WRITE_ONLY|mf.COPY_HOST_PTR,hostbuf=MyDataV)
763 168 equemene
    # clPotential = cl.Buffer(ctx, mf.WRITE_ONLY|mf.COPY_HOST_PTR,hostbuf=MyPotential)
764 168 equemene
    # clKinetic = cl.Buffer(ctx, mf.WRITE_ONLY|mf.COPY_HOST_PTR,hostbuf=MyKinetic)
765 168 equemene
    # clCoM = cl.Buffer(ctx, mf.WRITE_ONLY|mf.COPY_HOST_PTR,hostbuf=MyCoM)
766 161 equemene
767 161 equemene
    print('All particles superimposed.')
768 161 equemene
769 161 equemene
    # Set particles to RNG points
770 161 equemene
    if InitialRandom:
771 168 equemene
        seed_w=np.uint32(nprnd(2**32))
772 168 equemene
        seed_z=np.uint32(nprnd(2**32))
773 161 equemene
    else:
774 168 equemene
        seed_w=np.uint32(19710211)
775 168 equemene
        seed_z=np.uint32(20081010)
776 168 equemene
777 168 equemene
    if Shape=='Ball':
778 168 equemene
        MyRoutines.InBallSplutterPoints(queue,(Number,1),None,clDataX,SizeOfShape,seed_w,seed_z)
779 168 equemene
    else:
780 168 equemene
        MyRoutines.InBoxSplutterPoints(queue,(Number,1),None,clDataX,SizeOfShape,seed_w,seed_z)
781 161 equemene
782 161 equemene
    print('All particules distributed')
783 161 equemene
784 161 equemene
    CLLaunch=MyRoutines.CenterOfMass(queue,(1,1),None,clDataX,clCoM,np.int32(Number))
785 161 equemene
    CLLaunch.wait()
786 161 equemene
    cl.enqueue_copy(queue,MyCoM,clCoM)
787 168 equemene
    print('Center Of Mass estimated: (%s,%s,%s)' % (MyCoM[0],MyCoM[1],MyCoM[2]))
788 161 equemene
789 161 equemene
    if VirielStress:
790 161 equemene
        CLLaunch=MyRoutines.SplutterStress(queue,(Number,1),None,clDataX,clDataV,clCoM,MyFloat(0.),np.uint32(110271),np.uint32(250173))
791 161 equemene
    else:
792 161 equemene
        CLLaunch=MyRoutines.SplutterStress(queue,(Number,1),None,clDataX,clDataV,clCoM,Velocity,np.uint32(110271),np.uint32(250173))
793 161 equemene
    CLLaunch.wait()
794 161 equemene
795 168 equemene
    print('All particules stressed')
796 168 equemene
797 161 equemene
    CLLaunch=MyRoutines.Potential(queue,(Number,1),None,clDataX,clPotential)
798 161 equemene
    CLLaunch=MyRoutines.Kinetic(queue,(Number,1),None,clDataV,clKinetic)
799 161 equemene
    CLLaunch.wait()
800 161 equemene
    cl.enqueue_copy(queue,MyPotential,clPotential)
801 161 equemene
    cl.enqueue_copy(queue,MyKinetic,clKinetic)
802 168 equemene
    print('Energy estimated: Viriel=%s Potential=%s Kinetic=%s\n'% (np.sum(MyPotential)+2*np.sum(MyKinetic),np.sum(MyPotential),np.sum(MyKinetic)))
803 169 equemene
804 169 equemene
    if SpeedRendering:
805 169 equemene
        SizeOfBox=max(2*MyKinetic)
806 169 equemene
    else:
807 169 equemene
        SizeOfBox=SizeOfShape
808 169 equemene
809 162 equemene
    wall_time_start=time.time()
810 162 equemene
811 164 equemene
    Durations=np.array([],dtype=MyFloat)
812 169 equemene
    print('Starting!')
813 169 equemene
    if OpenGL:
814 170 equemene
        global ViewRX,ViewRY,ViewRZ
815 169 equemene
        Iterations=0
816 169 equemene
        ViewRX,ViewRY,ViewRZ = 0.,0.,0.
817 169 equemene
        # Launch OpenGL Loop
818 169 equemene
        glutInit(sys.argv)
819 169 equemene
        glutInitDisplayMode(GLUT_DOUBLE | GLUT_RGB)
820 169 equemene
        glutSetOption(GLUT_ACTION_ON_WINDOW_CLOSE,GLUT_ACTION_CONTINUE_EXECUTION)
821 169 equemene
        glutInitWindowSize(512,512)
822 169 equemene
        glutCreateWindow(b'NBodyGL')
823 169 equemene
        setup_viewport()
824 169 equemene
        glutReshapeFunc(reshape)
825 169 equemene
        glutDisplayFunc(display)
826 169 equemene
        glutIdleFunc(display)
827 169 equemene
        #   glutMouseFunc(mouse)
828 169 equemene
        glutSpecialFunc(special)
829 169 equemene
        glutKeyboardFunc(keyboard)
830 169 equemene
        glutMainLoop()
831 169 equemene
    else:
832 169 equemene
        for iteration in range(Iterations):
833 169 equemene
            Elapsed=MainOpenCL(clDataX,clDataV,Step,Method)
834 169 equemene
            if Verbose:
835 169 equemene
                print("Duration of #%s iteration: %s" % (Iterations,Elapsed))
836 169 equemene
            else:
837 169 equemene
                sys.stdout.write('.')
838 169 equemene
                sys.stdout.flush()
839 169 equemene
            Durations=np.append(Durations,Elapsed)
840 169 equemene
841 169 equemene
    print('\nEnding!')
842 169 equemene
843 161 equemene
    MyRoutines.CenterOfMass(queue,(1,1),None,clDataX,clCoM,np.int32(Number))
844 161 equemene
    CLLaunch=MyRoutines.Potential(queue,(Number,1),None,clDataX,clPotential)
845 161 equemene
    CLLaunch=MyRoutines.Kinetic(queue,(Number,1),None,clDataV,clKinetic)
846 161 equemene
    CLLaunch.wait()
847 161 equemene
    cl.enqueue_copy(queue,MyCoM,clCoM)
848 161 equemene
    cl.enqueue_copy(queue,MyPotential,clPotential)
849 161 equemene
    cl.enqueue_copy(queue,MyKinetic,clKinetic)
850 168 equemene
    print('\nCenter Of Mass estimated: (%s,%s,%s)' % (MyCoM[0],MyCoM[1],MyCoM[2]))
851 168 equemene
    print('Energy estimated: Viriel=%s Potential=%s Kinetic=%s\n'% (np.sum(MyPotential)+2.*np.sum(MyKinetic),np.sum(MyPotential),np.sum(MyKinetic)))
852 164 equemene
853 168 equemene
    print("Duration stats on device %s with %s iterations :\n\tMean:\t%s\n\tMedian:\t%s\n\tStddev:\t%s\n\tMin:\t%s\n\tMax:\t%s\n\n\tVariability:\t%s\n" % (Device,Iterations,np.mean(Durations),np.median(Durations),np.std(Durations),np.min(Durations),np.max(Durations),np.std(Durations)/np.median(Durations)))
854 167 equemene
855 167 equemene
    # Contraction of Square*Size*Hertz: Size*Size/Elapsed
856 164 equemene
    Squertz=np.ones(len(Durations))
857 164 equemene
    Squertz*=Number*Number
858 164 equemene
    Squertz/=Durations
859 164 equemene
860 167 equemene
    print("Squertz in log10 stats on device %s with %s iterations :\n\tMean:\t%s\n\tMedian:\t%s\n\tStddev:\t%s\n\tMin:\t%s\n\tMax:\t%s\n" % (Device,Iterations,np.log10(np.mean(Squertz)),np.log10(np.median(Squertz)),np.log10(np.std(Squertz)),np.log10(np.min(Squertz)),np.log10(np.max(Squertz))))
861 164 equemene
862 161 equemene
    clDataX.release()
863 161 equemene
    clDataV.release()
864 161 equemene
    clKinetic.release()
865 161 equemene
    clPotential.release()