Statistiques
| Révision :

root / NBody / NBody.py @ 178

Historique | Voir | Annoter | Télécharger (28,97 ko)

1 128 equemene
#!/usr/bin/env python3
2 136 equemene
# -*- coding: utf-8 -*-
3 116 equemene
"""
4 171 equemene
NBody Demonstrator implemented in OpenCL, rendering OpenGL
5 116 equemene

6 174 equemene
CC BY-NC-SA 2011 : Emmanuel QUEMENER <emmanuel.quemener@gmail.com>
7 174 equemene
Cecill v2 : Emmanuel QUEMENER <emmanuel.quemener@gmail.com>
8 174 equemene

9 174 equemene
Thanks to Andreas Klockner for PyOpenCL:
10 174 equemene
http://mathema.tician.de/software/pyopencl
11 174 equemene

12 116 equemene
"""
13 116 equemene
import getopt
14 116 equemene
import sys
15 116 equemene
import time
16 116 equemene
import numpy as np
17 116 equemene
import pyopencl as cl
18 116 equemene
import pyopencl.array as cl_array
19 116 equemene
from numpy.random import randint as nprnd
20 170 equemene
import string, sys
21 170 equemene
from OpenGL.GL import *
22 170 equemene
from OpenGL.GLUT import *
23 116 equemene
24 132 equemene
def DictionariesAPI():
25 132 equemene
    Marsaglia={'CONG':0,'SHR3':1,'MWC':2,'KISS':3}
26 132 equemene
    Computing={'FP32':0,'FP64':1}
27 170 equemene
    Interaction={'Force':0,'Potential':1}
28 175 equemene
    Artevasion={'None':0,'NegExp':1,'CorRad':2}
29 171 equemene
    return(Marsaglia,Computing,Interaction,Artevasion)
30 132 equemene
31 142 equemene
BlobOpenCL= """
32 132 equemene
#define TFP32 0
33 132 equemene
#define TFP64 1
34 132 equemene

35 170 equemene
#define TFORCE 0
36 170 equemene
#define TPOTENTIAL 1
37 116 equemene

38 171 equemene
#define NONE 0
39 171 equemene
#define NEGEXP 1
40 175 equemene
#define CORRAD 2
41 171 equemene

42 132 equemene
#if TYPE == TFP32
43 132 equemene
#define MYFLOAT4 float4
44 132 equemene
#define MYFLOAT8 float8
45 132 equemene
#define MYFLOAT float
46 151 equemene
#define DISTANCE fast_distance
47 132 equemene
#else
48 132 equemene
#define MYFLOAT4 double4
49 132 equemene
#define MYFLOAT8 double8
50 132 equemene
#define MYFLOAT double
51 151 equemene
#define DISTANCE distance
52 170 equemene
#if defined(cl_khr_fp64)  // Khronos extension available?
53 170 equemene
#pragma OPENCL EXTENSION cl_khr_fp64 : enable
54 132 equemene
#endif
55 170 equemene
#endif
56 132 equemene

57 170 equemene
#define znew  ((zmwc=36969*(zmwc&65535)+(zmwc>>16))<<16)
58 170 equemene
#define wnew  ((wmwc=18000*(wmwc&65535)+(wmwc>>16))&65535)
59 170 equemene
#define MWC   (znew+wnew)
60 170 equemene
#define SHR3  (jsr=(jsr=(jsr=jsr^(jsr<<17))^(jsr>>13))^(jsr<<5))
61 170 equemene
#define CONG  (jcong=69069*jcong+1234567)
62 170 equemene
#define KISS  ((MWC^CONG)+SHR3)
63 170 equemene

64 160 equemene
#define MWCfp (MYFLOAT)(MWC * 2.3283064365386963e-10f)
65 160 equemene
#define KISSfp (MYFLOAT)(KISS * 2.3283064365386963e-10f)
66 160 equemene
#define SHR3fp (MYFLOAT)(SHR3 * 2.3283064365386963e-10f)
67 160 equemene
#define CONGfp (MYFLOAT)(CONG * 2.3283064365386963e-10f)
68 160 equemene

69 171 equemene
#define PI (MYFLOAT)3.141592653589793238e0f
70 160 equemene

71 170 equemene
#define SMALL_NUM (MYFLOAT)1.e-9f
72 160 equemene

73 175 equemene
#define CoreRadius (MYFLOAT)(1.e0f)
74 175 equemene

75 170 equemene
// Create my own Distance implementation: distance buggy on Oland AMD chipset
76 116 equemene

77 170 equemene
MYFLOAT MyDistance(MYFLOAT4 n,MYFLOAT4 m)
78 141 equemene
{
79 170 equemene
    private MYFLOAT x2,y2,z2;
80 170 equemene
    x2=n.s0-m.s0;
81 170 equemene
    x2*=x2;
82 170 equemene
    y2=n.s1-m.s1;
83 170 equemene
    y2*=y2;
84 170 equemene
    z2=n.s2-m.s2;
85 170 equemene
    z2*=z2;
86 170 equemene
    return(sqrt(x2+y2+z2));
87 141 equemene
}
88 141 equemene

89 171 equemene
// Potential between 2 m,n bodies
90 133 equemene
MYFLOAT PairPotential(MYFLOAT4 m,MYFLOAT4 n)
91 171 equemene
#if ARTEVASION == NEGEXP
92 171 equemene
// Add exp(-r) to numerator to avoid divergence for low distances
93 133 equemene
{
94 171 equemene
    MYFLOAT r=DISTANCE(n,m);
95 171 equemene
    return((-1.e0f+exp(-r))/r);
96 171 equemene
}
97 175 equemene
#elif ARTEVASION == CORRAD
98 175 equemene
// Add Core Radius to avoid divergence for low distances
99 175 equemene
{
100 175 equemene
    MYFLOAT r=DISTANCE(n,m);
101 175 equemene
    return(-1.e0f/sqrt(r*r+CoreRadius*CoreRadius));
102 175 equemene
}
103 171 equemene
#else
104 171 equemene
// Classical potential in 1/r
105 171 equemene
{
106 170 equemene
//    return((MYFLOAT)(-1.e0f)/(MyDistance(m,n)));
107 151 equemene
    return((MYFLOAT)(-1.e0f)/(DISTANCE(n,m)));
108 133 equemene
}
109 171 equemene
#endif
110 133 equemene

111 176 equemene
// Interaction based of Force as gradient of Potential
112 170 equemene
MYFLOAT4 Interaction(MYFLOAT4 m,MYFLOAT4 n)
113 170 equemene
#if INTERACTION == TFORCE
114 175 equemene
#if ARTEVASION == NEGEXP
115 175 equemene
// Force gradient of potential, set as (1-exp(-r))/r
116 175 equemene
{
117 175 equemene
    private MYFLOAT r=MyDistance(n,m);
118 175 equemene
    private MYFLOAT num=1.e0f+exp(-r)*(r-1.e0f);
119 175 equemene
    return((n-m)*num/(MYFLOAT)(r*r*r));
120 175 equemene
}
121 175 equemene
#elif ARTEVASION == CORRAD
122 175 equemene
// Force gradient of potential, (Core Radius) set as 1/sqrt(r**2+CoreRadius**2)
123 175 equemene
{
124 175 equemene
    private MYFLOAT r=MyDistance(n,m);
125 175 equemene
    private MYFLOAT den=sqrt(r*r+CoreRadius*CoreRadius);
126 175 equemene
    return((n-m)/(MYFLOAT)(den*den*den));
127 175 equemene
}
128 175 equemene
#else
129 170 equemene
// Simplest implementation of force (equals to acceleration)
130 170 equemene
// seems to bo bad (numerous artevasions)
131 170 equemene
// MYFLOAT4 InteractionForce(MYFLOAT4 m,MYFLOAT4 n)
132 170 equemene
{
133 170 equemene
    private MYFLOAT r=MyDistance(n,m);
134 170 equemene
    return((n-m)/(MYFLOAT)(r*r*r));
135 170 equemene
}
136 175 equemene
#endif
137 170 equemene
#else
138 170 equemene
// Force definited as gradient of potential
139 170 equemene
// Estimate potential and proximate potential to estimate force
140 170 equemene
{
141 171 equemene
    // 1/1024 seems to be a good factor: larger one provides bad results
142 171 equemene
    private MYFLOAT epsilon=(MYFLOAT)(1.e0f/1024);
143 170 equemene
    private MYFLOAT4 er=normalize(n-m);
144 170 equemene
    private MYFLOAT4 dr=er*(MYFLOAT)epsilon;
145 170 equemene

146 171 equemene
    return(er/epsilon*(PairPotential(m,n)-PairPotential(m+dr,n)));
147 170 equemene
}
148 170 equemene
#endif
149 170 equemene

150 160 equemene
MYFLOAT AtomicPotential(__global MYFLOAT4* clDataX,int gid)
151 139 equemene
{
152 160 equemene
    private MYFLOAT potential=(MYFLOAT)0.e0f;
153 160 equemene
    private MYFLOAT4 x=clDataX[gid];
154 139 equemene

155 139 equemene
    for (int i=0;i<get_global_size(0);i++)
156 139 equemene
    {
157 139 equemene
        if (gid != i)
158 160 equemene
        potential+=PairPotential(x,clDataX[i]);
159 139 equemene
    }
160 133 equemene

161 139 equemene
    barrier(CLK_GLOBAL_MEM_FENCE);
162 141 equemene
    return(potential);
163 139 equemene
}
164 139 equemene

165 160 equemene
MYFLOAT AtomicPotentialCoM(__global MYFLOAT4* clDataX,__global MYFLOAT4* clCoM,int gid)
166 139 equemene
{
167 160 equemene
    return(PairPotential(clDataX[gid],clCoM[0]));
168 139 equemene
}
169 139 equemene

170 170 equemene
// Elements from : http://doswa.com/2009/01/02/fourth-order-runge-kutta-numerical-integration.html
171 170 equemene

172 160 equemene
MYFLOAT8 AtomicRungeKutta(__global MYFLOAT4* clDataInX,__global MYFLOAT4* clDataInV,int gid,MYFLOAT dt)
173 116 equemene
{
174 160 equemene
    private MYFLOAT4 a0,v0,x0,a1,v1,x1,a2,v2,x2,a3,v3,x3,a4,v4,x4,xf,vf;
175 170 equemene
    MYFLOAT4 DT=dt*(MYFLOAT4)(1.e0f,1.e0f,1.e0f,1.e0f);
176 160 equemene

177 160 equemene
    a0=(MYFLOAT4)(0.e0f,0.e0f,0.e0f,0.e0f);
178 160 equemene
    v0=(MYFLOAT4)clDataInV[gid];
179 160 equemene
    x0=(MYFLOAT4)clDataInX[gid];
180 133 equemene
    int N = get_global_size(0);
181 133 equemene

182 170 equemene
    for (private int i=0;i<N;i++)
183 121 equemene
    {
184 121 equemene
        if (gid != i)
185 160 equemene
        a0+=Interaction(x0,clDataInX[i]);
186 121 equemene
    }
187 121 equemene

188 160 equemene
    a1=(MYFLOAT4)(0.e0f,0.e0f,0.e0f,0.e0f);
189 170 equemene
    v1=a0*dt+v0;
190 170 equemene
    x1=v0*dt+x0;
191 170 equemene
    for (private int j=0;j<N;j++)
192 121 equemene
    {
193 170 equemene
        if (gid != j)
194 170 equemene
        a1+=Interaction(x1,clDataInX[j]);
195 121 equemene
    }
196 121 equemene

197 160 equemene
    a2=(MYFLOAT4)(0.e0f,0.e0f,0.e0f,0.e0f);
198 170 equemene
    v2=a1*(MYFLOAT)(dt/2.e0f)+v0;
199 170 equemene
    x2=v1*(MYFLOAT)(dt/2.e0f)+x0;
200 170 equemene
    for (private int k=0;k<N;k++)
201 121 equemene
    {
202 170 equemene
        if (gid != k)
203 170 equemene
        a2+=Interaction(x2,clDataInX[k]);
204 121 equemene
    }
205 121 equemene

206 160 equemene
    a3=(MYFLOAT4)(0.e0f,0.e0f,0.e0f,0.e0f);
207 170 equemene
    v3=a2*(MYFLOAT)(dt/2.e0f)+v0;
208 170 equemene
    x3=v2*(MYFLOAT)(dt/2.e0f)+x0;
209 170 equemene
    for (private int l=0;l<N;l++)
210 121 equemene
    {
211 170 equemene
        if (gid != l)
212 170 equemene
        a3+=Interaction(x3,clDataInX[l]);
213 121 equemene
    }
214 121 equemene

215 160 equemene
    a4=(MYFLOAT4)(0.e0f,0.e0f,0.e0f,0.e0f);
216 170 equemene
    v4=a3*dt+v0;
217 170 equemene
    x4=v3*dt+x0;
218 170 equemene
    for (private int m=0;m<N;m++)
219 141 equemene
    {
220 170 equemene
        if (gid != m)
221 170 equemene
        a4+=Interaction(x4,clDataInX[m]);
222 141 equemene
    }
223 141 equemene

224 160 equemene
    xf=x0+dt*(v1+(MYFLOAT)2.e0f*(v2+v3)+v4)/(MYFLOAT)6.e0f;
225 160 equemene
    vf=v0+dt*(a1+(MYFLOAT)2.e0f*(a2+a3)+a4)/(MYFLOAT)6.e0f;
226 121 equemene

227 170 equemene
    return((MYFLOAT8)(xf.s0,xf.s1,xf.s2,0.e0f,vf.s0,vf.s1,vf.s2,0.e0f));
228 121 equemene
}
229 121 equemene

230 160 equemene
MYFLOAT8 AtomicHeun(__global MYFLOAT4* clDataInX,__global MYFLOAT4* clDataInV,int gid,MYFLOAT dt)
231 121 equemene
{
232 170 equemene
    private MYFLOAT4 x0,v0,a0,x1,v1,a1,xf,vf;
233 170 equemene
    MYFLOAT4 Dt=dt*(MYFLOAT4)(1.e0f,1.e0f,1.e0f,1.e0f);
234 116 equemene

235 170 equemene
    x0=(MYFLOAT4)clDataInX[gid];
236 170 equemene
    v0=(MYFLOAT4)clDataInV[gid];
237 170 equemene
    a0=(MYFLOAT4)(0.e0f,0.e0f,0.e0f,0.e0f);
238 141 equemene

239 170 equemene
    for (private int i=0;i<get_global_size(0);i++)
240 116 equemene
    {
241 116 equemene
        if (gid != i)
242 170 equemene
        a0+=Interaction(x0,clDataInX[i]);
243 116 equemene
    }
244 141 equemene

245 170 equemene
    a1=(MYFLOAT4)(0.e0f,0.e0f,0.e0f,0.e0f);
246 170 equemene
    //v1=v0+dt*a0;
247 170 equemene
    //x1=x0+dt*v0;
248 170 equemene
    v1=dt*a0+v0;
249 170 equemene
    x1=dt*v0+x0;
250 141 equemene

251 170 equemene
    for (private int j=0;j<get_global_size(0);j++)
252 116 equemene
    {
253 170 equemene
        if (gid != j)
254 170 equemene
        a1+=Interaction(x1,clDataInX[j]);
255 116 equemene
    }
256 118 equemene

257 170 equemene
    vf=v0+dt*(a0+a1)/(MYFLOAT)2.e0f;
258 170 equemene
    xf=x0+dt*(v0+v1)/(MYFLOAT)2.e0f;
259 170 equemene

260 170 equemene
    return((MYFLOAT8)(xf.s0,xf.s1,xf.s2,0.e0f,vf.s0,vf.s1,vf.s2,0.e0f));
261 119 equemene
}
262 119 equemene

263 160 equemene
MYFLOAT8 AtomicImplicitEuler(__global MYFLOAT4* clDataInX,__global MYFLOAT4* clDataInV,int gid,MYFLOAT dt)
264 119 equemene
{
265 170 equemene
    MYFLOAT4 x0,v0,a,xf,vf;
266 119 equemene

267 170 equemene
    x0=(MYFLOAT4)clDataInX[gid];
268 170 equemene
    v0=(MYFLOAT4)clDataInV[gid];
269 151 equemene
    a=(MYFLOAT4)(0.e0f,0.e0f,0.e0f,0.e0f);
270 160 equemene

271 170 equemene
    for (private int i=0;i<get_global_size(0);i++)
272 119 equemene
    {
273 119 equemene
        if (gid != i)
274 170 equemene
          a+=Interaction(x0,clDataInX[i]);
275 119 equemene
    }
276 133 equemene

277 170 equemene
    vf=v0+dt*a;
278 170 equemene
    xf=x0+dt*vf;
279 170 equemene

280 170 equemene
    return((MYFLOAT8)(xf.s0,xf.s1,xf.s2,0.e0f,vf.s0,vf.s1,vf.s2,0.e0f));
281 116 equemene
}
282 116 equemene

283 160 equemene
MYFLOAT8 AtomicExplicitEuler(__global MYFLOAT4* clDataInX,__global MYFLOAT4* clDataInV,int gid,MYFLOAT dt)
284 140 equemene
{
285 170 equemene
    MYFLOAT4 x0,v0,a,xf,vf;
286 140 equemene

287 170 equemene
    x0=(MYFLOAT4)clDataInX[gid];
288 170 equemene
    v0=(MYFLOAT4)clDataInV[gid];
289 151 equemene
    a=(MYFLOAT4)(0.e0f,0.e0f,0.e0f,0.e0f);
290 160 equemene

291 170 equemene
    for (private int i=0;i<get_global_size(0);i++)
292 140 equemene
    {
293 140 equemene
        if (gid != i)
294 170 equemene
        a+=Interaction(x0,clDataInX[i]);
295 140 equemene
    }
296 140 equemene

297 170 equemene
    vf=v0+dt*a;
298 170 equemene
    xf=x0+dt*v0;
299 140 equemene

300 170 equemene
    return((MYFLOAT8)(xf.s0,xf.s1,xf.s2,0.e0f,vf.s0,vf.s1,vf.s2,0.e0f));
301 140 equemene
}
302 140 equemene

303 170 equemene
__kernel void InBallSplutterPoints(__global MYFLOAT4* clDataX,
304 170 equemene
                                   MYFLOAT diameter,uint seed_z,uint seed_w)
305 170 equemene
{
306 170 equemene
    private int gid=get_global_id(0);
307 170 equemene
    private uint zmwc=seed_z+gid;
308 170 equemene
    private uint wmwc=seed_w+(gid+1)%2;
309 170 equemene
    private MYFLOAT Heat;
310 170 equemene

311 170 equemene
    for (int i=0;i<gid;i++)
312 170 equemene
    {
313 170 equemene
        Heat=MWCfp;
314 170 equemene
    }
315 170 equemene

316 170 equemene
// More accurate distribution based on spherical coordonates
317 170 equemene
// Disactivated because of AMD Oland GPU crash on launch
318 170 equemene
//     private MYFLOAT Radius,Theta,Phi,PosX,PosY,PosZ,SinTheta;
319 170 equemene
//     Radius=MWCfp*diameter/2.e0f;
320 170 equemene
//     Theta=(MYFLOAT)acos((float)(-2.e0f*MWCfp+1.0e0f));
321 170 equemene
//     Phi=(MYFLOAT)(2.e0f*PI*MWCfp);
322 170 equemene
//     SinTheta=sin((float)Theta);
323 170 equemene
//     PosX=cos((float)Phi)*Radius*SinTheta;
324 170 equemene
//     PosY=sin((float)Phi)*Radius*SinTheta;
325 170 equemene
//     PosZ=cos((float)Theta)*Radius;
326 170 equemene
//     clDataX[gid]=(MYFLOAT4)(PosX,PosY,PosZ,0.e0f);
327 170 equemene

328 170 equemene
    private MYFLOAT Radius=diameter/2.e0f;
329 170 equemene
    private MYFLOAT Length=diameter;
330 170 equemene
    private MYFLOAT4 Position;
331 170 equemene
    while (Length>Radius) {
332 170 equemene
       Position=(MYFLOAT4)((MWCfp-0.5e0f)*diameter,(MWCfp-0.5e0f)*diameter,(MWCfp-0.5e0f)*diameter,0.e0f);
333 170 equemene
       Length=(MYFLOAT)length((MYFLOAT4)Position);
334 170 equemene
    }
335 170 equemene

336 170 equemene
    clDataX[gid]=Position;
337 170 equemene

338 170 equemene
    barrier(CLK_GLOBAL_MEM_FENCE);
339 170 equemene
}
340 170 equemene

341 170 equemene
__kernel void InBoxSplutterPoints(__global MYFLOAT4* clDataX, MYFLOAT box,
342 139 equemene
                             uint seed_z,uint seed_w)
343 116 equemene
{
344 170 equemene
    int gid=get_global_id(0);
345 170 equemene
    uint zmwc=seed_z+gid;
346 170 equemene
    uint wmwc=seed_w-gid;
347 170 equemene
    private MYFLOAT Heat;
348 170 equemene

349 170 equemene
    for (int i=0;i<gid;i++)
350 170 equemene
    {
351 170 equemene
        Heat=MWCfp;
352 170 equemene
    }
353 137 equemene

354 170 equemene
    clDataX[gid]=(MYFLOAT4)((MWCfp-0.5e0f)*box,(MWCfp-0.5e0f)*box,(MWCfp-0.5e0f)*box,0.e0f);
355 170 equemene

356 170 equemene
    barrier(CLK_GLOBAL_MEM_FENCE);
357 116 equemene
}
358 116 equemene

359 160 equemene
__kernel void SplutterStress(__global MYFLOAT4* clDataX,__global MYFLOAT4* clDataV,__global MYFLOAT4* clCoM, MYFLOAT velocity,uint seed_z,uint seed_w)
360 139 equemene
{
361 139 equemene
    int gid = get_global_id(0);
362 142 equemene
    MYFLOAT N = (MYFLOAT)get_global_size(0);
363 170 equemene
    uint zmwc=seed_z+(uint)gid;
364 170 equemene
    uint wmwc=seed_w-(uint)gid;
365 178 equemene
    MYFLOAT4 CrossVector,SpeedVector,FromCoM;
366 178 equemene
    MYFLOAT Heat,ThetaA,PhiA,ThetaB,PhiB,Length,tA,tB,Polar;
367 139 equemene

368 178 equemene
    for (int i=0;i<gid;i++)
369 178 equemene
    {
370 178 equemene
        Heat=MWCfp;
371 178 equemene
    }
372 178 equemene

373 178 equemene
    // cast to float for sin,cos are NEEDED by Mesa FP64 implementation!
374 178 equemene
    // Implemention on AMD Oland are probably broken in float
375 178 equemene

376 178 equemene
    FromCoM=(MYFLOAT4)(clDataX[gid]-clCoM[0]);
377 178 equemene
    Length=length(FromCoM);
378 178 equemene
    //Theta=acos(FromCoM.z/Length);
379 178 equemene
    //Phi=atan(FromCoM.y/FromCoM.x);
380 178 equemene
    // First tangential vector to sphere of length radius
381 178 equemene
    ThetaA=acos(FromCoM.x/Length)+5.e-1f*PI;
382 178 equemene
    PhiA=atan(FromCoM.y/FromCoM.z);
383 178 equemene
    // Second tangential vector to sphere of length radius
384 178 equemene
    ThetaB=acos(FromCoM.x/Length);
385 178 equemene
    PhiB=atan(FromCoM.y/FromCoM.z)+5.e-1f*PI;
386 178 equemene
    // (x,y) random coordonates to plane tangential to sphere
387 178 equemene
    Polar=MWCfp*2.e0f*PI;
388 178 equemene
    tA=cos(Polar);
389 178 equemene
    tB=sin(Polar);
390 178 equemene
    //tA=MWCfp*2.e0f-1.e0f;
391 178 equemene
    //tB=MWCfp*2.e0f-1.e0f;
392 178 equemene

393 178 equemene
    // Exception for 2 particules to ovoid shifting
394 171 equemene
    if (get_global_size(0)==2) {
395 171 equemene
       CrossVector=(MYFLOAT4)(1.e0f,1.e0f,1.e0f,0.e0f);
396 171 equemene
    } else {
397 178 equemene
       CrossVector.s0=tA*cos(ThetaA)+tB*cos(ThetaB);
398 178 equemene
       CrossVector.s1=tA*sin(ThetaA)*sin(PhiA)+tB*sin(ThetaB)*sin(PhiB);
399 178 equemene
       CrossVector.s2=tA*sin(ThetaA)*cos(PhiA)+tB*sin(ThetaB)*cos(PhiB);
400 178 equemene
       CrossVector.s3=0.e0f;
401 171 equemene
    }
402 171 equemene

403 139 equemene
    if (velocity<SMALL_NUM) {
404 178 equemene
       SpeedVector=(MYFLOAT4)normalize(cross(FromCoM,CrossVector))*sqrt((-AtomicPotential(clDataX,gid)/(MYFLOAT)2.e0f));
405 139 equemene
    }
406 139 equemene
    else
407 178 equemene
    {
408 170 equemene

409 176 equemene
       SpeedVector=(MYFLOAT4)((MWCfp-5e-1f)*velocity,(MWCfp-5e-1f)*velocity,
410 176 equemene
                              (MWCfp-5e-1f)*velocity,0.e0f);
411 139 equemene
    }
412 170 equemene
    clDataV[gid]=SpeedVector;
413 170 equemene
    barrier(CLK_GLOBAL_MEM_FENCE);
414 139 equemene
}
415 139 equemene

416 160 equemene
__kernel void RungeKutta(__global MYFLOAT4* clDataX,__global MYFLOAT4* clDataV,MYFLOAT h)
417 116 equemene
{
418 170 equemene
    private int gid = get_global_id(0);
419 170 equemene
    private MYFLOAT8 clDataGid;
420 116 equemene

421 170 equemene
    clDataGid=AtomicRungeKutta(clDataX,clDataV,gid,h);
422 116 equemene
    barrier(CLK_GLOBAL_MEM_FENCE);
423 170 equemene
    clDataX[gid]=clDataGid.s0123;
424 170 equemene
    clDataV[gid]=clDataGid.s4567;
425 116 equemene
}
426 116 equemene

427 170 equemene
__kernel void Heun(__global MYFLOAT4* clDataX,__global MYFLOAT4* clDataV,MYFLOAT h)
428 116 equemene
{
429 170 equemene
    private int gid = get_global_id(0);
430 170 equemene
    private MYFLOAT8 clDataGid;
431 116 equemene

432 170 equemene
    clDataGid=AtomicHeun(clDataX,clDataV,gid,h);
433 116 equemene
    barrier(CLK_GLOBAL_MEM_FENCE);
434 170 equemene
    clDataX[gid]=clDataGid.s0123;
435 170 equemene
    clDataV[gid]=clDataGid.s4567;
436 116 equemene
}
437 133 equemene

438 170 equemene
__kernel void ImplicitEuler(__global MYFLOAT4* clDataX,__global MYFLOAT4* clDataV,MYFLOAT h)
439 141 equemene
{
440 170 equemene
    private int gid = get_global_id(0);
441 170 equemene
    private MYFLOAT8 clDataGid;
442 141 equemene

443 170 equemene
    clDataGid=AtomicImplicitEuler(clDataX,clDataV,gid,h);
444 141 equemene
    barrier(CLK_GLOBAL_MEM_FENCE);
445 170 equemene
    clDataX[gid]=clDataGid.s0123;
446 170 equemene
    clDataV[gid]=clDataGid.s4567;
447 141 equemene
}
448 141 equemene

449 160 equemene
__kernel void ExplicitEuler(__global MYFLOAT4* clDataX,__global MYFLOAT4* clDataV,MYFLOAT h)
450 140 equemene
{
451 170 equemene
    private int gid = get_global_id(0);
452 170 equemene
    private MYFLOAT8 clDataGid;
453 170 equemene

454 170 equemene
    clDataGid=AtomicExplicitEuler(clDataX,clDataV,gid,h);
455 140 equemene
    barrier(CLK_GLOBAL_MEM_FENCE);
456 170 equemene
    clDataX[gid]=clDataGid.s0123;
457 170 equemene
    clDataV[gid]=clDataGid.s4567;
458 140 equemene
}
459 139 equemene

460 160 equemene
__kernel void CoMPotential(__global MYFLOAT4* clDataX,__global MYFLOAT4* clCoM,__global MYFLOAT* clPotential)
461 133 equemene
{
462 133 equemene
    int gid = get_global_id(0);
463 133 equemene

464 160 equemene
    clPotential[gid]=PairPotential(clDataX[gid],clCoM[0]);
465 139 equemene
}
466 139 equemene

467 160 equemene
__kernel void Potential(__global MYFLOAT4* clDataX,__global MYFLOAT* clPotential)
468 139 equemene
{
469 139 equemene
    int gid = get_global_id(0);
470 139 equemene

471 155 equemene
    MYFLOAT potential=(MYFLOAT)0.e0f;
472 160 equemene
    MYFLOAT4 x=clDataX[gid];
473 133 equemene

474 133 equemene
    for (int i=0;i<get_global_size(0);i++)
475 133 equemene
    {
476 133 equemene
        if (gid != i)
477 160 equemene
        potential+=PairPotential(x,clDataX[i]);
478 133 equemene
    }
479 133 equemene

480 133 equemene
    barrier(CLK_GLOBAL_MEM_FENCE);
481 155 equemene
    clPotential[gid]=potential*(MYFLOAT)5.e-1f;
482 133 equemene
}
483 133 equemene

484 160 equemene
__kernel void CenterOfMass(__global MYFLOAT4* clDataX,__global MYFLOAT4* clCoM,int Size)
485 139 equemene
{
486 160 equemene
    MYFLOAT4 CoM=clDataX[0];
487 142 equemene

488 139 equemene
    for (int i=1;i<Size;i++)
489 139 equemene
    {
490 160 equemene
        CoM+=clDataX[i];
491 139 equemene
    }
492 142 equemene

493 139 equemene
    barrier(CLK_GLOBAL_MEM_FENCE);
494 170 equemene
    clCoM[0]=(MYFLOAT4)(CoM.s0,CoM.s1,CoM.s2,0.e0f)/(MYFLOAT)Size;
495 139 equemene
}
496 139 equemene

497 160 equemene
__kernel void Kinetic(__global MYFLOAT4* clDataV,__global MYFLOAT* clKinetic)
498 133 equemene
{
499 133 equemene
    int gid = get_global_id(0);
500 133 equemene

501 139 equemene
    barrier(CLK_GLOBAL_MEM_FENCE);
502 160 equemene
    MYFLOAT d=(MYFLOAT)length(clDataV[gid]);
503 155 equemene
    clKinetic[gid]=(MYFLOAT)5.e-1f*(MYFLOAT)(d*d);
504 133 equemene
}
505 170 equemene

506 116 equemene
"""
507 116 equemene
508 170 equemene
def MainOpenCL(clDataX,clDataV,Step,Method):
509 170 equemene
    time_start=time.time()
510 170 equemene
    if Method=="RungeKutta":
511 170 equemene
        CLLaunch=MyRoutines.RungeKutta(queue,(Number,1),None,clDataX,clDataV,Step)
512 170 equemene
    elif Method=="ExplicitEuler":
513 170 equemene
        CLLaunch=MyRoutines.ExplicitEuler(queue,(Number,1),None,clDataX,clDataV,Step)
514 170 equemene
    elif Method=="Heun":
515 170 equemene
        CLLaunch=MyRoutines.Heun(queue,(Number,1),None,clDataX,clDataV,Step)
516 170 equemene
    else:
517 170 equemene
        CLLaunch=MyRoutines.ImplicitEuler(queue,(Number,1),None,clDataX,clDataV,Step)
518 170 equemene
    CLLaunch.wait()
519 170 equemene
    Elapsed=time.time()-time_start
520 170 equemene
    return(Elapsed)
521 170 equemene
522 170 equemene
def display(*args):
523 133 equemene
524 170 equemene
    global MyDataX,MyDataV,clDataX,clDataV,Step,Method,Number,Iterations,Durations,Verbose,SpeedRendering
525 170 equemene
526 170 equemene
    glClearColor(0.0, 0.0, 0.0, 0.0)
527 170 equemene
    glClear(GL_COLOR_BUFFER_BIT)
528 170 equemene
    glColor3f(1.0,1.0,1.0)
529 170 equemene
530 170 equemene
    Elapsed=MainOpenCL(clDataX,clDataV,Step,Method)
531 170 equemene
    if SpeedRendering:
532 170 equemene
        cl.enqueue_copy(queue, MyDataV, clDataV)
533 170 equemene
        MyDataV.reshape(Number,4)[:,3]=1
534 170 equemene
        glVertexPointerf(MyDataV.reshape(Number,4))
535 170 equemene
    else:
536 170 equemene
        cl.enqueue_copy(queue, MyDataX, clDataX)
537 170 equemene
        MyDataX.reshape(Number,4)[:,3]=1
538 170 equemene
        glVertexPointerf(MyDataX.reshape(Number,4))
539 170 equemene
540 170 equemene
    if Verbose:
541 170 equemene
        print("Positions for #%s iteration: %s" % (Iterations,MyDataX))
542 170 equemene
    else:
543 170 equemene
        sys.stdout.write('.')
544 170 equemene
        sys.stdout.flush()
545 170 equemene
    Durations=np.append(Durations,MainOpenCL(clDataX,clDataV,Step,Method))
546 170 equemene
    glEnableClientState(GL_VERTEX_ARRAY)
547 170 equemene
    glDrawArrays(GL_POINTS, 0, Number)
548 170 equemene
    glDisableClientState(GL_VERTEX_ARRAY)
549 170 equemene
    glFlush()
550 170 equemene
    Iterations+=1
551 170 equemene
    glutSwapBuffers()
552 170 equemene
553 170 equemene
def halt():
554 170 equemene
    pass
555 170 equemene
556 170 equemene
def keyboard(k,x,y):
557 174 equemene
    global ViewRZ,SpeedRendering
558 170 equemene
    LC_Z = as_8_bit( 'z' )
559 170 equemene
    UC_Z = as_8_bit( 'Z' )
560 170 equemene
    Plus = as_8_bit( '+' )
561 170 equemene
    Minus = as_8_bit( '-' )
562 170 equemene
    Switch = as_8_bit( 's' )
563 170 equemene
564 170 equemene
    Zoom=1
565 175 equemene
    if k == LC_Z:
566 170 equemene
        ViewRZ += 1.0
567 170 equemene
    elif k == UC_Z:
568 170 equemene
        ViewRZ -= 1.0
569 170 equemene
    elif k == Plus:
570 170 equemene
        Zoom *= 2.0
571 170 equemene
    elif k == Minus:
572 170 equemene
        Zoom /= 2.0
573 170 equemene
    elif k == Switch:
574 170 equemene
        if SpeedRendering:
575 170 equemene
            SpeedRendering=False
576 170 equemene
        else:
577 170 equemene
            SpeedRendering=True
578 170 equemene
    elif ord(k) == 27: # Escape
579 170 equemene
        glutLeaveMainLoop()
580 170 equemene
        return(False)
581 170 equemene
    else:
582 170 equemene
        return
583 170 equemene
    glRotatef(ViewRZ, 0.0, 0.0, 1.0)
584 170 equemene
    glScalef(Zoom,Zoom,Zoom)
585 170 equemene
    glutPostRedisplay()
586 170 equemene
587 170 equemene
def special(k,x,y):
588 174 equemene
    global ViewRX, ViewRY
589 170 equemene
590 178 equemene
    Step=1.
591 170 equemene
    if k == GLUT_KEY_UP:
592 178 equemene
        ViewRX += Step
593 170 equemene
    elif k == GLUT_KEY_DOWN:
594 178 equemene
        ViewRX -= Step
595 170 equemene
    elif k == GLUT_KEY_LEFT:
596 178 equemene
        ViewRY += Step
597 170 equemene
    elif k == GLUT_KEY_RIGHT:
598 178 equemene
        ViewRY -= Step
599 170 equemene
    else:
600 170 equemene
        return
601 170 equemene
    glRotatef(ViewRX, 1.0, 0.0, 0.0)
602 170 equemene
    glRotatef(ViewRY, 0.0, 1.0, 0.0)
603 170 equemene
    glutPostRedisplay()
604 170 equemene
605 170 equemene
def setup_viewport():
606 170 equemene
    global SizeOfBox
607 170 equemene
    glMatrixMode(GL_PROJECTION)
608 170 equemene
    glLoadIdentity()
609 170 equemene
    glOrtho(-SizeOfBox, SizeOfBox, -SizeOfBox, SizeOfBox, -SizeOfBox, SizeOfBox)
610 170 equemene
    glutPostRedisplay()
611 170 equemene
612 170 equemene
def reshape(w, h):
613 170 equemene
    glViewport(0, 0, w, h)
614 170 equemene
    setup_viewport()
615 170 equemene
616 116 equemene
if __name__=='__main__':
617 170 equemene
618 170 equemene
    global Number,Step,clDataX,clDataV,MyDataX,MyDataV,Method,SizeOfBox,Iterations,Verbose,Durations
619 116 equemene
620 132 equemene
    # ValueType
621 132 equemene
    ValueType='FP32'
622 132 equemene
    class MyFloat(np.float32):pass
623 160 equemene
    #    clType8=cl_array.vec.float8
624 116 equemene
    # Set defaults values
625 118 equemene
    np.set_printoptions(precision=2)
626 116 equemene
    # Id of Device : 1 is for first find !
627 160 equemene
    Device=0
628 170 equemene
    # Number of bodies is integer
629 160 equemene
    Number=2
630 170 equemene
    # Number of iterations (for standalone execution)
631 170 equemene
    Iterations=10
632 170 equemene
    # Size of shape
633 170 equemene
    SizeOfShape=MyFloat(1.)
634 116 equemene
    # Initial velocity of particules
635 132 equemene
    Velocity=MyFloat(1.)
636 116 equemene
    # Step
637 170 equemene
    Step=MyFloat(1./32)
638 121 equemene
    # Method of integration
639 150 equemene
    Method='ImplicitEuler'
640 132 equemene
    # InitialRandom
641 132 equemene
    InitialRandom=False
642 132 equemene
    # RNG Marsaglia Method
643 132 equemene
    RNG='MWC'
644 139 equemene
    # Viriel Distribution of stress
645 139 equemene
    VirielStress=True
646 170 equemene
    # Verbose
647 170 equemene
    Verbose=False
648 170 equemene
    # OpenGL real time rendering
649 170 equemene
    OpenGL=False
650 170 equemene
    # Speed rendering
651 170 equemene
    SpeedRendering=False
652 171 equemene
    # Counter ArtEvasions Measures (artefact evasion)
653 171 equemene
    CoArEv='None'
654 170 equemene
    # Shape to distribute
655 170 equemene
    Shape='Ball'
656 170 equemene
    # Type of Interaction
657 172 equemene
    InterType='Force'
658 132 equemene
659 175 equemene
    HowToUse='%s -h [Help] -r [InitialRandom] -g [OpenGL] -e [VirielStress] -o [Verbose] -p [Potential] -x <None|NegExp|CorRad> -d <DeviceId> -n <NumberOfParticules> -i <Iterations> -z <SizeOfBoxOrBall> -v <Velocity> -s <Step> -b <Ball|Box> -m <ImplicitEuler|RungeKutta|ExplicitEuler|Heun> -t <FP32|FP64>'
660 116 equemene
661 116 equemene
    try:
662 175 equemene
        opts, args = getopt.getopt(sys.argv[1:],"rpgehod:n:i:z:v:s:m:t:b:x:",["random","potential","coarev","opengl","viriel","verbose","device=","number=","iterations=","size=","velocity=","step=","method=","valuetype=","shape="])
663 116 equemene
    except getopt.GetoptError:
664 128 equemene
        print(HowToUse % sys.argv[0])
665 116 equemene
        sys.exit(2)
666 116 equemene
667 116 equemene
    for opt, arg in opts:
668 116 equemene
        if opt == '-h':
669 128 equemene
            print(HowToUse % sys.argv[0])
670 116 equemene
671 128 equemene
            print("\nInformations about devices detected under OpenCL:")
672 116 equemene
            try:
673 132 equemene
                Id=0
674 116 equemene
                for platform in cl.get_platforms():
675 116 equemene
                    for device in platform.get_devices():
676 170 equemene
                        # Failed now because of POCL implementation
677 137 equemene
                        #deviceType=cl.device_type.to_string(device.type)
678 149 equemene
                        deviceType="xPU"
679 128 equemene
                        print("Device #%i from %s of type %s : %s" % (Id,platform.vendor.lstrip(),deviceType,device.name.lstrip()))
680 116 equemene
                        Id=Id+1
681 116 equemene
                sys.exit()
682 116 equemene
            except ImportError:
683 128 equemene
                print("Your platform does not seem to support OpenCL")
684 116 equemene
                sys.exit()
685 116 equemene
686 132 equemene
        elif opt in ("-t", "--valuetype"):
687 132 equemene
            if arg=='FP64':
688 132 equemene
                class MyFloat(np.float64): pass
689 132 equemene
            else:
690 132 equemene
                class MyFloat(np.float32):pass
691 132 equemene
            ValueType = arg
692 116 equemene
        elif opt in ("-d", "--device"):
693 116 equemene
            Device=int(arg)
694 121 equemene
        elif opt in ("-m", "--method"):
695 121 equemene
            Method=arg
696 170 equemene
        elif opt in ("-b", "--shape"):
697 170 equemene
            Shape=arg
698 175 equemene
            if Shape!='Ball' or Shape!='Box':
699 175 equemene
                print('Wrong argument: set to Ball')
700 116 equemene
        elif opt in ("-n", "--number"):
701 116 equemene
            Number=int(arg)
702 170 equemene
        elif opt in ("-i", "--iterations"):
703 170 equemene
            Iterations=int(arg)
704 116 equemene
        elif opt in ("-z", "--size"):
705 170 equemene
            SizeOfShape=MyFloat(arg)
706 116 equemene
        elif opt in ("-v", "--velocity"):
707 132 equemene
            Velocity=MyFloat(arg)
708 139 equemene
            VirielStress=False
709 116 equemene
        elif opt in ("-s", "--step"):
710 132 equemene
            Step=MyFloat(arg)
711 132 equemene
        elif opt in ("-r", "--random"):
712 132 equemene
            InitialRandom=True
713 133 equemene
        elif opt in ("-c", "--check"):
714 133 equemene
            CheckEnergies=True
715 139 equemene
        elif opt in ("-e", "--viriel"):
716 139 equemene
            VirielStress=True
717 170 equemene
        elif opt in ("-g", "--opengl"):
718 170 equemene
            OpenGL=True
719 172 equemene
        elif opt in ("-p", "--potential"):
720 172 equemene
            InterType='Potential'
721 175 equemene
        elif opt in ("-x", "--coarev"):
722 175 equemene
            CoArEv=arg
723 170 equemene
        elif opt in ("-o", "--verbose"):
724 170 equemene
            Verbose=True
725 170 equemene
726 175 equemene
    SizeOfShape=np.sqrt(MyFloat(SizeOfShape*Number))
727 132 equemene
    Velocity=MyFloat(Velocity)
728 132 equemene
    Step=MyFloat(Step)
729 132 equemene
730 128 equemene
    print("Device choosed : %s" % Device)
731 128 equemene
    print("Number of particules : %s" % Number)
732 170 equemene
    print("Size of Shape : %s" % SizeOfShape)
733 160 equemene
    print("Initial velocity : %s" % Velocity)
734 170 equemene
    print("Step of iteration : %s" % Step)
735 160 equemene
    print("Number of iterations : %s" % Iterations)
736 160 equemene
    print("Method of resolution : %s" % Method)
737 160 equemene
    print("Initial Random for RNG Seed : %s" % InitialRandom)
738 160 equemene
    print("ValueType is : %s" % ValueType)
739 170 equemene
    print("Viriel distribution of stress : %s" % VirielStress)
740 170 equemene
    print("OpenGL real time rendering : %s" % OpenGL)
741 170 equemene
    print("Speed rendering : %s" % SpeedRendering)
742 170 equemene
    print("Interaction type : %s" % InterType)
743 171 equemene
    print("Counter Artevasion type : %s" % CoArEv)
744 116 equemene
745 132 equemene
    # Create Numpy array of CL vector with 8 FP32
746 170 equemene
    MyCoM = np.zeros(4,dtype=MyFloat)
747 170 equemene
    MyDataX = np.zeros(Number*4, dtype=MyFloat)
748 170 equemene
    MyDataV = np.zeros(Number*4, dtype=MyFloat)
749 133 equemene
    MyPotential = np.zeros(Number, dtype=MyFloat)
750 133 equemene
    MyKinetic = np.zeros(Number, dtype=MyFloat)
751 132 equemene
752 171 equemene
    Marsaglia,Computing,Interaction,Artevasion=DictionariesAPI()
753 132 equemene
754 132 equemene
    # Scan the OpenCL arrays
755 132 equemene
    Id=0
756 116 equemene
    HasXPU=False
757 116 equemene
    for platform in cl.get_platforms():
758 116 equemene
        for device in platform.get_devices():
759 116 equemene
            if Id==Device:
760 116 equemene
                PlatForm=platform
761 116 equemene
                XPU=device
762 128 equemene
                print("CPU/GPU selected: ",device.name.lstrip())
763 151 equemene
                print("Platform selected: ",platform.name)
764 116 equemene
                HasXPU=True
765 116 equemene
            Id+=1
766 116 equemene
767 116 equemene
    if HasXPU==False:
768 128 equemene
        print("No XPU #%i found in all of %i devices, sorry..." % (Device,Id-1))
769 116 equemene
        sys.exit()
770 116 equemene
771 132 equemene
    # Create Context
772 116 equemene
    try:
773 116 equemene
        ctx = cl.Context([XPU])
774 116 equemene
        queue = cl.CommandQueue(ctx,properties=cl.command_queue_properties.PROFILING_ENABLE)
775 116 equemene
    except:
776 128 equemene
        print("Crash during context creation")
777 116 equemene
778 132 equemene
    # Build all routines used for the computing
779 170 equemene
780 170 equemene
    #BuildOptions="-cl-mad-enable -cl-kernel-arg-info -cl-fast-relaxed-math -cl-std=CL1.2 -DTRNG=%i -DTYPE=%i" % (Marsaglia[RNG],Computing[ValueType])
781 171 equemene
    BuildOptions="-cl-mad-enable -cl-fast-relaxed-math -DTRNG=%i -DTYPE=%i -DINTERACTION=%i -DARTEVASION=%i" % (Marsaglia[RNG],Computing[ValueType],Interaction[InterType],Artevasion[CoArEv])
782 170 equemene
783 170 equemene
    if 'Intel' in PlatForm.name or 'Experimental' in PlatForm.name or 'Clover' in PlatForm.name or 'Portable' in PlatForm.name :
784 151 equemene
        MyRoutines = cl.Program(ctx, BlobOpenCL).build(options = BuildOptions)
785 151 equemene
    else:
786 151 equemene
        MyRoutines = cl.Program(ctx, BlobOpenCL).build(options = BuildOptions+" -cl-strict-aliasing")
787 170 equemene
788 170 equemene
    mf = cl.mem_flags
789 170 equemene
    # Read/Write approach for buffering
790 170 equemene
    clDataX = cl.Buffer(ctx, mf.READ_WRITE, MyDataX.nbytes)
791 170 equemene
    clDataV = cl.Buffer(ctx, mf.READ_WRITE, MyDataV.nbytes)
792 170 equemene
    clPotential = cl.Buffer(ctx, mf.READ_WRITE, MyPotential.nbytes)
793 170 equemene
    clKinetic = cl.Buffer(ctx, mf.READ_WRITE, MyKinetic.nbytes)
794 170 equemene
    clCoM = cl.Buffer(ctx, mf.READ_WRITE, MyCoM.nbytes)
795 160 equemene
796 170 equemene
    # Write/HostPointer approach for buffering
797 170 equemene
    # clDataX = cl.Buffer(ctx, mf.WRITE_ONLY|mf.COPY_HOST_PTR,hostbuf=MyDataX)
798 170 equemene
    # clDataV = cl.Buffer(ctx, mf.WRITE_ONLY|mf.COPY_HOST_PTR,hostbuf=MyDataV)
799 170 equemene
    # clPotential = cl.Buffer(ctx, mf.WRITE_ONLY|mf.COPY_HOST_PTR,hostbuf=MyPotential)
800 170 equemene
    # clKinetic = cl.Buffer(ctx, mf.WRITE_ONLY|mf.COPY_HOST_PTR,hostbuf=MyKinetic)
801 170 equemene
    # clCoM = cl.Buffer(ctx, mf.WRITE_ONLY|mf.COPY_HOST_PTR,hostbuf=MyCoM)
802 116 equemene
803 134 equemene
    print('All particles superimposed.')
804 116 equemene
805 132 equemene
    # Set particles to RNG points
806 132 equemene
    if InitialRandom:
807 170 equemene
        seed_w=np.uint32(nprnd(2**32))
808 170 equemene
        seed_z=np.uint32(nprnd(2**32))
809 132 equemene
    else:
810 170 equemene
        seed_w=np.uint32(19710211)
811 170 equemene
        seed_z=np.uint32(20081010)
812 170 equemene
813 170 equemene
    if Shape=='Ball':
814 170 equemene
        MyRoutines.InBallSplutterPoints(queue,(Number,1),None,clDataX,SizeOfShape,seed_w,seed_z)
815 170 equemene
    else:
816 170 equemene
        MyRoutines.InBoxSplutterPoints(queue,(Number,1),None,clDataX,SizeOfShape,seed_w,seed_z)
817 116 equemene
818 132 equemene
    print('All particules distributed')
819 139 equemene
820 160 equemene
    CLLaunch=MyRoutines.CenterOfMass(queue,(1,1),None,clDataX,clCoM,np.int32(Number))
821 160 equemene
    CLLaunch.wait()
822 142 equemene
    cl.enqueue_copy(queue,MyCoM,clCoM)
823 170 equemene
    print('Center Of Mass estimated: (%s,%s,%s)' % (MyCoM[0],MyCoM[1],MyCoM[2]))
824 139 equemene
825 139 equemene
    if VirielStress:
826 160 equemene
        CLLaunch=MyRoutines.SplutterStress(queue,(Number,1),None,clDataX,clDataV,clCoM,MyFloat(0.),np.uint32(110271),np.uint32(250173))
827 139 equemene
    else:
828 160 equemene
        CLLaunch=MyRoutines.SplutterStress(queue,(Number,1),None,clDataX,clDataV,clCoM,Velocity,np.uint32(110271),np.uint32(250173))
829 160 equemene
    CLLaunch.wait()
830 139 equemene
831 170 equemene
    print('All particules stressed')
832 170 equemene
833 160 equemene
    CLLaunch=MyRoutines.Potential(queue,(Number,1),None,clDataX,clPotential)
834 160 equemene
    CLLaunch=MyRoutines.Kinetic(queue,(Number,1),None,clDataV,clKinetic)
835 133 equemene
    CLLaunch.wait()
836 141 equemene
    cl.enqueue_copy(queue,MyPotential,clPotential)
837 141 equemene
    cl.enqueue_copy(queue,MyKinetic,clKinetic)
838 170 equemene
    print('Energy estimated: Viriel=%s Potential=%s Kinetic=%s\n'% (np.sum(MyPotential)+2*np.sum(MyKinetic),np.sum(MyPotential),np.sum(MyKinetic)))
839 116 equemene
840 170 equemene
    if SpeedRendering:
841 170 equemene
        SizeOfBox=max(2*MyKinetic)
842 170 equemene
    else:
843 170 equemene
        SizeOfBox=SizeOfShape
844 170 equemene
845 174 equemene
    if OpenGL:
846 174 equemene
        print('\tTiny documentation to interact OpenGL rendering:\n')
847 174 equemene
        print('\t<Left|Right> Rotate around X axis')
848 174 equemene
        print('\t  <Up|Down>  Rotate around Y axis')
849 174 equemene
        print('\t   <z|Z>     Rotate around Z axis')
850 174 equemene
        print('\t   <-|+>     Unzoom/Zoom')
851 174 equemene
        print('\t    <s>      Toggle to display Positions or Velocities')
852 174 equemene
        print('\t   <Esc>     Quit\n')
853 174 equemene
854 170 equemene
    wall_time_start=time.time()
855 160 equemene
856 170 equemene
    Durations=np.array([],dtype=MyFloat)
857 170 equemene
    print('Starting!')
858 170 equemene
    if OpenGL:
859 170 equemene
        global ViewRX,ViewRY,ViewRZ
860 170 equemene
        Iterations=0
861 170 equemene
        ViewRX,ViewRY,ViewRZ = 0.,0.,0.
862 170 equemene
        # Launch OpenGL Loop
863 170 equemene
        glutInit(sys.argv)
864 170 equemene
        glutInitDisplayMode(GLUT_DOUBLE | GLUT_RGB)
865 170 equemene
        glutSetOption(GLUT_ACTION_ON_WINDOW_CLOSE,GLUT_ACTION_CONTINUE_EXECUTION)
866 170 equemene
        glutInitWindowSize(512,512)
867 170 equemene
        glutCreateWindow(b'NBodyGL')
868 170 equemene
        setup_viewport()
869 170 equemene
        glutReshapeFunc(reshape)
870 170 equemene
        glutDisplayFunc(display)
871 170 equemene
        glutIdleFunc(display)
872 170 equemene
        #   glutMouseFunc(mouse)
873 170 equemene
        glutSpecialFunc(special)
874 170 equemene
        glutKeyboardFunc(keyboard)
875 170 equemene
        glutMainLoop()
876 170 equemene
    else:
877 170 equemene
        for iteration in range(Iterations):
878 170 equemene
            Elapsed=MainOpenCL(clDataX,clDataV,Step,Method)
879 170 equemene
            if Verbose:
880 170 equemene
                # print("Duration of #%s iteration: %s" % (iteration,Elapsed))
881 170 equemene
                cl.enqueue_copy(queue, MyDataX, clDataX)
882 170 equemene
                print("Positions for #%s iteration: %s" % (iteration,MyDataX))
883 170 equemene
            else:
884 170 equemene
                sys.stdout.write('.')
885 170 equemene
                sys.stdout.flush()
886 170 equemene
            Durations=np.append(Durations,Elapsed)
887 133 equemene
888 170 equemene
    print('\nEnding!')
889 139 equemene
890 160 equemene
    MyRoutines.CenterOfMass(queue,(1,1),None,clDataX,clCoM,np.int32(Number))
891 160 equemene
    CLLaunch=MyRoutines.Potential(queue,(Number,1),None,clDataX,clPotential)
892 160 equemene
    CLLaunch=MyRoutines.Kinetic(queue,(Number,1),None,clDataV,clKinetic)
893 141 equemene
    CLLaunch.wait()
894 160 equemene
    cl.enqueue_copy(queue,MyCoM,clCoM)
895 141 equemene
    cl.enqueue_copy(queue,MyPotential,clPotential)
896 141 equemene
    cl.enqueue_copy(queue,MyKinetic,clKinetic)
897 170 equemene
    print('\nCenter Of Mass estimated: (%s,%s,%s)' % (MyCoM[0],MyCoM[1],MyCoM[2]))
898 170 equemene
    print('Energy estimated: Viriel=%s Potential=%s Kinetic=%s\n'% (np.sum(MyPotential)+2.*np.sum(MyKinetic),np.sum(MyPotential),np.sum(MyKinetic)))
899 135 equemene
900 170 equemene
    print("Duration stats on device %s with %s iterations :\n\tMean:\t%s\n\tMedian:\t%s\n\tStddev:\t%s\n\tMin:\t%s\n\tMax:\t%s\n\n\tVariability:\t%s\n" % (Device,Iterations,np.mean(Durations),np.median(Durations),np.std(Durations),np.min(Durations),np.max(Durations),np.std(Durations)/np.median(Durations)))
901 170 equemene
902 171 equemene
    # FPS: 1/Elapsed
903 171 equemene
    FPS=np.ones(len(Durations))
904 171 equemene
    FPS/=Durations
905 171 equemene
906 171 equemene
    print("FPS stats on device %s with %s iterations :\n\tMean:\t%s\n\tMedian:\t%s\n\tStddev:\t%s\n\tMin:\t%s\n\tMax:\t%s\n" % (Device,Iterations,np.mean(FPS),np.median(FPS),np.std(FPS),np.min(FPS),np.max(FPS)))
907 171 equemene
908 170 equemene
    # Contraction of Square*Size*Hertz: Size*Size/Elapsed
909 170 equemene
    Squertz=np.ones(len(Durations))
910 170 equemene
    Squertz*=Number*Number
911 170 equemene
    Squertz/=Durations
912 170 equemene
913 172 equemene
    print("Squertz in log10 & complete stats on device %s with %s iterations :\n\tMean:\t%s\t%s\n\tMedian:\t%s\t%s\n\tStddev:\t%s\t%s\n\tMin:\t%s\t%s\n\tMax:\t%s\t%s\n" % (Device,Iterations,np.log10(np.mean(Squertz)),np.mean(Squertz),np.log10(np.median(Squertz)),np.median(Squertz),np.log10(np.std(Squertz)),np.std(Squertz),np.log10(np.min(Squertz)),np.min(Squertz),np.log10(np.max(Squertz)),np.max(Squertz)))
914 170 equemene
915 160 equemene
    clDataX.release()
916 160 equemene
    clDataV.release()
917 133 equemene
    clKinetic.release()
918 133 equemene
    clPotential.release()