Statistiques
| Révision :

root / NBody / NBody.py @ 171

Historique | Voir | Annoter | Télécharger (27,19 ko)

1 128 equemene
#!/usr/bin/env python3
2 136 equemene
# -*- coding: utf-8 -*-
3 116 equemene
"""
4 171 equemene
NBody Demonstrator implemented in OpenCL, rendering OpenGL
5 116 equemene

6 116 equemene
Emmanuel QUEMENER <emmanuel.quemener@ens-lyon.fr> CeCILLv2
7 116 equemene
"""
8 116 equemene
import getopt
9 116 equemene
import sys
10 116 equemene
import time
11 116 equemene
import numpy as np
12 116 equemene
import pyopencl as cl
13 116 equemene
import pyopencl.array as cl_array
14 116 equemene
from numpy.random import randint as nprnd
15 170 equemene
import string, sys
16 170 equemene
from OpenGL.GL import *
17 170 equemene
from OpenGL.GLUT import *
18 116 equemene
19 132 equemene
def DictionariesAPI():
20 132 equemene
    Marsaglia={'CONG':0,'SHR3':1,'MWC':2,'KISS':3}
21 132 equemene
    Computing={'FP32':0,'FP64':1}
22 170 equemene
    Interaction={'Force':0,'Potential':1}
23 171 equemene
    Artevasion={'None':0,'NegExp':1}
24 171 equemene
    return(Marsaglia,Computing,Interaction,Artevasion)
25 132 equemene
26 142 equemene
BlobOpenCL= """
27 132 equemene
#define TFP32 0
28 132 equemene
#define TFP64 1
29 132 equemene

30 170 equemene
#define TFORCE 0
31 170 equemene
#define TPOTENTIAL 1
32 116 equemene

33 171 equemene
#define NONE 0
34 171 equemene
#define NEGEXP 1
35 171 equemene

36 132 equemene
#if TYPE == TFP32
37 132 equemene
#define MYFLOAT4 float4
38 132 equemene
#define MYFLOAT8 float8
39 132 equemene
#define MYFLOAT float
40 151 equemene
#define DISTANCE fast_distance
41 132 equemene
#else
42 132 equemene
#define MYFLOAT4 double4
43 132 equemene
#define MYFLOAT8 double8
44 132 equemene
#define MYFLOAT double
45 151 equemene
#define DISTANCE distance
46 170 equemene
#if defined(cl_khr_fp64)  // Khronos extension available?
47 170 equemene
#pragma OPENCL EXTENSION cl_khr_fp64 : enable
48 132 equemene
#endif
49 170 equemene
#endif
50 132 equemene

51 170 equemene
#define znew  ((zmwc=36969*(zmwc&65535)+(zmwc>>16))<<16)
52 170 equemene
#define wnew  ((wmwc=18000*(wmwc&65535)+(wmwc>>16))&65535)
53 170 equemene
#define MWC   (znew+wnew)
54 170 equemene
#define SHR3  (jsr=(jsr=(jsr=jsr^(jsr<<17))^(jsr>>13))^(jsr<<5))
55 170 equemene
#define CONG  (jcong=69069*jcong+1234567)
56 170 equemene
#define KISS  ((MWC^CONG)+SHR3)
57 170 equemene

58 160 equemene
#define MWCfp (MYFLOAT)(MWC * 2.3283064365386963e-10f)
59 160 equemene
#define KISSfp (MYFLOAT)(KISS * 2.3283064365386963e-10f)
60 160 equemene
#define SHR3fp (MYFLOAT)(SHR3 * 2.3283064365386963e-10f)
61 160 equemene
#define CONGfp (MYFLOAT)(CONG * 2.3283064365386963e-10f)
62 160 equemene

63 171 equemene
#define PI (MYFLOAT)3.141592653589793238e0f
64 160 equemene

65 170 equemene
#define SMALL_NUM (MYFLOAT)1.e-9f
66 160 equemene

67 170 equemene
// Create my own Distance implementation: distance buggy on Oland AMD chipset
68 116 equemene

69 170 equemene
MYFLOAT MyDistance(MYFLOAT4 n,MYFLOAT4 m)
70 141 equemene
{
71 170 equemene
    private MYFLOAT x2,y2,z2;
72 170 equemene
    x2=n.s0-m.s0;
73 170 equemene
    x2*=x2;
74 170 equemene
    y2=n.s1-m.s1;
75 170 equemene
    y2*=y2;
76 170 equemene
    z2=n.s2-m.s2;
77 170 equemene
    z2*=z2;
78 170 equemene
    return(sqrt(x2+y2+z2));
79 141 equemene
}
80 141 equemene

81 170 equemene
// Interaction to avoid artevasion of bodies : 1-e^-x*x is x*x near 0
82 170 equemene

83 170 equemene
//MYFLOAT4 InteractionCore(MYFLOAT4 m,MYFLOAT4 n)
84 170 equemene
//{
85 170 equemene
//    private MYFLOAT r=MyDistance((MYFLOAT4)n,(MYFLOAT4)m);
86 170 equemene
//    private MYFLOAT r2=r*r;
87 170 equemene
//    private MYFLOAT c=1.e0f/(MYFLOAT)get_global_size(0);
88 170 equemene

89 170 equemene
//    private MYFLOAT4 unity=normalize(n,m);
90 170 equemene

91 170 equemene
//    return(((MYFLOAT4)n-(MYFLOAT4)m)*(MYFLOAT)(1.e0f-exp(-c*r2))/(MYFLOAT)(r*r2));
92 170 equemene
//}
93 170 equemene

94 171 equemene
// Potential between 2 m,n bodies
95 133 equemene
MYFLOAT PairPotential(MYFLOAT4 m,MYFLOAT4 n)
96 171 equemene
#if ARTEVASION == NEGEXP
97 171 equemene
// Add exp(-r) to numerator to avoid divergence for low distances
98 133 equemene
{
99 171 equemene
    MYFLOAT r=DISTANCE(n,m);
100 171 equemene
    return((-1.e0f+exp(-r))/r);
101 171 equemene
}
102 171 equemene
#else
103 171 equemene
// Classical potential in 1/r
104 171 equemene
{
105 170 equemene
//    return((MYFLOAT)(-1.e0f)/(MyDistance(m,n)));
106 151 equemene
    return((MYFLOAT)(-1.e0f)/(DISTANCE(n,m)));
107 133 equemene
}
108 171 equemene
#endif
109 133 equemene

110 171 equemene
// Interaction based of Force (1/r**2) or Potential (-grad(1/r))
111 170 equemene
MYFLOAT4 Interaction(MYFLOAT4 m,MYFLOAT4 n)
112 170 equemene
#if INTERACTION == TFORCE
113 170 equemene
// Simplest implementation of force (equals to acceleration)
114 170 equemene
// seems to bo bad (numerous artevasions)
115 170 equemene
// MYFLOAT4 InteractionForce(MYFLOAT4 m,MYFLOAT4 n)
116 170 equemene
{
117 170 equemene
    private MYFLOAT r=MyDistance(n,m);
118 170 equemene
    return((n-m)/(MYFLOAT)(r*r*r));
119 170 equemene
}
120 170 equemene
#else
121 170 equemene
// Force definited as gradient of potential
122 170 equemene
// Estimate potential and proximate potential to estimate force
123 170 equemene
// MYFLOAT4 InteractionPotential(MYFLOAT4 m,MYFLOAT4 n)
124 170 equemene
{
125 171 equemene
    // 1/1024 seems to be a good factor: larger one provides bad results
126 171 equemene
    private MYFLOAT epsilon=(MYFLOAT)(1.e0f/1024);
127 170 equemene
    private MYFLOAT4 er=normalize(n-m);
128 170 equemene
    private MYFLOAT4 dr=er*(MYFLOAT)epsilon;
129 170 equemene

130 171 equemene
    return(er/epsilon*(PairPotential(m,n)-PairPotential(m+dr,n)));
131 170 equemene
}
132 170 equemene
#endif
133 170 equemene

134 160 equemene
MYFLOAT AtomicPotential(__global MYFLOAT4* clDataX,int gid)
135 139 equemene
{
136 160 equemene
    private MYFLOAT potential=(MYFLOAT)0.e0f;
137 160 equemene
    private MYFLOAT4 x=clDataX[gid];
138 139 equemene

139 139 equemene
    for (int i=0;i<get_global_size(0);i++)
140 139 equemene
    {
141 139 equemene
        if (gid != i)
142 160 equemene
        potential+=PairPotential(x,clDataX[i]);
143 139 equemene
    }
144 133 equemene

145 139 equemene
    barrier(CLK_GLOBAL_MEM_FENCE);
146 141 equemene
    return(potential);
147 139 equemene
}
148 139 equemene

149 160 equemene
MYFLOAT AtomicPotentialCoM(__global MYFLOAT4* clDataX,__global MYFLOAT4* clCoM,int gid)
150 139 equemene
{
151 160 equemene
    return(PairPotential(clDataX[gid],clCoM[0]));
152 139 equemene
}
153 139 equemene

154 170 equemene
// Elements from : http://doswa.com/2009/01/02/fourth-order-runge-kutta-numerical-integration.html
155 170 equemene

156 160 equemene
MYFLOAT8 AtomicRungeKutta(__global MYFLOAT4* clDataInX,__global MYFLOAT4* clDataInV,int gid,MYFLOAT dt)
157 116 equemene
{
158 160 equemene
    private MYFLOAT4 a0,v0,x0,a1,v1,x1,a2,v2,x2,a3,v3,x3,a4,v4,x4,xf,vf;
159 170 equemene
    MYFLOAT4 DT=dt*(MYFLOAT4)(1.e0f,1.e0f,1.e0f,1.e0f);
160 160 equemene

161 160 equemene
    a0=(MYFLOAT4)(0.e0f,0.e0f,0.e0f,0.e0f);
162 160 equemene
    v0=(MYFLOAT4)clDataInV[gid];
163 160 equemene
    x0=(MYFLOAT4)clDataInX[gid];
164 133 equemene
    int N = get_global_size(0);
165 133 equemene

166 170 equemene
    for (private int i=0;i<N;i++)
167 121 equemene
    {
168 121 equemene
        if (gid != i)
169 160 equemene
        a0+=Interaction(x0,clDataInX[i]);
170 121 equemene
    }
171 121 equemene

172 160 equemene
    a1=(MYFLOAT4)(0.e0f,0.e0f,0.e0f,0.e0f);
173 170 equemene
    v1=a0*dt+v0;
174 170 equemene
    x1=v0*dt+x0;
175 170 equemene
    for (private int j=0;j<N;j++)
176 121 equemene
    {
177 170 equemene
        if (gid != j)
178 170 equemene
        a1+=Interaction(x1,clDataInX[j]);
179 121 equemene
    }
180 121 equemene

181 160 equemene
    a2=(MYFLOAT4)(0.e0f,0.e0f,0.e0f,0.e0f);
182 170 equemene
    v2=a1*(MYFLOAT)(dt/2.e0f)+v0;
183 170 equemene
    x2=v1*(MYFLOAT)(dt/2.e0f)+x0;
184 170 equemene
    for (private int k=0;k<N;k++)
185 121 equemene
    {
186 170 equemene
        if (gid != k)
187 170 equemene
        a2+=Interaction(x2,clDataInX[k]);
188 121 equemene
    }
189 121 equemene

190 160 equemene
    a3=(MYFLOAT4)(0.e0f,0.e0f,0.e0f,0.e0f);
191 170 equemene
    v3=a2*(MYFLOAT)(dt/2.e0f)+v0;
192 170 equemene
    x3=v2*(MYFLOAT)(dt/2.e0f)+x0;
193 170 equemene
    for (private int l=0;l<N;l++)
194 121 equemene
    {
195 170 equemene
        if (gid != l)
196 170 equemene
        a3+=Interaction(x3,clDataInX[l]);
197 121 equemene
    }
198 121 equemene

199 160 equemene
    a4=(MYFLOAT4)(0.e0f,0.e0f,0.e0f,0.e0f);
200 170 equemene
    v4=a3*dt+v0;
201 170 equemene
    x4=v3*dt+x0;
202 170 equemene
    for (private int m=0;m<N;m++)
203 141 equemene
    {
204 170 equemene
        if (gid != m)
205 170 equemene
        a4+=Interaction(x4,clDataInX[m]);
206 141 equemene
    }
207 141 equemene

208 160 equemene
    xf=x0+dt*(v1+(MYFLOAT)2.e0f*(v2+v3)+v4)/(MYFLOAT)6.e0f;
209 160 equemene
    vf=v0+dt*(a1+(MYFLOAT)2.e0f*(a2+a3)+a4)/(MYFLOAT)6.e0f;
210 121 equemene

211 170 equemene
    return((MYFLOAT8)(xf.s0,xf.s1,xf.s2,0.e0f,vf.s0,vf.s1,vf.s2,0.e0f));
212 121 equemene
}
213 121 equemene

214 160 equemene
MYFLOAT8 AtomicHeun(__global MYFLOAT4* clDataInX,__global MYFLOAT4* clDataInV,int gid,MYFLOAT dt)
215 121 equemene
{
216 170 equemene
    private MYFLOAT4 x0,v0,a0,x1,v1,a1,xf,vf;
217 170 equemene
    MYFLOAT4 Dt=dt*(MYFLOAT4)(1.e0f,1.e0f,1.e0f,1.e0f);
218 116 equemene

219 170 equemene
    x0=(MYFLOAT4)clDataInX[gid];
220 170 equemene
    v0=(MYFLOAT4)clDataInV[gid];
221 170 equemene
    a0=(MYFLOAT4)(0.e0f,0.e0f,0.e0f,0.e0f);
222 141 equemene

223 170 equemene
    for (private int i=0;i<get_global_size(0);i++)
224 116 equemene
    {
225 116 equemene
        if (gid != i)
226 170 equemene
        a0+=Interaction(x0,clDataInX[i]);
227 116 equemene
    }
228 141 equemene

229 170 equemene
    a1=(MYFLOAT4)(0.e0f,0.e0f,0.e0f,0.e0f);
230 170 equemene
    //v1=v0+dt*a0;
231 170 equemene
    //x1=x0+dt*v0;
232 170 equemene
    v1=dt*a0+v0;
233 170 equemene
    x1=dt*v0+x0;
234 141 equemene

235 170 equemene
    for (private int j=0;j<get_global_size(0);j++)
236 116 equemene
    {
237 170 equemene
        if (gid != j)
238 170 equemene
        a1+=Interaction(x1,clDataInX[j]);
239 116 equemene
    }
240 118 equemene

241 170 equemene
    vf=v0+dt*(a0+a1)/(MYFLOAT)2.e0f;
242 170 equemene
    xf=x0+dt*(v0+v1)/(MYFLOAT)2.e0f;
243 170 equemene

244 170 equemene
    return((MYFLOAT8)(xf.s0,xf.s1,xf.s2,0.e0f,vf.s0,vf.s1,vf.s2,0.e0f));
245 119 equemene
}
246 119 equemene

247 160 equemene
MYFLOAT8 AtomicImplicitEuler(__global MYFLOAT4* clDataInX,__global MYFLOAT4* clDataInV,int gid,MYFLOAT dt)
248 119 equemene
{
249 170 equemene
    MYFLOAT4 x0,v0,a,xf,vf;
250 119 equemene

251 170 equemene
    x0=(MYFLOAT4)clDataInX[gid];
252 170 equemene
    v0=(MYFLOAT4)clDataInV[gid];
253 151 equemene
    a=(MYFLOAT4)(0.e0f,0.e0f,0.e0f,0.e0f);
254 160 equemene

255 170 equemene
    for (private int i=0;i<get_global_size(0);i++)
256 119 equemene
    {
257 119 equemene
        if (gid != i)
258 170 equemene
          a+=Interaction(x0,clDataInX[i]);
259 119 equemene
    }
260 133 equemene

261 170 equemene
    vf=v0+dt*a;
262 170 equemene
    xf=x0+dt*vf;
263 170 equemene

264 170 equemene
    return((MYFLOAT8)(xf.s0,xf.s1,xf.s2,0.e0f,vf.s0,vf.s1,vf.s2,0.e0f));
265 116 equemene
}
266 116 equemene

267 160 equemene
MYFLOAT8 AtomicExplicitEuler(__global MYFLOAT4* clDataInX,__global MYFLOAT4* clDataInV,int gid,MYFLOAT dt)
268 140 equemene
{
269 170 equemene
    MYFLOAT4 x0,v0,a,xf,vf;
270 140 equemene

271 170 equemene
    x0=(MYFLOAT4)clDataInX[gid];
272 170 equemene
    v0=(MYFLOAT4)clDataInV[gid];
273 151 equemene
    a=(MYFLOAT4)(0.e0f,0.e0f,0.e0f,0.e0f);
274 160 equemene

275 170 equemene
    for (private int i=0;i<get_global_size(0);i++)
276 140 equemene
    {
277 140 equemene
        if (gid != i)
278 170 equemene
        a+=Interaction(x0,clDataInX[i]);
279 140 equemene
    }
280 140 equemene

281 170 equemene
    vf=v0+dt*a;
282 170 equemene
    xf=x0+dt*v0;
283 140 equemene

284 170 equemene
    return((MYFLOAT8)(xf.s0,xf.s1,xf.s2,0.e0f,vf.s0,vf.s1,vf.s2,0.e0f));
285 140 equemene
}
286 140 equemene

287 170 equemene
__kernel void InBallSplutterPoints(__global MYFLOAT4* clDataX,
288 170 equemene
                                   MYFLOAT diameter,uint seed_z,uint seed_w)
289 170 equemene
{
290 170 equemene
    private int gid=get_global_id(0);
291 170 equemene
    private uint zmwc=seed_z+gid;
292 170 equemene
    private uint wmwc=seed_w+(gid+1)%2;
293 170 equemene
    private MYFLOAT Heat;
294 170 equemene

295 170 equemene
    for (int i=0;i<gid;i++)
296 170 equemene
    {
297 170 equemene
        Heat=MWCfp;
298 170 equemene
    }
299 170 equemene

300 170 equemene
// More accurate distribution based on spherical coordonates
301 170 equemene
// Disactivated because of AMD Oland GPU crash on launch
302 170 equemene
//     private MYFLOAT Radius,Theta,Phi,PosX,PosY,PosZ,SinTheta;
303 170 equemene
//     Radius=MWCfp*diameter/2.e0f;
304 170 equemene
//     Theta=(MYFLOAT)acos((float)(-2.e0f*MWCfp+1.0e0f));
305 170 equemene
//     Phi=(MYFLOAT)(2.e0f*PI*MWCfp);
306 170 equemene
//     SinTheta=sin((float)Theta);
307 170 equemene
//     PosX=cos((float)Phi)*Radius*SinTheta;
308 170 equemene
//     PosY=sin((float)Phi)*Radius*SinTheta;
309 170 equemene
//     PosZ=cos((float)Theta)*Radius;
310 170 equemene
//     clDataX[gid]=(MYFLOAT4)(PosX,PosY,PosZ,0.e0f);
311 170 equemene

312 170 equemene
    private MYFLOAT Radius=diameter/2.e0f;
313 170 equemene
    private MYFLOAT Length=diameter;
314 170 equemene
    private MYFLOAT4 Position;
315 170 equemene
    while (Length>Radius) {
316 170 equemene
       Position=(MYFLOAT4)((MWCfp-0.5e0f)*diameter,(MWCfp-0.5e0f)*diameter,(MWCfp-0.5e0f)*diameter,0.e0f);
317 170 equemene
       Length=(MYFLOAT)length((MYFLOAT4)Position);
318 170 equemene
    }
319 170 equemene

320 170 equemene
    clDataX[gid]=Position;
321 170 equemene

322 170 equemene
    barrier(CLK_GLOBAL_MEM_FENCE);
323 170 equemene
}
324 170 equemene

325 170 equemene
__kernel void InBoxSplutterPoints(__global MYFLOAT4* clDataX, MYFLOAT box,
326 139 equemene
                             uint seed_z,uint seed_w)
327 116 equemene
{
328 170 equemene
    int gid=get_global_id(0);
329 170 equemene
    uint zmwc=seed_z+gid;
330 170 equemene
    uint wmwc=seed_w-gid;
331 170 equemene
    private MYFLOAT Heat;
332 170 equemene

333 170 equemene
    for (int i=0;i<gid;i++)
334 170 equemene
    {
335 170 equemene
        Heat=MWCfp;
336 170 equemene
    }
337 137 equemene

338 170 equemene
    clDataX[gid]=(MYFLOAT4)((MWCfp-0.5e0f)*box,(MWCfp-0.5e0f)*box,(MWCfp-0.5e0f)*box,0.e0f);
339 170 equemene

340 170 equemene
    barrier(CLK_GLOBAL_MEM_FENCE);
341 116 equemene
}
342 116 equemene

343 160 equemene
__kernel void SplutterStress(__global MYFLOAT4* clDataX,__global MYFLOAT4* clDataV,__global MYFLOAT4* clCoM, MYFLOAT velocity,uint seed_z,uint seed_w)
344 139 equemene
{
345 139 equemene
    int gid = get_global_id(0);
346 142 equemene
    MYFLOAT N = (MYFLOAT)get_global_size(0);
347 170 equemene
    uint zmwc=seed_z+(uint)gid;
348 170 equemene
    uint wmwc=seed_w-(uint)gid;
349 171 equemene
    MYFLOAT4 CrossVector,SpeedVector;
350 139 equemene

351 171 equemene
    if (get_global_size(0)==2) {
352 171 equemene
       CrossVector=(MYFLOAT4)(1.e0f,1.e0f,1.e0f,0.e0f);
353 171 equemene
    } else {
354 171 equemene
       CrossVector=(MYFLOAT4)(MWCfp-5e-1f,MWCfp-5e-1f,MWCfp-5e-1f,0.e0f);
355 171 equemene
    }
356 171 equemene

357 139 equemene
    if (velocity<SMALL_NUM) {
358 171 equemene
       SpeedVector=(MYFLOAT4)normalize(cross(clDataX[gid]-clCoM[0],CrossVector))*sqrt((-AtomicPotential(clDataX,gid)/(MYFLOAT)2.e0f));
359 139 equemene
    }
360 139 equemene
    else
361 139 equemene
    {
362 155 equemene
       // cast to float for sin,cos are NEEDED by Mesa FP64 implementation!
363 170 equemene
       // Implemention on AMD Oland are probably broken in float
364 170 equemene

365 170 equemene
       SpeedVector=(MYFLOAT4)((MWCfp-0.5e0f)*velocity,(MWCfp-0.5e0f)*velocity,
366 170 equemene
                              (MWCfp-0.5e0f)*velocity,0.e0f);
367 139 equemene
    }
368 170 equemene
    clDataV[gid]=SpeedVector;
369 170 equemene
    barrier(CLK_GLOBAL_MEM_FENCE);
370 139 equemene
}
371 139 equemene

372 160 equemene
__kernel void RungeKutta(__global MYFLOAT4* clDataX,__global MYFLOAT4* clDataV,MYFLOAT h)
373 116 equemene
{
374 170 equemene
    private int gid = get_global_id(0);
375 170 equemene
    private MYFLOAT8 clDataGid;
376 116 equemene

377 170 equemene
    clDataGid=AtomicRungeKutta(clDataX,clDataV,gid,h);
378 116 equemene
    barrier(CLK_GLOBAL_MEM_FENCE);
379 170 equemene
    clDataX[gid]=clDataGid.s0123;
380 170 equemene
    clDataV[gid]=clDataGid.s4567;
381 116 equemene
}
382 116 equemene

383 170 equemene
__kernel void Heun(__global MYFLOAT4* clDataX,__global MYFLOAT4* clDataV,MYFLOAT h)
384 116 equemene
{
385 170 equemene
    private int gid = get_global_id(0);
386 170 equemene
    private MYFLOAT8 clDataGid;
387 116 equemene

388 170 equemene
    clDataGid=AtomicHeun(clDataX,clDataV,gid,h);
389 116 equemene
    barrier(CLK_GLOBAL_MEM_FENCE);
390 170 equemene
    clDataX[gid]=clDataGid.s0123;
391 170 equemene
    clDataV[gid]=clDataGid.s4567;
392 116 equemene
}
393 133 equemene

394 170 equemene
__kernel void ImplicitEuler(__global MYFLOAT4* clDataX,__global MYFLOAT4* clDataV,MYFLOAT h)
395 141 equemene
{
396 170 equemene
    private int gid = get_global_id(0);
397 170 equemene
    private MYFLOAT8 clDataGid;
398 141 equemene

399 170 equemene
    clDataGid=AtomicImplicitEuler(clDataX,clDataV,gid,h);
400 141 equemene
    barrier(CLK_GLOBAL_MEM_FENCE);
401 170 equemene
    clDataX[gid]=clDataGid.s0123;
402 170 equemene
    clDataV[gid]=clDataGid.s4567;
403 141 equemene
}
404 141 equemene

405 160 equemene
__kernel void ExplicitEuler(__global MYFLOAT4* clDataX,__global MYFLOAT4* clDataV,MYFLOAT h)
406 140 equemene
{
407 170 equemene
    private int gid = get_global_id(0);
408 170 equemene
    private MYFLOAT8 clDataGid;
409 170 equemene

410 170 equemene
    clDataGid=AtomicExplicitEuler(clDataX,clDataV,gid,h);
411 140 equemene
    barrier(CLK_GLOBAL_MEM_FENCE);
412 170 equemene
    clDataX[gid]=clDataGid.s0123;
413 170 equemene
    clDataV[gid]=clDataGid.s4567;
414 140 equemene
}
415 139 equemene

416 160 equemene
__kernel void CoMPotential(__global MYFLOAT4* clDataX,__global MYFLOAT4* clCoM,__global MYFLOAT* clPotential)
417 133 equemene
{
418 133 equemene
    int gid = get_global_id(0);
419 133 equemene

420 160 equemene
    clPotential[gid]=PairPotential(clDataX[gid],clCoM[0]);
421 139 equemene
}
422 139 equemene

423 160 equemene
__kernel void Potential(__global MYFLOAT4* clDataX,__global MYFLOAT* clPotential)
424 139 equemene
{
425 139 equemene
    int gid = get_global_id(0);
426 139 equemene

427 155 equemene
    MYFLOAT potential=(MYFLOAT)0.e0f;
428 160 equemene
    MYFLOAT4 x=clDataX[gid];
429 133 equemene

430 133 equemene
    for (int i=0;i<get_global_size(0);i++)
431 133 equemene
    {
432 133 equemene
        if (gid != i)
433 160 equemene
        potential+=PairPotential(x,clDataX[i]);
434 133 equemene
    }
435 133 equemene

436 133 equemene
    barrier(CLK_GLOBAL_MEM_FENCE);
437 155 equemene
    clPotential[gid]=potential*(MYFLOAT)5.e-1f;
438 133 equemene
}
439 133 equemene

440 160 equemene
__kernel void CenterOfMass(__global MYFLOAT4* clDataX,__global MYFLOAT4* clCoM,int Size)
441 139 equemene
{
442 160 equemene
    MYFLOAT4 CoM=clDataX[0];
443 142 equemene

444 139 equemene
    for (int i=1;i<Size;i++)
445 139 equemene
    {
446 160 equemene
        CoM+=clDataX[i];
447 139 equemene
    }
448 142 equemene

449 139 equemene
    barrier(CLK_GLOBAL_MEM_FENCE);
450 170 equemene
    clCoM[0]=(MYFLOAT4)(CoM.s0,CoM.s1,CoM.s2,0.e0f)/(MYFLOAT)Size;
451 139 equemene
}
452 139 equemene

453 160 equemene
__kernel void Kinetic(__global MYFLOAT4* clDataV,__global MYFLOAT* clKinetic)
454 133 equemene
{
455 133 equemene
    int gid = get_global_id(0);
456 133 equemene

457 139 equemene
    barrier(CLK_GLOBAL_MEM_FENCE);
458 160 equemene
    MYFLOAT d=(MYFLOAT)length(clDataV[gid]);
459 155 equemene
    clKinetic[gid]=(MYFLOAT)5.e-1f*(MYFLOAT)(d*d);
460 133 equemene
}
461 170 equemene

462 116 equemene
"""
463 116 equemene
464 170 equemene
def MainOpenCL(clDataX,clDataV,Step,Method):
465 170 equemene
    time_start=time.time()
466 170 equemene
    if Method=="RungeKutta":
467 170 equemene
        CLLaunch=MyRoutines.RungeKutta(queue,(Number,1),None,clDataX,clDataV,Step)
468 170 equemene
    elif Method=="ExplicitEuler":
469 170 equemene
        CLLaunch=MyRoutines.ExplicitEuler(queue,(Number,1),None,clDataX,clDataV,Step)
470 170 equemene
    elif Method=="Heun":
471 170 equemene
        CLLaunch=MyRoutines.Heun(queue,(Number,1),None,clDataX,clDataV,Step)
472 170 equemene
    else:
473 170 equemene
        CLLaunch=MyRoutines.ImplicitEuler(queue,(Number,1),None,clDataX,clDataV,Step)
474 170 equemene
    CLLaunch.wait()
475 170 equemene
    Elapsed=time.time()-time_start
476 170 equemene
    return(Elapsed)
477 170 equemene
478 170 equemene
def display(*args):
479 133 equemene
480 170 equemene
    global MyDataX,MyDataV,clDataX,clDataV,Step,Method,Number,Iterations,Durations,Verbose,SpeedRendering
481 170 equemene
482 170 equemene
    glClearColor(0.0, 0.0, 0.0, 0.0)
483 170 equemene
    glClear(GL_COLOR_BUFFER_BIT)
484 170 equemene
    glColor3f(1.0,1.0,1.0)
485 170 equemene
486 170 equemene
    Elapsed=MainOpenCL(clDataX,clDataV,Step,Method)
487 170 equemene
    if SpeedRendering:
488 170 equemene
        cl.enqueue_copy(queue, MyDataV, clDataV)
489 170 equemene
        MyDataV.reshape(Number,4)[:,3]=1
490 170 equemene
        glVertexPointerf(MyDataV.reshape(Number,4))
491 170 equemene
    else:
492 170 equemene
        cl.enqueue_copy(queue, MyDataX, clDataX)
493 170 equemene
        MyDataX.reshape(Number,4)[:,3]=1
494 170 equemene
        glVertexPointerf(MyDataX.reshape(Number,4))
495 170 equemene
496 170 equemene
    if Verbose:
497 170 equemene
        print("Positions for #%s iteration: %s" % (Iterations,MyDataX))
498 170 equemene
    else:
499 170 equemene
        sys.stdout.write('.')
500 170 equemene
        sys.stdout.flush()
501 170 equemene
    Durations=np.append(Durations,MainOpenCL(clDataX,clDataV,Step,Method))
502 170 equemene
    glEnableClientState(GL_VERTEX_ARRAY)
503 170 equemene
    glDrawArrays(GL_POINTS, 0, Number)
504 170 equemene
    glDisableClientState(GL_VERTEX_ARRAY)
505 170 equemene
    glFlush()
506 170 equemene
    Iterations+=1
507 170 equemene
    glutSwapBuffers()
508 170 equemene
509 170 equemene
def halt():
510 170 equemene
    pass
511 170 equemene
512 170 equemene
def keyboard(k,x,y):
513 170 equemene
    global ViewRZ,SizeOfBox,SpeedRendering
514 170 equemene
    LC_Z = as_8_bit( 'z' )
515 170 equemene
    UC_Z = as_8_bit( 'Z' )
516 170 equemene
    Plus = as_8_bit( '+' )
517 170 equemene
    Minus = as_8_bit( '-' )
518 170 equemene
    Switch = as_8_bit( 's' )
519 170 equemene
520 170 equemene
    Zoom=1
521 170 equemene
    if k == LC_Z:
522 170 equemene
        ViewRZ += 1.0
523 170 equemene
    elif k == UC_Z:
524 170 equemene
        ViewRZ -= 1.0
525 170 equemene
    elif k == Plus:
526 170 equemene
        Zoom *= 2.0
527 170 equemene
    elif k == Minus:
528 170 equemene
        Zoom /= 2.0
529 170 equemene
    elif k == Switch:
530 170 equemene
        if SpeedRendering:
531 170 equemene
            SpeedRendering=False
532 170 equemene
        else:
533 170 equemene
            SpeedRendering=True
534 170 equemene
    elif ord(k) == 27: # Escape
535 170 equemene
        glutLeaveMainLoop()
536 170 equemene
        return(False)
537 170 equemene
    else:
538 170 equemene
        return
539 170 equemene
    glRotatef(ViewRZ, 0.0, 0.0, 1.0)
540 170 equemene
    glScalef(Zoom,Zoom,Zoom)
541 170 equemene
    glutPostRedisplay()
542 170 equemene
543 170 equemene
def special(k,x,y):
544 170 equemene
    global ViewRX, ViewRY, ViewRZ
545 170 equemene
546 170 equemene
    if k == GLUT_KEY_UP:
547 170 equemene
        ViewRX += 1.0
548 170 equemene
    elif k == GLUT_KEY_DOWN:
549 170 equemene
        ViewRX -= 1.0
550 170 equemene
    elif k == GLUT_KEY_LEFT:
551 170 equemene
        ViewRY += 1.0
552 170 equemene
    elif k == GLUT_KEY_RIGHT:
553 170 equemene
        ViewRY -= 1.0
554 170 equemene
    else:
555 170 equemene
        return
556 170 equemene
    glRotatef(ViewRX, 1.0, 0.0, 0.0)
557 170 equemene
    glRotatef(ViewRY, 0.0, 1.0, 0.0)
558 170 equemene
    glutPostRedisplay()
559 170 equemene
560 170 equemene
def setup_viewport():
561 170 equemene
    global SizeOfBox
562 170 equemene
    glMatrixMode(GL_PROJECTION)
563 170 equemene
    glLoadIdentity()
564 170 equemene
    glOrtho(-SizeOfBox, SizeOfBox, -SizeOfBox, SizeOfBox, -SizeOfBox, SizeOfBox)
565 170 equemene
    glutPostRedisplay()
566 170 equemene
567 170 equemene
def reshape(w, h):
568 170 equemene
    glViewport(0, 0, w, h)
569 170 equemene
    setup_viewport()
570 170 equemene
571 116 equemene
if __name__=='__main__':
572 170 equemene
573 170 equemene
    global Number,Step,clDataX,clDataV,MyDataX,MyDataV,Method,SizeOfBox,Iterations,Verbose,Durations
574 116 equemene
575 132 equemene
    # ValueType
576 132 equemene
    ValueType='FP32'
577 132 equemene
    class MyFloat(np.float32):pass
578 160 equemene
    #    clType8=cl_array.vec.float8
579 116 equemene
    # Set defaults values
580 118 equemene
    np.set_printoptions(precision=2)
581 116 equemene
    # Id of Device : 1 is for first find !
582 160 equemene
    Device=0
583 170 equemene
    # Number of bodies is integer
584 160 equemene
    Number=2
585 170 equemene
    # Number of iterations (for standalone execution)
586 170 equemene
    Iterations=10
587 170 equemene
    # Size of shape
588 170 equemene
    SizeOfShape=MyFloat(1.)
589 116 equemene
    # Initial velocity of particules
590 132 equemene
    Velocity=MyFloat(1.)
591 116 equemene
    # Step
592 170 equemene
    Step=MyFloat(1./32)
593 121 equemene
    # Method of integration
594 150 equemene
    Method='ImplicitEuler'
595 132 equemene
    # InitialRandom
596 132 equemene
    InitialRandom=False
597 132 equemene
    # RNG Marsaglia Method
598 132 equemene
    RNG='MWC'
599 139 equemene
    # Viriel Distribution of stress
600 139 equemene
    VirielStress=True
601 170 equemene
    # Verbose
602 170 equemene
    Verbose=False
603 170 equemene
    # OpenGL real time rendering
604 170 equemene
    OpenGL=False
605 170 equemene
    # Speed rendering
606 170 equemene
    SpeedRendering=False
607 171 equemene
    # Counter ArtEvasions Measures (artefact evasion)
608 171 equemene
    CoArEv='None'
609 170 equemene
    # Shape to distribute
610 170 equemene
    Shape='Ball'
611 170 equemene
    # Type of Interaction
612 170 equemene
    InterType='Potential'
613 132 equemene
614 171 equemene
    HowToUse='%s -h [Help] -r [InitialRandom] -g [OpenGL] -e [VirielStress] -o [Verbose] -p [SpeedRendering] -x [NegativeExponential4ArteEvasions] -f [Force] -d <DeviceId> -n <NumberOfParticules> -i <Iterations> -z <SizeOfBoxOrBall> -v <Velocity> -s <Step> -b <Ball|Box> -m <ImplicitEuler|RungeKutta|ExplicitEuler|Heun> -t <FP32|FP64>'
615 116 equemene
616 116 equemene
    try:
617 171 equemene
        opts, args = getopt.getopt(sys.argv[1:],"rpxfgehod:n:i:z:v:s:m:t:b:",["random","rendering","negexp","force","opengl","viriel","verbose","device=","number=","iterations=","size=","velocity=","step=","method=","valuetype=","shape="])
618 116 equemene
    except getopt.GetoptError:
619 128 equemene
        print(HowToUse % sys.argv[0])
620 116 equemene
        sys.exit(2)
621 116 equemene
622 116 equemene
    for opt, arg in opts:
623 116 equemene
        if opt == '-h':
624 128 equemene
            print(HowToUse % sys.argv[0])
625 116 equemene
626 128 equemene
            print("\nInformations about devices detected under OpenCL:")
627 116 equemene
            try:
628 132 equemene
                Id=0
629 116 equemene
                for platform in cl.get_platforms():
630 116 equemene
                    for device in platform.get_devices():
631 170 equemene
                        # Failed now because of POCL implementation
632 137 equemene
                        #deviceType=cl.device_type.to_string(device.type)
633 149 equemene
                        deviceType="xPU"
634 128 equemene
                        print("Device #%i from %s of type %s : %s" % (Id,platform.vendor.lstrip(),deviceType,device.name.lstrip()))
635 116 equemene
                        Id=Id+1
636 116 equemene
                sys.exit()
637 116 equemene
            except ImportError:
638 128 equemene
                print("Your platform does not seem to support OpenCL")
639 116 equemene
                sys.exit()
640 116 equemene
641 132 equemene
        elif opt in ("-t", "--valuetype"):
642 132 equemene
            if arg=='FP64':
643 132 equemene
                class MyFloat(np.float64): pass
644 132 equemene
            else:
645 132 equemene
                class MyFloat(np.float32):pass
646 132 equemene
            ValueType = arg
647 116 equemene
        elif opt in ("-d", "--device"):
648 116 equemene
            Device=int(arg)
649 121 equemene
        elif opt in ("-m", "--method"):
650 121 equemene
            Method=arg
651 170 equemene
        elif opt in ("-b", "--shape"):
652 170 equemene
            Shape=arg
653 116 equemene
        elif opt in ("-n", "--number"):
654 116 equemene
            Number=int(arg)
655 170 equemene
        elif opt in ("-i", "--iterations"):
656 170 equemene
            Iterations=int(arg)
657 116 equemene
        elif opt in ("-z", "--size"):
658 170 equemene
            SizeOfShape=MyFloat(arg)
659 116 equemene
        elif opt in ("-v", "--velocity"):
660 132 equemene
            Velocity=MyFloat(arg)
661 139 equemene
            VirielStress=False
662 116 equemene
        elif opt in ("-s", "--step"):
663 132 equemene
            Step=MyFloat(arg)
664 132 equemene
        elif opt in ("-r", "--random"):
665 132 equemene
            InitialRandom=True
666 133 equemene
        elif opt in ("-c", "--check"):
667 133 equemene
            CheckEnergies=True
668 139 equemene
        elif opt in ("-e", "--viriel"):
669 139 equemene
            VirielStress=True
670 170 equemene
        elif opt in ("-g", "--opengl"):
671 170 equemene
            OpenGL=True
672 170 equemene
        elif opt in ("-p", "--rendering"):
673 170 equemene
            SpeedRendering=True
674 171 equemene
        elif opt in ("-x", "--negexp"):
675 171 equemene
            CoArEv='NegExp'
676 170 equemene
        elif opt in ("-o", "--verbose"):
677 170 equemene
            Verbose=True
678 170 equemene
        elif opt in ("-f", "--force"):
679 170 equemene
            InterType='Force'
680 170 equemene
681 170 equemene
    SizeOfShape=MyFloat(SizeOfShape*Number)
682 132 equemene
    Velocity=MyFloat(Velocity)
683 132 equemene
    Step=MyFloat(Step)
684 132 equemene
685 128 equemene
    print("Device choosed : %s" % Device)
686 128 equemene
    print("Number of particules : %s" % Number)
687 170 equemene
    print("Size of Shape : %s" % SizeOfShape)
688 160 equemene
    print("Initial velocity : %s" % Velocity)
689 170 equemene
    print("Step of iteration : %s" % Step)
690 160 equemene
    print("Number of iterations : %s" % Iterations)
691 160 equemene
    print("Method of resolution : %s" % Method)
692 160 equemene
    print("Initial Random for RNG Seed : %s" % InitialRandom)
693 160 equemene
    print("ValueType is : %s" % ValueType)
694 170 equemene
    print("Viriel distribution of stress : %s" % VirielStress)
695 170 equemene
    print("OpenGL real time rendering : %s" % OpenGL)
696 170 equemene
    print("Speed rendering : %s" % SpeedRendering)
697 170 equemene
    print("Interaction type : %s" % InterType)
698 171 equemene
    print("Counter Artevasion type : %s" % CoArEv)
699 116 equemene
700 132 equemene
    # Create Numpy array of CL vector with 8 FP32
701 170 equemene
    MyCoM = np.zeros(4,dtype=MyFloat)
702 170 equemene
    MyDataX = np.zeros(Number*4, dtype=MyFloat)
703 170 equemene
    MyDataV = np.zeros(Number*4, dtype=MyFloat)
704 133 equemene
    MyPotential = np.zeros(Number, dtype=MyFloat)
705 133 equemene
    MyKinetic = np.zeros(Number, dtype=MyFloat)
706 132 equemene
707 171 equemene
    Marsaglia,Computing,Interaction,Artevasion=DictionariesAPI()
708 132 equemene
709 132 equemene
    # Scan the OpenCL arrays
710 132 equemene
    Id=0
711 116 equemene
    HasXPU=False
712 116 equemene
    for platform in cl.get_platforms():
713 116 equemene
        for device in platform.get_devices():
714 116 equemene
            if Id==Device:
715 116 equemene
                PlatForm=platform
716 116 equemene
                XPU=device
717 128 equemene
                print("CPU/GPU selected: ",device.name.lstrip())
718 151 equemene
                print("Platform selected: ",platform.name)
719 116 equemene
                HasXPU=True
720 116 equemene
            Id+=1
721 116 equemene
722 116 equemene
    if HasXPU==False:
723 128 equemene
        print("No XPU #%i found in all of %i devices, sorry..." % (Device,Id-1))
724 116 equemene
        sys.exit()
725 116 equemene
726 132 equemene
    # Create Context
727 116 equemene
    try:
728 116 equemene
        ctx = cl.Context([XPU])
729 116 equemene
        queue = cl.CommandQueue(ctx,properties=cl.command_queue_properties.PROFILING_ENABLE)
730 116 equemene
    except:
731 128 equemene
        print("Crash during context creation")
732 116 equemene
733 132 equemene
    # Build all routines used for the computing
734 170 equemene
735 170 equemene
    #BuildOptions="-cl-mad-enable -cl-kernel-arg-info -cl-fast-relaxed-math -cl-std=CL1.2 -DTRNG=%i -DTYPE=%i" % (Marsaglia[RNG],Computing[ValueType])
736 171 equemene
    BuildOptions="-cl-mad-enable -cl-fast-relaxed-math -DTRNG=%i -DTYPE=%i -DINTERACTION=%i -DARTEVASION=%i" % (Marsaglia[RNG],Computing[ValueType],Interaction[InterType],Artevasion[CoArEv])
737 170 equemene
738 170 equemene
    if 'Intel' in PlatForm.name or 'Experimental' in PlatForm.name or 'Clover' in PlatForm.name or 'Portable' in PlatForm.name :
739 151 equemene
        MyRoutines = cl.Program(ctx, BlobOpenCL).build(options = BuildOptions)
740 151 equemene
    else:
741 151 equemene
        MyRoutines = cl.Program(ctx, BlobOpenCL).build(options = BuildOptions+" -cl-strict-aliasing")
742 170 equemene
743 170 equemene
    mf = cl.mem_flags
744 170 equemene
    # Read/Write approach for buffering
745 170 equemene
    clDataX = cl.Buffer(ctx, mf.READ_WRITE, MyDataX.nbytes)
746 170 equemene
    clDataV = cl.Buffer(ctx, mf.READ_WRITE, MyDataV.nbytes)
747 170 equemene
    clPotential = cl.Buffer(ctx, mf.READ_WRITE, MyPotential.nbytes)
748 170 equemene
    clKinetic = cl.Buffer(ctx, mf.READ_WRITE, MyKinetic.nbytes)
749 170 equemene
    clCoM = cl.Buffer(ctx, mf.READ_WRITE, MyCoM.nbytes)
750 160 equemene
751 170 equemene
    # Write/HostPointer approach for buffering
752 170 equemene
    # clDataX = cl.Buffer(ctx, mf.WRITE_ONLY|mf.COPY_HOST_PTR,hostbuf=MyDataX)
753 170 equemene
    # clDataV = cl.Buffer(ctx, mf.WRITE_ONLY|mf.COPY_HOST_PTR,hostbuf=MyDataV)
754 170 equemene
    # clPotential = cl.Buffer(ctx, mf.WRITE_ONLY|mf.COPY_HOST_PTR,hostbuf=MyPotential)
755 170 equemene
    # clKinetic = cl.Buffer(ctx, mf.WRITE_ONLY|mf.COPY_HOST_PTR,hostbuf=MyKinetic)
756 170 equemene
    # clCoM = cl.Buffer(ctx, mf.WRITE_ONLY|mf.COPY_HOST_PTR,hostbuf=MyCoM)
757 116 equemene
758 134 equemene
    print('All particles superimposed.')
759 116 equemene
760 132 equemene
    # Set particles to RNG points
761 132 equemene
    if InitialRandom:
762 170 equemene
        seed_w=np.uint32(nprnd(2**32))
763 170 equemene
        seed_z=np.uint32(nprnd(2**32))
764 132 equemene
    else:
765 170 equemene
        seed_w=np.uint32(19710211)
766 170 equemene
        seed_z=np.uint32(20081010)
767 170 equemene
768 170 equemene
    if Shape=='Ball':
769 170 equemene
        MyRoutines.InBallSplutterPoints(queue,(Number,1),None,clDataX,SizeOfShape,seed_w,seed_z)
770 170 equemene
    else:
771 170 equemene
        MyRoutines.InBoxSplutterPoints(queue,(Number,1),None,clDataX,SizeOfShape,seed_w,seed_z)
772 116 equemene
773 132 equemene
    print('All particules distributed')
774 139 equemene
775 160 equemene
    CLLaunch=MyRoutines.CenterOfMass(queue,(1,1),None,clDataX,clCoM,np.int32(Number))
776 160 equemene
    CLLaunch.wait()
777 142 equemene
    cl.enqueue_copy(queue,MyCoM,clCoM)
778 170 equemene
    print('Center Of Mass estimated: (%s,%s,%s)' % (MyCoM[0],MyCoM[1],MyCoM[2]))
779 139 equemene
780 139 equemene
    if VirielStress:
781 160 equemene
        CLLaunch=MyRoutines.SplutterStress(queue,(Number,1),None,clDataX,clDataV,clCoM,MyFloat(0.),np.uint32(110271),np.uint32(250173))
782 139 equemene
    else:
783 160 equemene
        CLLaunch=MyRoutines.SplutterStress(queue,(Number,1),None,clDataX,clDataV,clCoM,Velocity,np.uint32(110271),np.uint32(250173))
784 160 equemene
    CLLaunch.wait()
785 139 equemene
786 170 equemene
    print('All particules stressed')
787 170 equemene
788 160 equemene
    CLLaunch=MyRoutines.Potential(queue,(Number,1),None,clDataX,clPotential)
789 160 equemene
    CLLaunch=MyRoutines.Kinetic(queue,(Number,1),None,clDataV,clKinetic)
790 133 equemene
    CLLaunch.wait()
791 141 equemene
    cl.enqueue_copy(queue,MyPotential,clPotential)
792 141 equemene
    cl.enqueue_copy(queue,MyKinetic,clKinetic)
793 170 equemene
    print('Energy estimated: Viriel=%s Potential=%s Kinetic=%s\n'% (np.sum(MyPotential)+2*np.sum(MyKinetic),np.sum(MyPotential),np.sum(MyKinetic)))
794 116 equemene
795 170 equemene
    if SpeedRendering:
796 170 equemene
        SizeOfBox=max(2*MyKinetic)
797 170 equemene
    else:
798 170 equemene
        SizeOfBox=SizeOfShape
799 170 equemene
800 170 equemene
    wall_time_start=time.time()
801 160 equemene
802 170 equemene
    Durations=np.array([],dtype=MyFloat)
803 170 equemene
    print('Starting!')
804 170 equemene
    if OpenGL:
805 170 equemene
        global ViewRX,ViewRY,ViewRZ
806 170 equemene
        Iterations=0
807 170 equemene
        ViewRX,ViewRY,ViewRZ = 0.,0.,0.
808 170 equemene
        # Launch OpenGL Loop
809 170 equemene
        glutInit(sys.argv)
810 170 equemene
        glutInitDisplayMode(GLUT_DOUBLE | GLUT_RGB)
811 170 equemene
        glutSetOption(GLUT_ACTION_ON_WINDOW_CLOSE,GLUT_ACTION_CONTINUE_EXECUTION)
812 170 equemene
        glutInitWindowSize(512,512)
813 170 equemene
        glutCreateWindow(b'NBodyGL')
814 170 equemene
        setup_viewport()
815 170 equemene
        glutReshapeFunc(reshape)
816 170 equemene
        glutDisplayFunc(display)
817 170 equemene
        glutIdleFunc(display)
818 170 equemene
        #   glutMouseFunc(mouse)
819 170 equemene
        glutSpecialFunc(special)
820 170 equemene
        glutKeyboardFunc(keyboard)
821 170 equemene
        glutMainLoop()
822 170 equemene
    else:
823 170 equemene
        for iteration in range(Iterations):
824 170 equemene
            Elapsed=MainOpenCL(clDataX,clDataV,Step,Method)
825 170 equemene
            if Verbose:
826 170 equemene
                # print("Duration of #%s iteration: %s" % (iteration,Elapsed))
827 170 equemene
                cl.enqueue_copy(queue, MyDataX, clDataX)
828 170 equemene
                print("Positions for #%s iteration: %s" % (iteration,MyDataX))
829 170 equemene
            else:
830 170 equemene
                sys.stdout.write('.')
831 170 equemene
                sys.stdout.flush()
832 170 equemene
            Durations=np.append(Durations,Elapsed)
833 133 equemene
834 170 equemene
    print('\nEnding!')
835 139 equemene
836 160 equemene
    MyRoutines.CenterOfMass(queue,(1,1),None,clDataX,clCoM,np.int32(Number))
837 160 equemene
    CLLaunch=MyRoutines.Potential(queue,(Number,1),None,clDataX,clPotential)
838 160 equemene
    CLLaunch=MyRoutines.Kinetic(queue,(Number,1),None,clDataV,clKinetic)
839 141 equemene
    CLLaunch.wait()
840 160 equemene
    cl.enqueue_copy(queue,MyCoM,clCoM)
841 141 equemene
    cl.enqueue_copy(queue,MyPotential,clPotential)
842 141 equemene
    cl.enqueue_copy(queue,MyKinetic,clKinetic)
843 170 equemene
    print('\nCenter Of Mass estimated: (%s,%s,%s)' % (MyCoM[0],MyCoM[1],MyCoM[2]))
844 170 equemene
    print('Energy estimated: Viriel=%s Potential=%s Kinetic=%s\n'% (np.sum(MyPotential)+2.*np.sum(MyKinetic),np.sum(MyPotential),np.sum(MyKinetic)))
845 135 equemene
846 170 equemene
    print("Duration stats on device %s with %s iterations :\n\tMean:\t%s\n\tMedian:\t%s\n\tStddev:\t%s\n\tMin:\t%s\n\tMax:\t%s\n\n\tVariability:\t%s\n" % (Device,Iterations,np.mean(Durations),np.median(Durations),np.std(Durations),np.min(Durations),np.max(Durations),np.std(Durations)/np.median(Durations)))
847 170 equemene
848 171 equemene
    # FPS: 1/Elapsed
849 171 equemene
    FPS=np.ones(len(Durations))
850 171 equemene
    FPS/=Durations
851 171 equemene
852 171 equemene
    print("FPS stats on device %s with %s iterations :\n\tMean:\t%s\n\tMedian:\t%s\n\tStddev:\t%s\n\tMin:\t%s\n\tMax:\t%s\n" % (Device,Iterations,np.mean(FPS),np.median(FPS),np.std(FPS),np.min(FPS),np.max(FPS)))
853 171 equemene
854 170 equemene
    # Contraction of Square*Size*Hertz: Size*Size/Elapsed
855 170 equemene
    Squertz=np.ones(len(Durations))
856 170 equemene
    Squertz*=Number*Number
857 170 equemene
    Squertz/=Durations
858 170 equemene
859 170 equemene
    print("Squertz in log10 stats on device %s with %s iterations :\n\tMean:\t%s\n\tMedian:\t%s\n\tStddev:\t%s\n\tMin:\t%s\n\tMax:\t%s\n" % (Device,Iterations,np.log10(np.mean(Squertz)),np.log10(np.median(Squertz)),np.log10(np.std(Squertz)),np.log10(np.min(Squertz)),np.log10(np.max(Squertz))))
860 170 equemene
861 160 equemene
    clDataX.release()
862 160 equemene
    clDataV.release()
863 133 equemene
    clKinetic.release()
864 133 equemene
    clPotential.release()