Statistiques
| Révision :

root / NBody / NBody.py @ 226

Historique | Voir | Annoter | Télécharger (29,02 ko)

1 128 equemene
#!/usr/bin/env python3
2 136 equemene
# -*- coding: utf-8 -*-
3 116 equemene
"""
4 171 equemene
NBody Demonstrator implemented in OpenCL, rendering OpenGL
5 116 equemene

6 174 equemene
CC BY-NC-SA 2011 : Emmanuel QUEMENER <emmanuel.quemener@gmail.com>
7 174 equemene
Cecill v2 : Emmanuel QUEMENER <emmanuel.quemener@gmail.com>
8 174 equemene

9 174 equemene
Thanks to Andreas Klockner for PyOpenCL:
10 174 equemene
http://mathema.tician.de/software/pyopencl
11 174 equemene

12 116 equemene
"""
13 116 equemene
import getopt
14 116 equemene
import sys
15 116 equemene
import time
16 116 equemene
import numpy as np
17 116 equemene
import pyopencl as cl
18 116 equemene
import pyopencl.array as cl_array
19 116 equemene
from numpy.random import randint as nprnd
20 170 equemene
import string, sys
21 170 equemene
from OpenGL.GL import *
22 170 equemene
from OpenGL.GLUT import *
23 116 equemene
24 132 equemene
def DictionariesAPI():
25 132 equemene
    Marsaglia={'CONG':0,'SHR3':1,'MWC':2,'KISS':3}
26 132 equemene
    Computing={'FP32':0,'FP64':1}
27 170 equemene
    Interaction={'Force':0,'Potential':1}
28 175 equemene
    Artevasion={'None':0,'NegExp':1,'CorRad':2}
29 171 equemene
    return(Marsaglia,Computing,Interaction,Artevasion)
30 132 equemene
31 142 equemene
BlobOpenCL= """
32 132 equemene
#define TFP32 0
33 132 equemene
#define TFP64 1
34 132 equemene

35 170 equemene
#define TFORCE 0
36 170 equemene
#define TPOTENTIAL 1
37 116 equemene

38 171 equemene
#define NONE 0
39 171 equemene
#define NEGEXP 1
40 175 equemene
#define CORRAD 2
41 171 equemene

42 132 equemene
#if TYPE == TFP32
43 132 equemene
#define MYFLOAT4 float4
44 132 equemene
#define MYFLOAT8 float8
45 132 equemene
#define MYFLOAT float
46 151 equemene
#define DISTANCE fast_distance
47 132 equemene
#else
48 132 equemene
#define MYFLOAT4 double4
49 132 equemene
#define MYFLOAT8 double8
50 132 equemene
#define MYFLOAT double
51 151 equemene
#define DISTANCE distance
52 170 equemene
#if defined(cl_khr_fp64)  // Khronos extension available?
53 170 equemene
#pragma OPENCL EXTENSION cl_khr_fp64 : enable
54 132 equemene
#endif
55 170 equemene
#endif
56 132 equemene

57 170 equemene
#define znew  ((zmwc=36969*(zmwc&65535)+(zmwc>>16))<<16)
58 170 equemene
#define wnew  ((wmwc=18000*(wmwc&65535)+(wmwc>>16))&65535)
59 170 equemene
#define MWC   (znew+wnew)
60 170 equemene
#define SHR3  (jsr=(jsr=(jsr=jsr^(jsr<<17))^(jsr>>13))^(jsr<<5))
61 170 equemene
#define CONG  (jcong=69069*jcong+1234567)
62 170 equemene
#define KISS  ((MWC^CONG)+SHR3)
63 170 equemene

64 160 equemene
#define MWCfp (MYFLOAT)(MWC * 2.3283064365386963e-10f)
65 160 equemene
#define KISSfp (MYFLOAT)(KISS * 2.3283064365386963e-10f)
66 160 equemene
#define SHR3fp (MYFLOAT)(SHR3 * 2.3283064365386963e-10f)
67 160 equemene
#define CONGfp (MYFLOAT)(CONG * 2.3283064365386963e-10f)
68 160 equemene

69 171 equemene
#define PI (MYFLOAT)3.141592653589793238e0f
70 160 equemene

71 170 equemene
#define SMALL_NUM (MYFLOAT)1.e-9f
72 160 equemene

73 175 equemene
#define CoreRadius (MYFLOAT)(1.e0f)
74 175 equemene

75 170 equemene
// Create my own Distance implementation: distance buggy on Oland AMD chipset
76 116 equemene

77 170 equemene
MYFLOAT MyDistance(MYFLOAT4 n,MYFLOAT4 m)
78 141 equemene
{
79 170 equemene
    private MYFLOAT x2,y2,z2;
80 170 equemene
    x2=n.s0-m.s0;
81 170 equemene
    x2*=x2;
82 170 equemene
    y2=n.s1-m.s1;
83 170 equemene
    y2*=y2;
84 170 equemene
    z2=n.s2-m.s2;
85 170 equemene
    z2*=z2;
86 170 equemene
    return(sqrt(x2+y2+z2));
87 141 equemene
}
88 141 equemene

89 171 equemene
// Potential between 2 m,n bodies
90 133 equemene
MYFLOAT PairPotential(MYFLOAT4 m,MYFLOAT4 n)
91 171 equemene
#if ARTEVASION == NEGEXP
92 171 equemene
// Add exp(-r) to numerator to avoid divergence for low distances
93 133 equemene
{
94 171 equemene
    MYFLOAT r=DISTANCE(n,m);
95 171 equemene
    return((-1.e0f+exp(-r))/r);
96 171 equemene
}
97 175 equemene
#elif ARTEVASION == CORRAD
98 175 equemene
// Add Core Radius to avoid divergence for low distances
99 175 equemene
{
100 175 equemene
    MYFLOAT r=DISTANCE(n,m);
101 175 equemene
    return(-1.e0f/sqrt(r*r+CoreRadius*CoreRadius));
102 175 equemene
}
103 171 equemene
#else
104 171 equemene
// Classical potential in 1/r
105 171 equemene
{
106 170 equemene
//    return((MYFLOAT)(-1.e0f)/(MyDistance(m,n)));
107 151 equemene
    return((MYFLOAT)(-1.e0f)/(DISTANCE(n,m)));
108 133 equemene
}
109 171 equemene
#endif
110 133 equemene

111 176 equemene
// Interaction based of Force as gradient of Potential
112 170 equemene
MYFLOAT4 Interaction(MYFLOAT4 m,MYFLOAT4 n)
113 170 equemene
#if INTERACTION == TFORCE
114 175 equemene
#if ARTEVASION == NEGEXP
115 175 equemene
// Force gradient of potential, set as (1-exp(-r))/r
116 175 equemene
{
117 175 equemene
    private MYFLOAT r=MyDistance(n,m);
118 175 equemene
    private MYFLOAT num=1.e0f+exp(-r)*(r-1.e0f);
119 175 equemene
    return((n-m)*num/(MYFLOAT)(r*r*r));
120 175 equemene
}
121 175 equemene
#elif ARTEVASION == CORRAD
122 175 equemene
// Force gradient of potential, (Core Radius) set as 1/sqrt(r**2+CoreRadius**2)
123 175 equemene
{
124 175 equemene
    private MYFLOAT r=MyDistance(n,m);
125 175 equemene
    private MYFLOAT den=sqrt(r*r+CoreRadius*CoreRadius);
126 175 equemene
    return((n-m)/(MYFLOAT)(den*den*den));
127 175 equemene
}
128 175 equemene
#else
129 170 equemene
// Simplest implementation of force (equals to acceleration)
130 170 equemene
// seems to bo bad (numerous artevasions)
131 170 equemene
// MYFLOAT4 InteractionForce(MYFLOAT4 m,MYFLOAT4 n)
132 170 equemene
{
133 170 equemene
    private MYFLOAT r=MyDistance(n,m);
134 170 equemene
    return((n-m)/(MYFLOAT)(r*r*r));
135 170 equemene
}
136 175 equemene
#endif
137 170 equemene
#else
138 170 equemene
// Force definited as gradient of potential
139 170 equemene
// Estimate potential and proximate potential to estimate force
140 170 equemene
{
141 171 equemene
    // 1/1024 seems to be a good factor: larger one provides bad results
142 171 equemene
    private MYFLOAT epsilon=(MYFLOAT)(1.e0f/1024);
143 170 equemene
    private MYFLOAT4 er=normalize(n-m);
144 170 equemene
    private MYFLOAT4 dr=er*(MYFLOAT)epsilon;
145 170 equemene

146 171 equemene
    return(er/epsilon*(PairPotential(m,n)-PairPotential(m+dr,n)));
147 170 equemene
}
148 170 equemene
#endif
149 170 equemene

150 160 equemene
MYFLOAT AtomicPotential(__global MYFLOAT4* clDataX,int gid)
151 139 equemene
{
152 160 equemene
    private MYFLOAT potential=(MYFLOAT)0.e0f;
153 160 equemene
    private MYFLOAT4 x=clDataX[gid];
154 139 equemene

155 139 equemene
    for (int i=0;i<get_global_size(0);i++)
156 139 equemene
    {
157 139 equemene
        if (gid != i)
158 160 equemene
        potential+=PairPotential(x,clDataX[i]);
159 139 equemene
    }
160 133 equemene

161 139 equemene
    barrier(CLK_GLOBAL_MEM_FENCE);
162 141 equemene
    return(potential);
163 139 equemene
}
164 139 equemene

165 160 equemene
MYFLOAT AtomicPotentialCoM(__global MYFLOAT4* clDataX,__global MYFLOAT4* clCoM,int gid)
166 139 equemene
{
167 160 equemene
    return(PairPotential(clDataX[gid],clCoM[0]));
168 139 equemene
}
169 139 equemene

170 170 equemene
// Elements from : http://doswa.com/2009/01/02/fourth-order-runge-kutta-numerical-integration.html
171 170 equemene

172 160 equemene
MYFLOAT8 AtomicRungeKutta(__global MYFLOAT4* clDataInX,__global MYFLOAT4* clDataInV,int gid,MYFLOAT dt)
173 116 equemene
{
174 160 equemene
    private MYFLOAT4 a0,v0,x0,a1,v1,x1,a2,v2,x2,a3,v3,x3,a4,v4,x4,xf,vf;
175 170 equemene
    MYFLOAT4 DT=dt*(MYFLOAT4)(1.e0f,1.e0f,1.e0f,1.e0f);
176 160 equemene

177 160 equemene
    a0=(MYFLOAT4)(0.e0f,0.e0f,0.e0f,0.e0f);
178 160 equemene
    v0=(MYFLOAT4)clDataInV[gid];
179 160 equemene
    x0=(MYFLOAT4)clDataInX[gid];
180 133 equemene
    int N = get_global_size(0);
181 133 equemene

182 170 equemene
    for (private int i=0;i<N;i++)
183 121 equemene
    {
184 121 equemene
        if (gid != i)
185 160 equemene
        a0+=Interaction(x0,clDataInX[i]);
186 121 equemene
    }
187 121 equemene

188 160 equemene
    a1=(MYFLOAT4)(0.e0f,0.e0f,0.e0f,0.e0f);
189 170 equemene
    v1=a0*dt+v0;
190 170 equemene
    x1=v0*dt+x0;
191 170 equemene
    for (private int j=0;j<N;j++)
192 121 equemene
    {
193 170 equemene
        if (gid != j)
194 170 equemene
        a1+=Interaction(x1,clDataInX[j]);
195 121 equemene
    }
196 121 equemene

197 160 equemene
    a2=(MYFLOAT4)(0.e0f,0.e0f,0.e0f,0.e0f);
198 170 equemene
    v2=a1*(MYFLOAT)(dt/2.e0f)+v0;
199 170 equemene
    x2=v1*(MYFLOAT)(dt/2.e0f)+x0;
200 170 equemene
    for (private int k=0;k<N;k++)
201 121 equemene
    {
202 170 equemene
        if (gid != k)
203 170 equemene
        a2+=Interaction(x2,clDataInX[k]);
204 121 equemene
    }
205 121 equemene

206 160 equemene
    a3=(MYFLOAT4)(0.e0f,0.e0f,0.e0f,0.e0f);
207 170 equemene
    v3=a2*(MYFLOAT)(dt/2.e0f)+v0;
208 170 equemene
    x3=v2*(MYFLOAT)(dt/2.e0f)+x0;
209 170 equemene
    for (private int l=0;l<N;l++)
210 121 equemene
    {
211 170 equemene
        if (gid != l)
212 170 equemene
        a3+=Interaction(x3,clDataInX[l]);
213 121 equemene
    }
214 121 equemene

215 160 equemene
    a4=(MYFLOAT4)(0.e0f,0.e0f,0.e0f,0.e0f);
216 170 equemene
    v4=a3*dt+v0;
217 170 equemene
    x4=v3*dt+x0;
218 170 equemene
    for (private int m=0;m<N;m++)
219 141 equemene
    {
220 170 equemene
        if (gid != m)
221 170 equemene
        a4+=Interaction(x4,clDataInX[m]);
222 141 equemene
    }
223 141 equemene

224 160 equemene
    xf=x0+dt*(v1+(MYFLOAT)2.e0f*(v2+v3)+v4)/(MYFLOAT)6.e0f;
225 160 equemene
    vf=v0+dt*(a1+(MYFLOAT)2.e0f*(a2+a3)+a4)/(MYFLOAT)6.e0f;
226 121 equemene

227 170 equemene
    return((MYFLOAT8)(xf.s0,xf.s1,xf.s2,0.e0f,vf.s0,vf.s1,vf.s2,0.e0f));
228 121 equemene
}
229 121 equemene

230 160 equemene
MYFLOAT8 AtomicHeun(__global MYFLOAT4* clDataInX,__global MYFLOAT4* clDataInV,int gid,MYFLOAT dt)
231 121 equemene
{
232 170 equemene
    private MYFLOAT4 x0,v0,a0,x1,v1,a1,xf,vf;
233 170 equemene
    MYFLOAT4 Dt=dt*(MYFLOAT4)(1.e0f,1.e0f,1.e0f,1.e0f);
234 116 equemene

235 170 equemene
    x0=(MYFLOAT4)clDataInX[gid];
236 170 equemene
    v0=(MYFLOAT4)clDataInV[gid];
237 170 equemene
    a0=(MYFLOAT4)(0.e0f,0.e0f,0.e0f,0.e0f);
238 141 equemene

239 170 equemene
    for (private int i=0;i<get_global_size(0);i++)
240 116 equemene
    {
241 116 equemene
        if (gid != i)
242 170 equemene
        a0+=Interaction(x0,clDataInX[i]);
243 116 equemene
    }
244 141 equemene

245 170 equemene
    a1=(MYFLOAT4)(0.e0f,0.e0f,0.e0f,0.e0f);
246 170 equemene
    //v1=v0+dt*a0;
247 170 equemene
    //x1=x0+dt*v0;
248 170 equemene
    v1=dt*a0+v0;
249 170 equemene
    x1=dt*v0+x0;
250 141 equemene

251 170 equemene
    for (private int j=0;j<get_global_size(0);j++)
252 116 equemene
    {
253 170 equemene
        if (gid != j)
254 170 equemene
        a1+=Interaction(x1,clDataInX[j]);
255 116 equemene
    }
256 118 equemene

257 170 equemene
    vf=v0+dt*(a0+a1)/(MYFLOAT)2.e0f;
258 170 equemene
    xf=x0+dt*(v0+v1)/(MYFLOAT)2.e0f;
259 170 equemene

260 170 equemene
    return((MYFLOAT8)(xf.s0,xf.s1,xf.s2,0.e0f,vf.s0,vf.s1,vf.s2,0.e0f));
261 119 equemene
}
262 119 equemene

263 160 equemene
MYFLOAT8 AtomicImplicitEuler(__global MYFLOAT4* clDataInX,__global MYFLOAT4* clDataInV,int gid,MYFLOAT dt)
264 119 equemene
{
265 170 equemene
    MYFLOAT4 x0,v0,a,xf,vf;
266 119 equemene

267 170 equemene
    x0=(MYFLOAT4)clDataInX[gid];
268 170 equemene
    v0=(MYFLOAT4)clDataInV[gid];
269 151 equemene
    a=(MYFLOAT4)(0.e0f,0.e0f,0.e0f,0.e0f);
270 160 equemene

271 170 equemene
    for (private int i=0;i<get_global_size(0);i++)
272 119 equemene
    {
273 119 equemene
        if (gid != i)
274 170 equemene
          a+=Interaction(x0,clDataInX[i]);
275 119 equemene
    }
276 133 equemene

277 170 equemene
    vf=v0+dt*a;
278 170 equemene
    xf=x0+dt*vf;
279 170 equemene

280 170 equemene
    return((MYFLOAT8)(xf.s0,xf.s1,xf.s2,0.e0f,vf.s0,vf.s1,vf.s2,0.e0f));
281 116 equemene
}
282 116 equemene

283 160 equemene
MYFLOAT8 AtomicExplicitEuler(__global MYFLOAT4* clDataInX,__global MYFLOAT4* clDataInV,int gid,MYFLOAT dt)
284 140 equemene
{
285 170 equemene
    MYFLOAT4 x0,v0,a,xf,vf;
286 140 equemene

287 170 equemene
    x0=(MYFLOAT4)clDataInX[gid];
288 170 equemene
    v0=(MYFLOAT4)clDataInV[gid];
289 151 equemene
    a=(MYFLOAT4)(0.e0f,0.e0f,0.e0f,0.e0f);
290 160 equemene

291 170 equemene
    for (private int i=0;i<get_global_size(0);i++)
292 140 equemene
    {
293 140 equemene
        if (gid != i)
294 170 equemene
        a+=Interaction(x0,clDataInX[i]);
295 140 equemene
    }
296 140 equemene

297 170 equemene
    vf=v0+dt*a;
298 170 equemene
    xf=x0+dt*v0;
299 140 equemene

300 170 equemene
    return((MYFLOAT8)(xf.s0,xf.s1,xf.s2,0.e0f,vf.s0,vf.s1,vf.s2,0.e0f));
301 140 equemene
}
302 140 equemene

303 170 equemene
__kernel void InBallSplutterPoints(__global MYFLOAT4* clDataX,
304 170 equemene
                                   MYFLOAT diameter,uint seed_z,uint seed_w)
305 170 equemene
{
306 170 equemene
    private int gid=get_global_id(0);
307 170 equemene
    private uint zmwc=seed_z+gid;
308 170 equemene
    private uint wmwc=seed_w+(gid+1)%2;
309 170 equemene
    private MYFLOAT Heat;
310 170 equemene

311 170 equemene
    for (int i=0;i<gid;i++)
312 170 equemene
    {
313 170 equemene
        Heat=MWCfp;
314 170 equemene
    }
315 170 equemene

316 170 equemene
// More accurate distribution based on spherical coordonates
317 170 equemene
// Disactivated because of AMD Oland GPU crash on launch
318 170 equemene
//     private MYFLOAT Radius,Theta,Phi,PosX,PosY,PosZ,SinTheta;
319 170 equemene
//     Radius=MWCfp*diameter/2.e0f;
320 170 equemene
//     Theta=(MYFLOAT)acos((float)(-2.e0f*MWCfp+1.0e0f));
321 170 equemene
//     Phi=(MYFLOAT)(2.e0f*PI*MWCfp);
322 170 equemene
//     SinTheta=sin((float)Theta);
323 170 equemene
//     PosX=cos((float)Phi)*Radius*SinTheta;
324 170 equemene
//     PosY=sin((float)Phi)*Radius*SinTheta;
325 170 equemene
//     PosZ=cos((float)Theta)*Radius;
326 170 equemene
//     clDataX[gid]=(MYFLOAT4)(PosX,PosY,PosZ,0.e0f);
327 170 equemene

328 170 equemene
    private MYFLOAT Radius=diameter/2.e0f;
329 170 equemene
    private MYFLOAT Length=diameter;
330 170 equemene
    private MYFLOAT4 Position;
331 170 equemene
    while (Length>Radius) {
332 170 equemene
       Position=(MYFLOAT4)((MWCfp-0.5e0f)*diameter,(MWCfp-0.5e0f)*diameter,(MWCfp-0.5e0f)*diameter,0.e0f);
333 170 equemene
       Length=(MYFLOAT)length((MYFLOAT4)Position);
334 170 equemene
    }
335 170 equemene

336 170 equemene
    clDataX[gid]=Position;
337 170 equemene

338 170 equemene
    barrier(CLK_GLOBAL_MEM_FENCE);
339 170 equemene
}
340 170 equemene

341 170 equemene
__kernel void InBoxSplutterPoints(__global MYFLOAT4* clDataX, MYFLOAT box,
342 139 equemene
                             uint seed_z,uint seed_w)
343 116 equemene
{
344 170 equemene
    int gid=get_global_id(0);
345 170 equemene
    uint zmwc=seed_z+gid;
346 170 equemene
    uint wmwc=seed_w-gid;
347 170 equemene
    private MYFLOAT Heat;
348 170 equemene

349 170 equemene
    for (int i=0;i<gid;i++)
350 170 equemene
    {
351 170 equemene
        Heat=MWCfp;
352 170 equemene
    }
353 137 equemene

354 170 equemene
    clDataX[gid]=(MYFLOAT4)((MWCfp-0.5e0f)*box,(MWCfp-0.5e0f)*box,(MWCfp-0.5e0f)*box,0.e0f);
355 170 equemene

356 170 equemene
    barrier(CLK_GLOBAL_MEM_FENCE);
357 116 equemene
}
358 116 equemene

359 160 equemene
__kernel void SplutterStress(__global MYFLOAT4* clDataX,__global MYFLOAT4* clDataV,__global MYFLOAT4* clCoM, MYFLOAT velocity,uint seed_z,uint seed_w)
360 139 equemene
{
361 139 equemene
    int gid = get_global_id(0);
362 142 equemene
    MYFLOAT N = (MYFLOAT)get_global_size(0);
363 170 equemene
    uint zmwc=seed_z+(uint)gid;
364 170 equemene
    uint wmwc=seed_w-(uint)gid;
365 178 equemene
    MYFLOAT4 CrossVector,SpeedVector,FromCoM;
366 178 equemene
    MYFLOAT Heat,ThetaA,PhiA,ThetaB,PhiB,Length,tA,tB,Polar;
367 139 equemene

368 178 equemene
    for (int i=0;i<gid;i++)
369 178 equemene
    {
370 178 equemene
        Heat=MWCfp;
371 178 equemene
    }
372 178 equemene

373 178 equemene
    // cast to float for sin,cos are NEEDED by Mesa FP64 implementation!
374 178 equemene
    // Implemention on AMD Oland are probably broken in float
375 178 equemene

376 178 equemene
    FromCoM=(MYFLOAT4)(clDataX[gid]-clCoM[0]);
377 178 equemene
    Length=length(FromCoM);
378 178 equemene
    //Theta=acos(FromCoM.z/Length);
379 178 equemene
    //Phi=atan(FromCoM.y/FromCoM.x);
380 178 equemene
    // First tangential vector to sphere of length radius
381 178 equemene
    ThetaA=acos(FromCoM.x/Length)+5.e-1f*PI;
382 178 equemene
    PhiA=atan(FromCoM.y/FromCoM.z);
383 178 equemene
    // Second tangential vector to sphere of length radius
384 179 equemene
    ThetaB=acos((float)(FromCoM.x/Length));
385 179 equemene
    PhiB=atan((float)(FromCoM.y/FromCoM.z))+5.e-1f*PI;
386 178 equemene
    // (x,y) random coordonates to plane tangential to sphere
387 178 equemene
    Polar=MWCfp*2.e0f*PI;
388 179 equemene
    tA=cos((float)Polar);
389 179 equemene
    tB=sin((float)Polar);
390 178 equemene

391 178 equemene
    // Exception for 2 particules to ovoid shifting
392 171 equemene
    if (get_global_size(0)==2) {
393 171 equemene
       CrossVector=(MYFLOAT4)(1.e0f,1.e0f,1.e0f,0.e0f);
394 171 equemene
    } else {
395 179 equemene
       CrossVector.s0=tA*cos((float)ThetaA)+tB*cos((float)ThetaB);
396 179 equemene
       CrossVector.s1=tA*sin((float)ThetaA)*sin((float)PhiA)+tB*sin((float)ThetaB)*sin((float)PhiB);
397 179 equemene
       CrossVector.s2=tA*sin((float)ThetaA)*cos((float)PhiA)+tB*sin((float)ThetaB)*cos((float)PhiB);
398 178 equemene
       CrossVector.s3=0.e0f;
399 171 equemene
    }
400 171 equemene

401 139 equemene
    if (velocity<SMALL_NUM) {
402 178 equemene
       SpeedVector=(MYFLOAT4)normalize(cross(FromCoM,CrossVector))*sqrt((-AtomicPotential(clDataX,gid)/(MYFLOAT)2.e0f));
403 139 equemene
    }
404 139 equemene
    else
405 178 equemene
    {
406 170 equemene

407 176 equemene
       SpeedVector=(MYFLOAT4)((MWCfp-5e-1f)*velocity,(MWCfp-5e-1f)*velocity,
408 176 equemene
                              (MWCfp-5e-1f)*velocity,0.e0f);
409 139 equemene
    }
410 170 equemene
    clDataV[gid]=SpeedVector;
411 170 equemene
    barrier(CLK_GLOBAL_MEM_FENCE);
412 139 equemene
}
413 139 equemene

414 160 equemene
__kernel void RungeKutta(__global MYFLOAT4* clDataX,__global MYFLOAT4* clDataV,MYFLOAT h)
415 116 equemene
{
416 170 equemene
    private int gid = get_global_id(0);
417 170 equemene
    private MYFLOAT8 clDataGid;
418 116 equemene

419 170 equemene
    clDataGid=AtomicRungeKutta(clDataX,clDataV,gid,h);
420 116 equemene
    barrier(CLK_GLOBAL_MEM_FENCE);
421 170 equemene
    clDataX[gid]=clDataGid.s0123;
422 170 equemene
    clDataV[gid]=clDataGid.s4567;
423 116 equemene
}
424 116 equemene

425 170 equemene
__kernel void Heun(__global MYFLOAT4* clDataX,__global MYFLOAT4* clDataV,MYFLOAT h)
426 116 equemene
{
427 170 equemene
    private int gid = get_global_id(0);
428 170 equemene
    private MYFLOAT8 clDataGid;
429 116 equemene

430 170 equemene
    clDataGid=AtomicHeun(clDataX,clDataV,gid,h);
431 116 equemene
    barrier(CLK_GLOBAL_MEM_FENCE);
432 170 equemene
    clDataX[gid]=clDataGid.s0123;
433 170 equemene
    clDataV[gid]=clDataGid.s4567;
434 116 equemene
}
435 133 equemene

436 170 equemene
__kernel void ImplicitEuler(__global MYFLOAT4* clDataX,__global MYFLOAT4* clDataV,MYFLOAT h)
437 141 equemene
{
438 170 equemene
    private int gid = get_global_id(0);
439 170 equemene
    private MYFLOAT8 clDataGid;
440 141 equemene

441 170 equemene
    clDataGid=AtomicImplicitEuler(clDataX,clDataV,gid,h);
442 141 equemene
    barrier(CLK_GLOBAL_MEM_FENCE);
443 170 equemene
    clDataX[gid]=clDataGid.s0123;
444 170 equemene
    clDataV[gid]=clDataGid.s4567;
445 141 equemene
}
446 141 equemene

447 160 equemene
__kernel void ExplicitEuler(__global MYFLOAT4* clDataX,__global MYFLOAT4* clDataV,MYFLOAT h)
448 140 equemene
{
449 170 equemene
    private int gid = get_global_id(0);
450 170 equemene
    private MYFLOAT8 clDataGid;
451 170 equemene

452 170 equemene
    clDataGid=AtomicExplicitEuler(clDataX,clDataV,gid,h);
453 140 equemene
    barrier(CLK_GLOBAL_MEM_FENCE);
454 170 equemene
    clDataX[gid]=clDataGid.s0123;
455 170 equemene
    clDataV[gid]=clDataGid.s4567;
456 140 equemene
}
457 139 equemene

458 160 equemene
__kernel void CoMPotential(__global MYFLOAT4* clDataX,__global MYFLOAT4* clCoM,__global MYFLOAT* clPotential)
459 133 equemene
{
460 133 equemene
    int gid = get_global_id(0);
461 133 equemene

462 160 equemene
    clPotential[gid]=PairPotential(clDataX[gid],clCoM[0]);
463 139 equemene
}
464 139 equemene

465 160 equemene
__kernel void Potential(__global MYFLOAT4* clDataX,__global MYFLOAT* clPotential)
466 139 equemene
{
467 139 equemene
    int gid = get_global_id(0);
468 139 equemene

469 155 equemene
    MYFLOAT potential=(MYFLOAT)0.e0f;
470 160 equemene
    MYFLOAT4 x=clDataX[gid];
471 133 equemene

472 133 equemene
    for (int i=0;i<get_global_size(0);i++)
473 133 equemene
    {
474 133 equemene
        if (gid != i)
475 160 equemene
        potential+=PairPotential(x,clDataX[i]);
476 133 equemene
    }
477 133 equemene

478 133 equemene
    barrier(CLK_GLOBAL_MEM_FENCE);
479 155 equemene
    clPotential[gid]=potential*(MYFLOAT)5.e-1f;
480 133 equemene
}
481 133 equemene

482 160 equemene
__kernel void CenterOfMass(__global MYFLOAT4* clDataX,__global MYFLOAT4* clCoM,int Size)
483 139 equemene
{
484 160 equemene
    MYFLOAT4 CoM=clDataX[0];
485 142 equemene

486 139 equemene
    for (int i=1;i<Size;i++)
487 139 equemene
    {
488 160 equemene
        CoM+=clDataX[i];
489 139 equemene
    }
490 142 equemene

491 139 equemene
    barrier(CLK_GLOBAL_MEM_FENCE);
492 170 equemene
    clCoM[0]=(MYFLOAT4)(CoM.s0,CoM.s1,CoM.s2,0.e0f)/(MYFLOAT)Size;
493 139 equemene
}
494 139 equemene

495 160 equemene
__kernel void Kinetic(__global MYFLOAT4* clDataV,__global MYFLOAT* clKinetic)
496 133 equemene
{
497 133 equemene
    int gid = get_global_id(0);
498 133 equemene

499 139 equemene
    barrier(CLK_GLOBAL_MEM_FENCE);
500 160 equemene
    MYFLOAT d=(MYFLOAT)length(clDataV[gid]);
501 155 equemene
    clKinetic[gid]=(MYFLOAT)5.e-1f*(MYFLOAT)(d*d);
502 133 equemene
}
503 170 equemene

504 116 equemene
"""
505 116 equemene
506 170 equemene
def MainOpenCL(clDataX,clDataV,Step,Method):
507 170 equemene
    time_start=time.time()
508 170 equemene
    if Method=="RungeKutta":
509 170 equemene
        CLLaunch=MyRoutines.RungeKutta(queue,(Number,1),None,clDataX,clDataV,Step)
510 170 equemene
    elif Method=="ExplicitEuler":
511 170 equemene
        CLLaunch=MyRoutines.ExplicitEuler(queue,(Number,1),None,clDataX,clDataV,Step)
512 170 equemene
    elif Method=="Heun":
513 170 equemene
        CLLaunch=MyRoutines.Heun(queue,(Number,1),None,clDataX,clDataV,Step)
514 170 equemene
    else:
515 170 equemene
        CLLaunch=MyRoutines.ImplicitEuler(queue,(Number,1),None,clDataX,clDataV,Step)
516 170 equemene
    CLLaunch.wait()
517 170 equemene
    Elapsed=time.time()-time_start
518 170 equemene
    return(Elapsed)
519 170 equemene
520 170 equemene
def display(*args):
521 133 equemene
522 170 equemene
    global MyDataX,MyDataV,clDataX,clDataV,Step,Method,Number,Iterations,Durations,Verbose,SpeedRendering
523 170 equemene
524 170 equemene
    glClearColor(0.0, 0.0, 0.0, 0.0)
525 170 equemene
    glClear(GL_COLOR_BUFFER_BIT)
526 170 equemene
    glColor3f(1.0,1.0,1.0)
527 170 equemene
528 170 equemene
    Elapsed=MainOpenCL(clDataX,clDataV,Step,Method)
529 170 equemene
    if SpeedRendering:
530 170 equemene
        cl.enqueue_copy(queue, MyDataV, clDataV)
531 170 equemene
        MyDataV.reshape(Number,4)[:,3]=1
532 170 equemene
        glVertexPointerf(MyDataV.reshape(Number,4))
533 170 equemene
    else:
534 170 equemene
        cl.enqueue_copy(queue, MyDataX, clDataX)
535 170 equemene
        MyDataX.reshape(Number,4)[:,3]=1
536 170 equemene
        glVertexPointerf(MyDataX.reshape(Number,4))
537 170 equemene
538 170 equemene
    if Verbose:
539 170 equemene
        print("Positions for #%s iteration: %s" % (Iterations,MyDataX))
540 170 equemene
    else:
541 170 equemene
        sys.stdout.write('.')
542 170 equemene
        sys.stdout.flush()
543 170 equemene
    Durations=np.append(Durations,MainOpenCL(clDataX,clDataV,Step,Method))
544 170 equemene
    glEnableClientState(GL_VERTEX_ARRAY)
545 170 equemene
    glDrawArrays(GL_POINTS, 0, Number)
546 170 equemene
    glDisableClientState(GL_VERTEX_ARRAY)
547 170 equemene
    glFlush()
548 170 equemene
    Iterations+=1
549 170 equemene
    glutSwapBuffers()
550 170 equemene
551 170 equemene
def halt():
552 170 equemene
    pass
553 170 equemene
554 170 equemene
def keyboard(k,x,y):
555 174 equemene
    global ViewRZ,SpeedRendering
556 170 equemene
    LC_Z = as_8_bit( 'z' )
557 170 equemene
    UC_Z = as_8_bit( 'Z' )
558 170 equemene
    Plus = as_8_bit( '+' )
559 170 equemene
    Minus = as_8_bit( '-' )
560 170 equemene
    Switch = as_8_bit( 's' )
561 170 equemene
562 170 equemene
    Zoom=1
563 175 equemene
    if k == LC_Z:
564 170 equemene
        ViewRZ += 1.0
565 170 equemene
    elif k == UC_Z:
566 170 equemene
        ViewRZ -= 1.0
567 170 equemene
    elif k == Plus:
568 170 equemene
        Zoom *= 2.0
569 170 equemene
    elif k == Minus:
570 170 equemene
        Zoom /= 2.0
571 170 equemene
    elif k == Switch:
572 170 equemene
        if SpeedRendering:
573 170 equemene
            SpeedRendering=False
574 170 equemene
        else:
575 170 equemene
            SpeedRendering=True
576 170 equemene
    elif ord(k) == 27: # Escape
577 170 equemene
        glutLeaveMainLoop()
578 170 equemene
        return(False)
579 170 equemene
    else:
580 170 equemene
        return
581 170 equemene
    glRotatef(ViewRZ, 0.0, 0.0, 1.0)
582 170 equemene
    glScalef(Zoom,Zoom,Zoom)
583 170 equemene
    glutPostRedisplay()
584 170 equemene
585 170 equemene
def special(k,x,y):
586 174 equemene
    global ViewRX, ViewRY
587 170 equemene
588 178 equemene
    Step=1.
589 170 equemene
    if k == GLUT_KEY_UP:
590 178 equemene
        ViewRX += Step
591 170 equemene
    elif k == GLUT_KEY_DOWN:
592 178 equemene
        ViewRX -= Step
593 170 equemene
    elif k == GLUT_KEY_LEFT:
594 178 equemene
        ViewRY += Step
595 170 equemene
    elif k == GLUT_KEY_RIGHT:
596 178 equemene
        ViewRY -= Step
597 170 equemene
    else:
598 170 equemene
        return
599 170 equemene
    glRotatef(ViewRX, 1.0, 0.0, 0.0)
600 170 equemene
    glRotatef(ViewRY, 0.0, 1.0, 0.0)
601 170 equemene
    glutPostRedisplay()
602 170 equemene
603 170 equemene
def setup_viewport():
604 170 equemene
    global SizeOfBox
605 170 equemene
    glMatrixMode(GL_PROJECTION)
606 170 equemene
    glLoadIdentity()
607 170 equemene
    glOrtho(-SizeOfBox, SizeOfBox, -SizeOfBox, SizeOfBox, -SizeOfBox, SizeOfBox)
608 170 equemene
    glutPostRedisplay()
609 170 equemene
610 170 equemene
def reshape(w, h):
611 170 equemene
    glViewport(0, 0, w, h)
612 170 equemene
    setup_viewport()
613 170 equemene
614 116 equemene
if __name__=='__main__':
615 170 equemene
616 170 equemene
    global Number,Step,clDataX,clDataV,MyDataX,MyDataV,Method,SizeOfBox,Iterations,Verbose,Durations
617 116 equemene
618 132 equemene
    # ValueType
619 132 equemene
    ValueType='FP32'
620 132 equemene
    class MyFloat(np.float32):pass
621 160 equemene
    #    clType8=cl_array.vec.float8
622 116 equemene
    # Set defaults values
623 118 equemene
    np.set_printoptions(precision=2)
624 116 equemene
    # Id of Device : 1 is for first find !
625 160 equemene
    Device=0
626 170 equemene
    # Number of bodies is integer
627 160 equemene
    Number=2
628 170 equemene
    # Number of iterations (for standalone execution)
629 170 equemene
    Iterations=10
630 170 equemene
    # Size of shape
631 170 equemene
    SizeOfShape=MyFloat(1.)
632 116 equemene
    # Initial velocity of particules
633 132 equemene
    Velocity=MyFloat(1.)
634 116 equemene
    # Step
635 170 equemene
    Step=MyFloat(1./32)
636 121 equemene
    # Method of integration
637 150 equemene
    Method='ImplicitEuler'
638 132 equemene
    # InitialRandom
639 132 equemene
    InitialRandom=False
640 132 equemene
    # RNG Marsaglia Method
641 132 equemene
    RNG='MWC'
642 139 equemene
    # Viriel Distribution of stress
643 139 equemene
    VirielStress=True
644 170 equemene
    # Verbose
645 170 equemene
    Verbose=False
646 170 equemene
    # OpenGL real time rendering
647 170 equemene
    OpenGL=False
648 170 equemene
    # Speed rendering
649 170 equemene
    SpeedRendering=False
650 171 equemene
    # Counter ArtEvasions Measures (artefact evasion)
651 171 equemene
    CoArEv='None'
652 170 equemene
    # Shape to distribute
653 170 equemene
    Shape='Ball'
654 170 equemene
    # Type of Interaction
655 172 equemene
    InterType='Force'
656 132 equemene
657 175 equemene
    HowToUse='%s -h [Help] -r [InitialRandom] -g [OpenGL] -e [VirielStress] -o [Verbose] -p [Potential] -x <None|NegExp|CorRad> -d <DeviceId> -n <NumberOfParticules> -i <Iterations> -z <SizeOfBoxOrBall> -v <Velocity> -s <Step> -b <Ball|Box> -m <ImplicitEuler|RungeKutta|ExplicitEuler|Heun> -t <FP32|FP64>'
658 116 equemene
659 116 equemene
    try:
660 175 equemene
        opts, args = getopt.getopt(sys.argv[1:],"rpgehod:n:i:z:v:s:m:t:b:x:",["random","potential","coarev","opengl","viriel","verbose","device=","number=","iterations=","size=","velocity=","step=","method=","valuetype=","shape="])
661 116 equemene
    except getopt.GetoptError:
662 128 equemene
        print(HowToUse % sys.argv[0])
663 116 equemene
        sys.exit(2)
664 116 equemene
665 116 equemene
    for opt, arg in opts:
666 116 equemene
        if opt == '-h':
667 128 equemene
            print(HowToUse % sys.argv[0])
668 116 equemene
669 128 equemene
            print("\nInformations about devices detected under OpenCL:")
670 116 equemene
            try:
671 132 equemene
                Id=0
672 116 equemene
                for platform in cl.get_platforms():
673 116 equemene
                    for device in platform.get_devices():
674 170 equemene
                        # Failed now because of POCL implementation
675 137 equemene
                        #deviceType=cl.device_type.to_string(device.type)
676 149 equemene
                        deviceType="xPU"
677 128 equemene
                        print("Device #%i from %s of type %s : %s" % (Id,platform.vendor.lstrip(),deviceType,device.name.lstrip()))
678 116 equemene
                        Id=Id+1
679 116 equemene
                sys.exit()
680 116 equemene
            except ImportError:
681 128 equemene
                print("Your platform does not seem to support OpenCL")
682 116 equemene
                sys.exit()
683 116 equemene
684 132 equemene
        elif opt in ("-t", "--valuetype"):
685 132 equemene
            if arg=='FP64':
686 132 equemene
                class MyFloat(np.float64): pass
687 132 equemene
            else:
688 132 equemene
                class MyFloat(np.float32):pass
689 132 equemene
            ValueType = arg
690 116 equemene
        elif opt in ("-d", "--device"):
691 116 equemene
            Device=int(arg)
692 121 equemene
        elif opt in ("-m", "--method"):
693 121 equemene
            Method=arg
694 170 equemene
        elif opt in ("-b", "--shape"):
695 170 equemene
            Shape=arg
696 175 equemene
            if Shape!='Ball' or Shape!='Box':
697 175 equemene
                print('Wrong argument: set to Ball')
698 116 equemene
        elif opt in ("-n", "--number"):
699 116 equemene
            Number=int(arg)
700 170 equemene
        elif opt in ("-i", "--iterations"):
701 170 equemene
            Iterations=int(arg)
702 116 equemene
        elif opt in ("-z", "--size"):
703 170 equemene
            SizeOfShape=MyFloat(arg)
704 116 equemene
        elif opt in ("-v", "--velocity"):
705 132 equemene
            Velocity=MyFloat(arg)
706 139 equemene
            VirielStress=False
707 116 equemene
        elif opt in ("-s", "--step"):
708 132 equemene
            Step=MyFloat(arg)
709 132 equemene
        elif opt in ("-r", "--random"):
710 132 equemene
            InitialRandom=True
711 133 equemene
        elif opt in ("-c", "--check"):
712 133 equemene
            CheckEnergies=True
713 139 equemene
        elif opt in ("-e", "--viriel"):
714 139 equemene
            VirielStress=True
715 170 equemene
        elif opt in ("-g", "--opengl"):
716 170 equemene
            OpenGL=True
717 172 equemene
        elif opt in ("-p", "--potential"):
718 172 equemene
            InterType='Potential'
719 175 equemene
        elif opt in ("-x", "--coarev"):
720 175 equemene
            CoArEv=arg
721 170 equemene
        elif opt in ("-o", "--verbose"):
722 170 equemene
            Verbose=True
723 170 equemene
724 175 equemene
    SizeOfShape=np.sqrt(MyFloat(SizeOfShape*Number))
725 132 equemene
    Velocity=MyFloat(Velocity)
726 132 equemene
    Step=MyFloat(Step)
727 132 equemene
728 128 equemene
    print("Device choosed : %s" % Device)
729 128 equemene
    print("Number of particules : %s" % Number)
730 170 equemene
    print("Size of Shape : %s" % SizeOfShape)
731 160 equemene
    print("Initial velocity : %s" % Velocity)
732 170 equemene
    print("Step of iteration : %s" % Step)
733 160 equemene
    print("Number of iterations : %s" % Iterations)
734 160 equemene
    print("Method of resolution : %s" % Method)
735 160 equemene
    print("Initial Random for RNG Seed : %s" % InitialRandom)
736 160 equemene
    print("ValueType is : %s" % ValueType)
737 170 equemene
    print("Viriel distribution of stress : %s" % VirielStress)
738 170 equemene
    print("OpenGL real time rendering : %s" % OpenGL)
739 170 equemene
    print("Speed rendering : %s" % SpeedRendering)
740 170 equemene
    print("Interaction type : %s" % InterType)
741 171 equemene
    print("Counter Artevasion type : %s" % CoArEv)
742 116 equemene
743 132 equemene
    # Create Numpy array of CL vector with 8 FP32
744 170 equemene
    MyCoM = np.zeros(4,dtype=MyFloat)
745 170 equemene
    MyDataX = np.zeros(Number*4, dtype=MyFloat)
746 170 equemene
    MyDataV = np.zeros(Number*4, dtype=MyFloat)
747 133 equemene
    MyPotential = np.zeros(Number, dtype=MyFloat)
748 133 equemene
    MyKinetic = np.zeros(Number, dtype=MyFloat)
749 132 equemene
750 171 equemene
    Marsaglia,Computing,Interaction,Artevasion=DictionariesAPI()
751 132 equemene
752 132 equemene
    # Scan the OpenCL arrays
753 132 equemene
    Id=0
754 116 equemene
    HasXPU=False
755 116 equemene
    for platform in cl.get_platforms():
756 116 equemene
        for device in platform.get_devices():
757 116 equemene
            if Id==Device:
758 116 equemene
                PlatForm=platform
759 116 equemene
                XPU=device
760 128 equemene
                print("CPU/GPU selected: ",device.name.lstrip())
761 151 equemene
                print("Platform selected: ",platform.name)
762 116 equemene
                HasXPU=True
763 116 equemene
            Id+=1
764 116 equemene
765 116 equemene
    if HasXPU==False:
766 128 equemene
        print("No XPU #%i found in all of %i devices, sorry..." % (Device,Id-1))
767 116 equemene
        sys.exit()
768 116 equemene
769 132 equemene
    # Create Context
770 116 equemene
    try:
771 116 equemene
        ctx = cl.Context([XPU])
772 116 equemene
        queue = cl.CommandQueue(ctx,properties=cl.command_queue_properties.PROFILING_ENABLE)
773 116 equemene
    except:
774 128 equemene
        print("Crash during context creation")
775 116 equemene
776 132 equemene
    # Build all routines used for the computing
777 170 equemene
778 170 equemene
    #BuildOptions="-cl-mad-enable -cl-kernel-arg-info -cl-fast-relaxed-math -cl-std=CL1.2 -DTRNG=%i -DTYPE=%i" % (Marsaglia[RNG],Computing[ValueType])
779 171 equemene
    BuildOptions="-cl-mad-enable -cl-fast-relaxed-math -DTRNG=%i -DTYPE=%i -DINTERACTION=%i -DARTEVASION=%i" % (Marsaglia[RNG],Computing[ValueType],Interaction[InterType],Artevasion[CoArEv])
780 170 equemene
781 170 equemene
    if 'Intel' in PlatForm.name or 'Experimental' in PlatForm.name or 'Clover' in PlatForm.name or 'Portable' in PlatForm.name :
782 151 equemene
        MyRoutines = cl.Program(ctx, BlobOpenCL).build(options = BuildOptions)
783 151 equemene
    else:
784 151 equemene
        MyRoutines = cl.Program(ctx, BlobOpenCL).build(options = BuildOptions+" -cl-strict-aliasing")
785 170 equemene
786 170 equemene
    mf = cl.mem_flags
787 170 equemene
    # Read/Write approach for buffering
788 170 equemene
    clDataX = cl.Buffer(ctx, mf.READ_WRITE, MyDataX.nbytes)
789 170 equemene
    clDataV = cl.Buffer(ctx, mf.READ_WRITE, MyDataV.nbytes)
790 170 equemene
    clPotential = cl.Buffer(ctx, mf.READ_WRITE, MyPotential.nbytes)
791 170 equemene
    clKinetic = cl.Buffer(ctx, mf.READ_WRITE, MyKinetic.nbytes)
792 170 equemene
    clCoM = cl.Buffer(ctx, mf.READ_WRITE, MyCoM.nbytes)
793 160 equemene
794 170 equemene
    # Write/HostPointer approach for buffering
795 170 equemene
    # clDataX = cl.Buffer(ctx, mf.WRITE_ONLY|mf.COPY_HOST_PTR,hostbuf=MyDataX)
796 170 equemene
    # clDataV = cl.Buffer(ctx, mf.WRITE_ONLY|mf.COPY_HOST_PTR,hostbuf=MyDataV)
797 170 equemene
    # clPotential = cl.Buffer(ctx, mf.WRITE_ONLY|mf.COPY_HOST_PTR,hostbuf=MyPotential)
798 170 equemene
    # clKinetic = cl.Buffer(ctx, mf.WRITE_ONLY|mf.COPY_HOST_PTR,hostbuf=MyKinetic)
799 170 equemene
    # clCoM = cl.Buffer(ctx, mf.WRITE_ONLY|mf.COPY_HOST_PTR,hostbuf=MyCoM)
800 116 equemene
801 134 equemene
    print('All particles superimposed.')
802 116 equemene
803 132 equemene
    # Set particles to RNG points
804 132 equemene
    if InitialRandom:
805 170 equemene
        seed_w=np.uint32(nprnd(2**32))
806 170 equemene
        seed_z=np.uint32(nprnd(2**32))
807 132 equemene
    else:
808 170 equemene
        seed_w=np.uint32(19710211)
809 170 equemene
        seed_z=np.uint32(20081010)
810 170 equemene
811 170 equemene
    if Shape=='Ball':
812 170 equemene
        MyRoutines.InBallSplutterPoints(queue,(Number,1),None,clDataX,SizeOfShape,seed_w,seed_z)
813 170 equemene
    else:
814 170 equemene
        MyRoutines.InBoxSplutterPoints(queue,(Number,1),None,clDataX,SizeOfShape,seed_w,seed_z)
815 116 equemene
816 132 equemene
    print('All particules distributed')
817 139 equemene
818 160 equemene
    CLLaunch=MyRoutines.CenterOfMass(queue,(1,1),None,clDataX,clCoM,np.int32(Number))
819 160 equemene
    CLLaunch.wait()
820 142 equemene
    cl.enqueue_copy(queue,MyCoM,clCoM)
821 170 equemene
    print('Center Of Mass estimated: (%s,%s,%s)' % (MyCoM[0],MyCoM[1],MyCoM[2]))
822 139 equemene
823 139 equemene
    if VirielStress:
824 160 equemene
        CLLaunch=MyRoutines.SplutterStress(queue,(Number,1),None,clDataX,clDataV,clCoM,MyFloat(0.),np.uint32(110271),np.uint32(250173))
825 139 equemene
    else:
826 160 equemene
        CLLaunch=MyRoutines.SplutterStress(queue,(Number,1),None,clDataX,clDataV,clCoM,Velocity,np.uint32(110271),np.uint32(250173))
827 160 equemene
    CLLaunch.wait()
828 139 equemene
829 170 equemene
    print('All particules stressed')
830 170 equemene
831 160 equemene
    CLLaunch=MyRoutines.Potential(queue,(Number,1),None,clDataX,clPotential)
832 160 equemene
    CLLaunch=MyRoutines.Kinetic(queue,(Number,1),None,clDataV,clKinetic)
833 133 equemene
    CLLaunch.wait()
834 141 equemene
    cl.enqueue_copy(queue,MyPotential,clPotential)
835 141 equemene
    cl.enqueue_copy(queue,MyKinetic,clKinetic)
836 170 equemene
    print('Energy estimated: Viriel=%s Potential=%s Kinetic=%s\n'% (np.sum(MyPotential)+2*np.sum(MyKinetic),np.sum(MyPotential),np.sum(MyKinetic)))
837 116 equemene
838 170 equemene
    if SpeedRendering:
839 170 equemene
        SizeOfBox=max(2*MyKinetic)
840 170 equemene
    else:
841 170 equemene
        SizeOfBox=SizeOfShape
842 170 equemene
843 174 equemene
    if OpenGL:
844 174 equemene
        print('\tTiny documentation to interact OpenGL rendering:\n')
845 174 equemene
        print('\t<Left|Right> Rotate around X axis')
846 174 equemene
        print('\t  <Up|Down>  Rotate around Y axis')
847 174 equemene
        print('\t   <z|Z>     Rotate around Z axis')
848 174 equemene
        print('\t   <-|+>     Unzoom/Zoom')
849 174 equemene
        print('\t    <s>      Toggle to display Positions or Velocities')
850 174 equemene
        print('\t   <Esc>     Quit\n')
851 174 equemene
852 170 equemene
    wall_time_start=time.time()
853 160 equemene
854 170 equemene
    Durations=np.array([],dtype=MyFloat)
855 170 equemene
    print('Starting!')
856 170 equemene
    if OpenGL:
857 170 equemene
        global ViewRX,ViewRY,ViewRZ
858 170 equemene
        Iterations=0
859 170 equemene
        ViewRX,ViewRY,ViewRZ = 0.,0.,0.
860 170 equemene
        # Launch OpenGL Loop
861 170 equemene
        glutInit(sys.argv)
862 170 equemene
        glutInitDisplayMode(GLUT_DOUBLE | GLUT_RGB)
863 170 equemene
        glutSetOption(GLUT_ACTION_ON_WINDOW_CLOSE,GLUT_ACTION_CONTINUE_EXECUTION)
864 170 equemene
        glutInitWindowSize(512,512)
865 170 equemene
        glutCreateWindow(b'NBodyGL')
866 170 equemene
        setup_viewport()
867 170 equemene
        glutReshapeFunc(reshape)
868 170 equemene
        glutDisplayFunc(display)
869 170 equemene
        glutIdleFunc(display)
870 170 equemene
        #   glutMouseFunc(mouse)
871 170 equemene
        glutSpecialFunc(special)
872 170 equemene
        glutKeyboardFunc(keyboard)
873 170 equemene
        glutMainLoop()
874 170 equemene
    else:
875 170 equemene
        for iteration in range(Iterations):
876 170 equemene
            Elapsed=MainOpenCL(clDataX,clDataV,Step,Method)
877 170 equemene
            if Verbose:
878 170 equemene
                # print("Duration of #%s iteration: %s" % (iteration,Elapsed))
879 170 equemene
                cl.enqueue_copy(queue, MyDataX, clDataX)
880 170 equemene
                print("Positions for #%s iteration: %s" % (iteration,MyDataX))
881 170 equemene
            else:
882 170 equemene
                sys.stdout.write('.')
883 170 equemene
                sys.stdout.flush()
884 170 equemene
            Durations=np.append(Durations,Elapsed)
885 133 equemene
886 170 equemene
    print('\nEnding!')
887 139 equemene
888 160 equemene
    MyRoutines.CenterOfMass(queue,(1,1),None,clDataX,clCoM,np.int32(Number))
889 160 equemene
    CLLaunch=MyRoutines.Potential(queue,(Number,1),None,clDataX,clPotential)
890 160 equemene
    CLLaunch=MyRoutines.Kinetic(queue,(Number,1),None,clDataV,clKinetic)
891 141 equemene
    CLLaunch.wait()
892 160 equemene
    cl.enqueue_copy(queue,MyCoM,clCoM)
893 141 equemene
    cl.enqueue_copy(queue,MyPotential,clPotential)
894 141 equemene
    cl.enqueue_copy(queue,MyKinetic,clKinetic)
895 170 equemene
    print('\nCenter Of Mass estimated: (%s,%s,%s)' % (MyCoM[0],MyCoM[1],MyCoM[2]))
896 170 equemene
    print('Energy estimated: Viriel=%s Potential=%s Kinetic=%s\n'% (np.sum(MyPotential)+2.*np.sum(MyKinetic),np.sum(MyPotential),np.sum(MyKinetic)))
897 135 equemene
898 170 equemene
    print("Duration stats on device %s with %s iterations :\n\tMean:\t%s\n\tMedian:\t%s\n\tStddev:\t%s\n\tMin:\t%s\n\tMax:\t%s\n\n\tVariability:\t%s\n" % (Device,Iterations,np.mean(Durations),np.median(Durations),np.std(Durations),np.min(Durations),np.max(Durations),np.std(Durations)/np.median(Durations)))
899 170 equemene
900 171 equemene
    # FPS: 1/Elapsed
901 171 equemene
    FPS=np.ones(len(Durations))
902 171 equemene
    FPS/=Durations
903 171 equemene
904 171 equemene
    print("FPS stats on device %s with %s iterations :\n\tMean:\t%s\n\tMedian:\t%s\n\tStddev:\t%s\n\tMin:\t%s\n\tMax:\t%s\n" % (Device,Iterations,np.mean(FPS),np.median(FPS),np.std(FPS),np.min(FPS),np.max(FPS)))
905 171 equemene
906 170 equemene
    # Contraction of Square*Size*Hertz: Size*Size/Elapsed
907 170 equemene
    Squertz=np.ones(len(Durations))
908 170 equemene
    Squertz*=Number*Number
909 170 equemene
    Squertz/=Durations
910 170 equemene
911 172 equemene
    print("Squertz in log10 & complete stats on device %s with %s iterations :\n\tMean:\t%s\t%s\n\tMedian:\t%s\t%s\n\tStddev:\t%s\t%s\n\tMin:\t%s\t%s\n\tMax:\t%s\t%s\n" % (Device,Iterations,np.log10(np.mean(Squertz)),np.mean(Squertz),np.log10(np.median(Squertz)),np.median(Squertz),np.log10(np.std(Squertz)),np.std(Squertz),np.log10(np.min(Squertz)),np.min(Squertz),np.log10(np.max(Squertz)),np.max(Squertz)))
912 170 equemene
913 160 equemene
    clDataX.release()
914 160 equemene
    clDataV.release()
915 133 equemene
    clKinetic.release()
916 133 equemene
    clPotential.release()