Statistiques
| Révision :

root / NBody / NBody.py @ 175

Historique | Voir | Annoter | Télécharger (28,12 ko)

1 128 equemene
#!/usr/bin/env python3
2 136 equemene
# -*- coding: utf-8 -*-
3 116 equemene
"""
4 171 equemene
NBody Demonstrator implemented in OpenCL, rendering OpenGL
5 116 equemene

6 174 equemene
CC BY-NC-SA 2011 : Emmanuel QUEMENER <emmanuel.quemener@gmail.com>
7 174 equemene
Cecill v2 : Emmanuel QUEMENER <emmanuel.quemener@gmail.com>
8 174 equemene

9 174 equemene
Thanks to Andreas Klockner for PyOpenCL:
10 174 equemene
http://mathema.tician.de/software/pyopencl
11 174 equemene

12 116 equemene
"""
13 116 equemene
import getopt
14 116 equemene
import sys
15 116 equemene
import time
16 116 equemene
import numpy as np
17 116 equemene
import pyopencl as cl
18 116 equemene
import pyopencl.array as cl_array
19 116 equemene
from numpy.random import randint as nprnd
20 170 equemene
import string, sys
21 170 equemene
from OpenGL.GL import *
22 170 equemene
from OpenGL.GLUT import *
23 116 equemene
24 132 equemene
def DictionariesAPI():
25 132 equemene
    Marsaglia={'CONG':0,'SHR3':1,'MWC':2,'KISS':3}
26 132 equemene
    Computing={'FP32':0,'FP64':1}
27 170 equemene
    Interaction={'Force':0,'Potential':1}
28 175 equemene
    Artevasion={'None':0,'NegExp':1,'CorRad':2}
29 171 equemene
    return(Marsaglia,Computing,Interaction,Artevasion)
30 132 equemene
31 142 equemene
BlobOpenCL= """
32 132 equemene
#define TFP32 0
33 132 equemene
#define TFP64 1
34 132 equemene

35 170 equemene
#define TFORCE 0
36 170 equemene
#define TPOTENTIAL 1
37 116 equemene

38 171 equemene
#define NONE 0
39 171 equemene
#define NEGEXP 1
40 175 equemene
#define CORRAD 2
41 171 equemene

42 132 equemene
#if TYPE == TFP32
43 132 equemene
#define MYFLOAT4 float4
44 132 equemene
#define MYFLOAT8 float8
45 132 equemene
#define MYFLOAT float
46 151 equemene
#define DISTANCE fast_distance
47 132 equemene
#else
48 132 equemene
#define MYFLOAT4 double4
49 132 equemene
#define MYFLOAT8 double8
50 132 equemene
#define MYFLOAT double
51 151 equemene
#define DISTANCE distance
52 170 equemene
#if defined(cl_khr_fp64)  // Khronos extension available?
53 170 equemene
#pragma OPENCL EXTENSION cl_khr_fp64 : enable
54 132 equemene
#endif
55 170 equemene
#endif
56 132 equemene

57 170 equemene
#define znew  ((zmwc=36969*(zmwc&65535)+(zmwc>>16))<<16)
58 170 equemene
#define wnew  ((wmwc=18000*(wmwc&65535)+(wmwc>>16))&65535)
59 170 equemene
#define MWC   (znew+wnew)
60 170 equemene
#define SHR3  (jsr=(jsr=(jsr=jsr^(jsr<<17))^(jsr>>13))^(jsr<<5))
61 170 equemene
#define CONG  (jcong=69069*jcong+1234567)
62 170 equemene
#define KISS  ((MWC^CONG)+SHR3)
63 170 equemene

64 160 equemene
#define MWCfp (MYFLOAT)(MWC * 2.3283064365386963e-10f)
65 160 equemene
#define KISSfp (MYFLOAT)(KISS * 2.3283064365386963e-10f)
66 160 equemene
#define SHR3fp (MYFLOAT)(SHR3 * 2.3283064365386963e-10f)
67 160 equemene
#define CONGfp (MYFLOAT)(CONG * 2.3283064365386963e-10f)
68 160 equemene

69 171 equemene
#define PI (MYFLOAT)3.141592653589793238e0f
70 160 equemene

71 170 equemene
#define SMALL_NUM (MYFLOAT)1.e-9f
72 160 equemene

73 175 equemene
#define CoreRadius (MYFLOAT)(1.e0f)
74 175 equemene

75 170 equemene
// Create my own Distance implementation: distance buggy on Oland AMD chipset
76 116 equemene

77 170 equemene
MYFLOAT MyDistance(MYFLOAT4 n,MYFLOAT4 m)
78 141 equemene
{
79 170 equemene
    private MYFLOAT x2,y2,z2;
80 170 equemene
    x2=n.s0-m.s0;
81 170 equemene
    x2*=x2;
82 170 equemene
    y2=n.s1-m.s1;
83 170 equemene
    y2*=y2;
84 170 equemene
    z2=n.s2-m.s2;
85 170 equemene
    z2*=z2;
86 170 equemene
    return(sqrt(x2+y2+z2));
87 141 equemene
}
88 141 equemene

89 171 equemene
// Potential between 2 m,n bodies
90 133 equemene
MYFLOAT PairPotential(MYFLOAT4 m,MYFLOAT4 n)
91 171 equemene
#if ARTEVASION == NEGEXP
92 171 equemene
// Add exp(-r) to numerator to avoid divergence for low distances
93 133 equemene
{
94 171 equemene
    MYFLOAT r=DISTANCE(n,m);
95 171 equemene
    return((-1.e0f+exp(-r))/r);
96 171 equemene
}
97 175 equemene
#elif ARTEVASION == CORRAD
98 175 equemene
// Add Core Radius to avoid divergence for low distances
99 175 equemene
{
100 175 equemene
    MYFLOAT r=DISTANCE(n,m);
101 175 equemene
    return(-1.e0f/sqrt(r*r+CoreRadius*CoreRadius));
102 175 equemene
}
103 171 equemene
#else
104 171 equemene
// Classical potential in 1/r
105 171 equemene
{
106 170 equemene
//    return((MYFLOAT)(-1.e0f)/(MyDistance(m,n)));
107 151 equemene
    return((MYFLOAT)(-1.e0f)/(DISTANCE(n,m)));
108 133 equemene
}
109 171 equemene
#endif
110 133 equemene

111 171 equemene
// Interaction based of Force (1/r**2) or Potential (-grad(1/r))
112 170 equemene
MYFLOAT4 Interaction(MYFLOAT4 m,MYFLOAT4 n)
113 170 equemene
#if INTERACTION == TFORCE
114 175 equemene
#if ARTEVASION == NEGEXP
115 175 equemene
// Force gradient of potential, set as (1-exp(-r))/r
116 175 equemene
{
117 175 equemene
    private MYFLOAT r=MyDistance(n,m);
118 175 equemene
    private MYFLOAT num=1.e0f+exp(-r)*(r-1.e0f);
119 175 equemene
    return((n-m)*num/(MYFLOAT)(r*r*r));
120 175 equemene
}
121 175 equemene
#elif ARTEVASION == CORRAD
122 175 equemene
// Force gradient of potential, (Core Radius) set as 1/sqrt(r**2+CoreRadius**2)
123 175 equemene
{
124 175 equemene
    private MYFLOAT r=MyDistance(n,m);
125 175 equemene
    private MYFLOAT den=sqrt(r*r+CoreRadius*CoreRadius);
126 175 equemene
    return((n-m)/(MYFLOAT)(den*den*den));
127 175 equemene
}
128 175 equemene
#else
129 170 equemene
// Simplest implementation of force (equals to acceleration)
130 170 equemene
// seems to bo bad (numerous artevasions)
131 170 equemene
// MYFLOAT4 InteractionForce(MYFLOAT4 m,MYFLOAT4 n)
132 170 equemene
{
133 170 equemene
    private MYFLOAT r=MyDistance(n,m);
134 170 equemene
    return((n-m)/(MYFLOAT)(r*r*r));
135 170 equemene
}
136 175 equemene
#endif
137 170 equemene
#else
138 170 equemene
// Force definited as gradient of potential
139 170 equemene
// Estimate potential and proximate potential to estimate force
140 170 equemene
// MYFLOAT4 InteractionPotential(MYFLOAT4 m,MYFLOAT4 n)
141 170 equemene
{
142 171 equemene
    // 1/1024 seems to be a good factor: larger one provides bad results
143 171 equemene
    private MYFLOAT epsilon=(MYFLOAT)(1.e0f/1024);
144 170 equemene
    private MYFLOAT4 er=normalize(n-m);
145 170 equemene
    private MYFLOAT4 dr=er*(MYFLOAT)epsilon;
146 170 equemene

147 171 equemene
    return(er/epsilon*(PairPotential(m,n)-PairPotential(m+dr,n)));
148 170 equemene
}
149 170 equemene
#endif
150 170 equemene

151 160 equemene
MYFLOAT AtomicPotential(__global MYFLOAT4* clDataX,int gid)
152 139 equemene
{
153 160 equemene
    private MYFLOAT potential=(MYFLOAT)0.e0f;
154 160 equemene
    private MYFLOAT4 x=clDataX[gid];
155 139 equemene

156 139 equemene
    for (int i=0;i<get_global_size(0);i++)
157 139 equemene
    {
158 139 equemene
        if (gid != i)
159 160 equemene
        potential+=PairPotential(x,clDataX[i]);
160 139 equemene
    }
161 133 equemene

162 139 equemene
    barrier(CLK_GLOBAL_MEM_FENCE);
163 141 equemene
    return(potential);
164 139 equemene
}
165 139 equemene

166 160 equemene
MYFLOAT AtomicPotentialCoM(__global MYFLOAT4* clDataX,__global MYFLOAT4* clCoM,int gid)
167 139 equemene
{
168 160 equemene
    return(PairPotential(clDataX[gid],clCoM[0]));
169 139 equemene
}
170 139 equemene

171 170 equemene
// Elements from : http://doswa.com/2009/01/02/fourth-order-runge-kutta-numerical-integration.html
172 170 equemene

173 160 equemene
MYFLOAT8 AtomicRungeKutta(__global MYFLOAT4* clDataInX,__global MYFLOAT4* clDataInV,int gid,MYFLOAT dt)
174 116 equemene
{
175 160 equemene
    private MYFLOAT4 a0,v0,x0,a1,v1,x1,a2,v2,x2,a3,v3,x3,a4,v4,x4,xf,vf;
176 170 equemene
    MYFLOAT4 DT=dt*(MYFLOAT4)(1.e0f,1.e0f,1.e0f,1.e0f);
177 160 equemene

178 160 equemene
    a0=(MYFLOAT4)(0.e0f,0.e0f,0.e0f,0.e0f);
179 160 equemene
    v0=(MYFLOAT4)clDataInV[gid];
180 160 equemene
    x0=(MYFLOAT4)clDataInX[gid];
181 133 equemene
    int N = get_global_size(0);
182 133 equemene

183 170 equemene
    for (private int i=0;i<N;i++)
184 121 equemene
    {
185 121 equemene
        if (gid != i)
186 160 equemene
        a0+=Interaction(x0,clDataInX[i]);
187 121 equemene
    }
188 121 equemene

189 160 equemene
    a1=(MYFLOAT4)(0.e0f,0.e0f,0.e0f,0.e0f);
190 170 equemene
    v1=a0*dt+v0;
191 170 equemene
    x1=v0*dt+x0;
192 170 equemene
    for (private int j=0;j<N;j++)
193 121 equemene
    {
194 170 equemene
        if (gid != j)
195 170 equemene
        a1+=Interaction(x1,clDataInX[j]);
196 121 equemene
    }
197 121 equemene

198 160 equemene
    a2=(MYFLOAT4)(0.e0f,0.e0f,0.e0f,0.e0f);
199 170 equemene
    v2=a1*(MYFLOAT)(dt/2.e0f)+v0;
200 170 equemene
    x2=v1*(MYFLOAT)(dt/2.e0f)+x0;
201 170 equemene
    for (private int k=0;k<N;k++)
202 121 equemene
    {
203 170 equemene
        if (gid != k)
204 170 equemene
        a2+=Interaction(x2,clDataInX[k]);
205 121 equemene
    }
206 121 equemene

207 160 equemene
    a3=(MYFLOAT4)(0.e0f,0.e0f,0.e0f,0.e0f);
208 170 equemene
    v3=a2*(MYFLOAT)(dt/2.e0f)+v0;
209 170 equemene
    x3=v2*(MYFLOAT)(dt/2.e0f)+x0;
210 170 equemene
    for (private int l=0;l<N;l++)
211 121 equemene
    {
212 170 equemene
        if (gid != l)
213 170 equemene
        a3+=Interaction(x3,clDataInX[l]);
214 121 equemene
    }
215 121 equemene

216 160 equemene
    a4=(MYFLOAT4)(0.e0f,0.e0f,0.e0f,0.e0f);
217 170 equemene
    v4=a3*dt+v0;
218 170 equemene
    x4=v3*dt+x0;
219 170 equemene
    for (private int m=0;m<N;m++)
220 141 equemene
    {
221 170 equemene
        if (gid != m)
222 170 equemene
        a4+=Interaction(x4,clDataInX[m]);
223 141 equemene
    }
224 141 equemene

225 160 equemene
    xf=x0+dt*(v1+(MYFLOAT)2.e0f*(v2+v3)+v4)/(MYFLOAT)6.e0f;
226 160 equemene
    vf=v0+dt*(a1+(MYFLOAT)2.e0f*(a2+a3)+a4)/(MYFLOAT)6.e0f;
227 121 equemene

228 170 equemene
    return((MYFLOAT8)(xf.s0,xf.s1,xf.s2,0.e0f,vf.s0,vf.s1,vf.s2,0.e0f));
229 121 equemene
}
230 121 equemene

231 160 equemene
MYFLOAT8 AtomicHeun(__global MYFLOAT4* clDataInX,__global MYFLOAT4* clDataInV,int gid,MYFLOAT dt)
232 121 equemene
{
233 170 equemene
    private MYFLOAT4 x0,v0,a0,x1,v1,a1,xf,vf;
234 170 equemene
    MYFLOAT4 Dt=dt*(MYFLOAT4)(1.e0f,1.e0f,1.e0f,1.e0f);
235 116 equemene

236 170 equemene
    x0=(MYFLOAT4)clDataInX[gid];
237 170 equemene
    v0=(MYFLOAT4)clDataInV[gid];
238 170 equemene
    a0=(MYFLOAT4)(0.e0f,0.e0f,0.e0f,0.e0f);
239 141 equemene

240 170 equemene
    for (private int i=0;i<get_global_size(0);i++)
241 116 equemene
    {
242 116 equemene
        if (gid != i)
243 170 equemene
        a0+=Interaction(x0,clDataInX[i]);
244 116 equemene
    }
245 141 equemene

246 170 equemene
    a1=(MYFLOAT4)(0.e0f,0.e0f,0.e0f,0.e0f);
247 170 equemene
    //v1=v0+dt*a0;
248 170 equemene
    //x1=x0+dt*v0;
249 170 equemene
    v1=dt*a0+v0;
250 170 equemene
    x1=dt*v0+x0;
251 141 equemene

252 170 equemene
    for (private int j=0;j<get_global_size(0);j++)
253 116 equemene
    {
254 170 equemene
        if (gid != j)
255 170 equemene
        a1+=Interaction(x1,clDataInX[j]);
256 116 equemene
    }
257 118 equemene

258 170 equemene
    vf=v0+dt*(a0+a1)/(MYFLOAT)2.e0f;
259 170 equemene
    xf=x0+dt*(v0+v1)/(MYFLOAT)2.e0f;
260 170 equemene

261 170 equemene
    return((MYFLOAT8)(xf.s0,xf.s1,xf.s2,0.e0f,vf.s0,vf.s1,vf.s2,0.e0f));
262 119 equemene
}
263 119 equemene

264 160 equemene
MYFLOAT8 AtomicImplicitEuler(__global MYFLOAT4* clDataInX,__global MYFLOAT4* clDataInV,int gid,MYFLOAT dt)
265 119 equemene
{
266 170 equemene
    MYFLOAT4 x0,v0,a,xf,vf;
267 119 equemene

268 170 equemene
    x0=(MYFLOAT4)clDataInX[gid];
269 170 equemene
    v0=(MYFLOAT4)clDataInV[gid];
270 151 equemene
    a=(MYFLOAT4)(0.e0f,0.e0f,0.e0f,0.e0f);
271 160 equemene

272 170 equemene
    for (private int i=0;i<get_global_size(0);i++)
273 119 equemene
    {
274 119 equemene
        if (gid != i)
275 170 equemene
          a+=Interaction(x0,clDataInX[i]);
276 119 equemene
    }
277 133 equemene

278 170 equemene
    vf=v0+dt*a;
279 170 equemene
    xf=x0+dt*vf;
280 170 equemene

281 170 equemene
    return((MYFLOAT8)(xf.s0,xf.s1,xf.s2,0.e0f,vf.s0,vf.s1,vf.s2,0.e0f));
282 116 equemene
}
283 116 equemene

284 160 equemene
MYFLOAT8 AtomicExplicitEuler(__global MYFLOAT4* clDataInX,__global MYFLOAT4* clDataInV,int gid,MYFLOAT dt)
285 140 equemene
{
286 170 equemene
    MYFLOAT4 x0,v0,a,xf,vf;
287 140 equemene

288 170 equemene
    x0=(MYFLOAT4)clDataInX[gid];
289 170 equemene
    v0=(MYFLOAT4)clDataInV[gid];
290 151 equemene
    a=(MYFLOAT4)(0.e0f,0.e0f,0.e0f,0.e0f);
291 160 equemene

292 170 equemene
    for (private int i=0;i<get_global_size(0);i++)
293 140 equemene
    {
294 140 equemene
        if (gid != i)
295 170 equemene
        a+=Interaction(x0,clDataInX[i]);
296 140 equemene
    }
297 140 equemene

298 170 equemene
    vf=v0+dt*a;
299 170 equemene
    xf=x0+dt*v0;
300 140 equemene

301 170 equemene
    return((MYFLOAT8)(xf.s0,xf.s1,xf.s2,0.e0f,vf.s0,vf.s1,vf.s2,0.e0f));
302 140 equemene
}
303 140 equemene

304 170 equemene
__kernel void InBallSplutterPoints(__global MYFLOAT4* clDataX,
305 170 equemene
                                   MYFLOAT diameter,uint seed_z,uint seed_w)
306 170 equemene
{
307 170 equemene
    private int gid=get_global_id(0);
308 170 equemene
    private uint zmwc=seed_z+gid;
309 170 equemene
    private uint wmwc=seed_w+(gid+1)%2;
310 170 equemene
    private MYFLOAT Heat;
311 170 equemene

312 170 equemene
    for (int i=0;i<gid;i++)
313 170 equemene
    {
314 170 equemene
        Heat=MWCfp;
315 170 equemene
    }
316 170 equemene

317 170 equemene
// More accurate distribution based on spherical coordonates
318 170 equemene
// Disactivated because of AMD Oland GPU crash on launch
319 170 equemene
//     private MYFLOAT Radius,Theta,Phi,PosX,PosY,PosZ,SinTheta;
320 170 equemene
//     Radius=MWCfp*diameter/2.e0f;
321 170 equemene
//     Theta=(MYFLOAT)acos((float)(-2.e0f*MWCfp+1.0e0f));
322 170 equemene
//     Phi=(MYFLOAT)(2.e0f*PI*MWCfp);
323 170 equemene
//     SinTheta=sin((float)Theta);
324 170 equemene
//     PosX=cos((float)Phi)*Radius*SinTheta;
325 170 equemene
//     PosY=sin((float)Phi)*Radius*SinTheta;
326 170 equemene
//     PosZ=cos((float)Theta)*Radius;
327 170 equemene
//     clDataX[gid]=(MYFLOAT4)(PosX,PosY,PosZ,0.e0f);
328 170 equemene

329 170 equemene
    private MYFLOAT Radius=diameter/2.e0f;
330 170 equemene
    private MYFLOAT Length=diameter;
331 170 equemene
    private MYFLOAT4 Position;
332 170 equemene
    while (Length>Radius) {
333 170 equemene
       Position=(MYFLOAT4)((MWCfp-0.5e0f)*diameter,(MWCfp-0.5e0f)*diameter,(MWCfp-0.5e0f)*diameter,0.e0f);
334 170 equemene
       Length=(MYFLOAT)length((MYFLOAT4)Position);
335 170 equemene
    }
336 170 equemene

337 170 equemene
    clDataX[gid]=Position;
338 170 equemene

339 170 equemene
    barrier(CLK_GLOBAL_MEM_FENCE);
340 170 equemene
}
341 170 equemene

342 170 equemene
__kernel void InBoxSplutterPoints(__global MYFLOAT4* clDataX, MYFLOAT box,
343 139 equemene
                             uint seed_z,uint seed_w)
344 116 equemene
{
345 170 equemene
    int gid=get_global_id(0);
346 170 equemene
    uint zmwc=seed_z+gid;
347 170 equemene
    uint wmwc=seed_w-gid;
348 170 equemene
    private MYFLOAT Heat;
349 170 equemene

350 170 equemene
    for (int i=0;i<gid;i++)
351 170 equemene
    {
352 170 equemene
        Heat=MWCfp;
353 170 equemene
    }
354 137 equemene

355 170 equemene
    clDataX[gid]=(MYFLOAT4)((MWCfp-0.5e0f)*box,(MWCfp-0.5e0f)*box,(MWCfp-0.5e0f)*box,0.e0f);
356 170 equemene

357 170 equemene
    barrier(CLK_GLOBAL_MEM_FENCE);
358 116 equemene
}
359 116 equemene

360 160 equemene
__kernel void SplutterStress(__global MYFLOAT4* clDataX,__global MYFLOAT4* clDataV,__global MYFLOAT4* clCoM, MYFLOAT velocity,uint seed_z,uint seed_w)
361 139 equemene
{
362 139 equemene
    int gid = get_global_id(0);
363 142 equemene
    MYFLOAT N = (MYFLOAT)get_global_size(0);
364 170 equemene
    uint zmwc=seed_z+(uint)gid;
365 170 equemene
    uint wmwc=seed_w-(uint)gid;
366 171 equemene
    MYFLOAT4 CrossVector,SpeedVector;
367 139 equemene

368 171 equemene
    if (get_global_size(0)==2) {
369 171 equemene
       CrossVector=(MYFLOAT4)(1.e0f,1.e0f,1.e0f,0.e0f);
370 171 equemene
    } else {
371 171 equemene
       CrossVector=(MYFLOAT4)(MWCfp-5e-1f,MWCfp-5e-1f,MWCfp-5e-1f,0.e0f);
372 171 equemene
    }
373 171 equemene

374 139 equemene
    if (velocity<SMALL_NUM) {
375 171 equemene
       SpeedVector=(MYFLOAT4)normalize(cross(clDataX[gid]-clCoM[0],CrossVector))*sqrt((-AtomicPotential(clDataX,gid)/(MYFLOAT)2.e0f));
376 139 equemene
    }
377 139 equemene
    else
378 139 equemene
    {
379 155 equemene
       // cast to float for sin,cos are NEEDED by Mesa FP64 implementation!
380 170 equemene
       // Implemention on AMD Oland are probably broken in float
381 170 equemene

382 170 equemene
       SpeedVector=(MYFLOAT4)((MWCfp-0.5e0f)*velocity,(MWCfp-0.5e0f)*velocity,
383 170 equemene
                              (MWCfp-0.5e0f)*velocity,0.e0f);
384 139 equemene
    }
385 170 equemene
    clDataV[gid]=SpeedVector;
386 170 equemene
    barrier(CLK_GLOBAL_MEM_FENCE);
387 139 equemene
}
388 139 equemene

389 160 equemene
__kernel void RungeKutta(__global MYFLOAT4* clDataX,__global MYFLOAT4* clDataV,MYFLOAT h)
390 116 equemene
{
391 170 equemene
    private int gid = get_global_id(0);
392 170 equemene
    private MYFLOAT8 clDataGid;
393 116 equemene

394 170 equemene
    clDataGid=AtomicRungeKutta(clDataX,clDataV,gid,h);
395 116 equemene
    barrier(CLK_GLOBAL_MEM_FENCE);
396 170 equemene
    clDataX[gid]=clDataGid.s0123;
397 170 equemene
    clDataV[gid]=clDataGid.s4567;
398 116 equemene
}
399 116 equemene

400 170 equemene
__kernel void Heun(__global MYFLOAT4* clDataX,__global MYFLOAT4* clDataV,MYFLOAT h)
401 116 equemene
{
402 170 equemene
    private int gid = get_global_id(0);
403 170 equemene
    private MYFLOAT8 clDataGid;
404 116 equemene

405 170 equemene
    clDataGid=AtomicHeun(clDataX,clDataV,gid,h);
406 116 equemene
    barrier(CLK_GLOBAL_MEM_FENCE);
407 170 equemene
    clDataX[gid]=clDataGid.s0123;
408 170 equemene
    clDataV[gid]=clDataGid.s4567;
409 116 equemene
}
410 133 equemene

411 170 equemene
__kernel void ImplicitEuler(__global MYFLOAT4* clDataX,__global MYFLOAT4* clDataV,MYFLOAT h)
412 141 equemene
{
413 170 equemene
    private int gid = get_global_id(0);
414 170 equemene
    private MYFLOAT8 clDataGid;
415 141 equemene

416 170 equemene
    clDataGid=AtomicImplicitEuler(clDataX,clDataV,gid,h);
417 141 equemene
    barrier(CLK_GLOBAL_MEM_FENCE);
418 170 equemene
    clDataX[gid]=clDataGid.s0123;
419 170 equemene
    clDataV[gid]=clDataGid.s4567;
420 141 equemene
}
421 141 equemene

422 160 equemene
__kernel void ExplicitEuler(__global MYFLOAT4* clDataX,__global MYFLOAT4* clDataV,MYFLOAT h)
423 140 equemene
{
424 170 equemene
    private int gid = get_global_id(0);
425 170 equemene
    private MYFLOAT8 clDataGid;
426 170 equemene

427 170 equemene
    clDataGid=AtomicExplicitEuler(clDataX,clDataV,gid,h);
428 140 equemene
    barrier(CLK_GLOBAL_MEM_FENCE);
429 170 equemene
    clDataX[gid]=clDataGid.s0123;
430 170 equemene
    clDataV[gid]=clDataGid.s4567;
431 140 equemene
}
432 139 equemene

433 160 equemene
__kernel void CoMPotential(__global MYFLOAT4* clDataX,__global MYFLOAT4* clCoM,__global MYFLOAT* clPotential)
434 133 equemene
{
435 133 equemene
    int gid = get_global_id(0);
436 133 equemene

437 160 equemene
    clPotential[gid]=PairPotential(clDataX[gid],clCoM[0]);
438 139 equemene
}
439 139 equemene

440 160 equemene
__kernel void Potential(__global MYFLOAT4* clDataX,__global MYFLOAT* clPotential)
441 139 equemene
{
442 139 equemene
    int gid = get_global_id(0);
443 139 equemene

444 155 equemene
    MYFLOAT potential=(MYFLOAT)0.e0f;
445 160 equemene
    MYFLOAT4 x=clDataX[gid];
446 133 equemene

447 133 equemene
    for (int i=0;i<get_global_size(0);i++)
448 133 equemene
    {
449 133 equemene
        if (gid != i)
450 160 equemene
        potential+=PairPotential(x,clDataX[i]);
451 133 equemene
    }
452 133 equemene

453 133 equemene
    barrier(CLK_GLOBAL_MEM_FENCE);
454 155 equemene
    clPotential[gid]=potential*(MYFLOAT)5.e-1f;
455 133 equemene
}
456 133 equemene

457 160 equemene
__kernel void CenterOfMass(__global MYFLOAT4* clDataX,__global MYFLOAT4* clCoM,int Size)
458 139 equemene
{
459 160 equemene
    MYFLOAT4 CoM=clDataX[0];
460 142 equemene

461 139 equemene
    for (int i=1;i<Size;i++)
462 139 equemene
    {
463 160 equemene
        CoM+=clDataX[i];
464 139 equemene
    }
465 142 equemene

466 139 equemene
    barrier(CLK_GLOBAL_MEM_FENCE);
467 170 equemene
    clCoM[0]=(MYFLOAT4)(CoM.s0,CoM.s1,CoM.s2,0.e0f)/(MYFLOAT)Size;
468 139 equemene
}
469 139 equemene

470 160 equemene
__kernel void Kinetic(__global MYFLOAT4* clDataV,__global MYFLOAT* clKinetic)
471 133 equemene
{
472 133 equemene
    int gid = get_global_id(0);
473 133 equemene

474 139 equemene
    barrier(CLK_GLOBAL_MEM_FENCE);
475 160 equemene
    MYFLOAT d=(MYFLOAT)length(clDataV[gid]);
476 155 equemene
    clKinetic[gid]=(MYFLOAT)5.e-1f*(MYFLOAT)(d*d);
477 133 equemene
}
478 170 equemene

479 116 equemene
"""
480 116 equemene
481 170 equemene
def MainOpenCL(clDataX,clDataV,Step,Method):
482 170 equemene
    time_start=time.time()
483 170 equemene
    if Method=="RungeKutta":
484 170 equemene
        CLLaunch=MyRoutines.RungeKutta(queue,(Number,1),None,clDataX,clDataV,Step)
485 170 equemene
    elif Method=="ExplicitEuler":
486 170 equemene
        CLLaunch=MyRoutines.ExplicitEuler(queue,(Number,1),None,clDataX,clDataV,Step)
487 170 equemene
    elif Method=="Heun":
488 170 equemene
        CLLaunch=MyRoutines.Heun(queue,(Number,1),None,clDataX,clDataV,Step)
489 170 equemene
    else:
490 170 equemene
        CLLaunch=MyRoutines.ImplicitEuler(queue,(Number,1),None,clDataX,clDataV,Step)
491 170 equemene
    CLLaunch.wait()
492 170 equemene
    Elapsed=time.time()-time_start
493 170 equemene
    return(Elapsed)
494 170 equemene
495 170 equemene
def display(*args):
496 133 equemene
497 170 equemene
    global MyDataX,MyDataV,clDataX,clDataV,Step,Method,Number,Iterations,Durations,Verbose,SpeedRendering
498 170 equemene
499 170 equemene
    glClearColor(0.0, 0.0, 0.0, 0.0)
500 170 equemene
    glClear(GL_COLOR_BUFFER_BIT)
501 170 equemene
    glColor3f(1.0,1.0,1.0)
502 170 equemene
503 170 equemene
    Elapsed=MainOpenCL(clDataX,clDataV,Step,Method)
504 170 equemene
    if SpeedRendering:
505 170 equemene
        cl.enqueue_copy(queue, MyDataV, clDataV)
506 170 equemene
        MyDataV.reshape(Number,4)[:,3]=1
507 170 equemene
        glVertexPointerf(MyDataV.reshape(Number,4))
508 170 equemene
    else:
509 170 equemene
        cl.enqueue_copy(queue, MyDataX, clDataX)
510 170 equemene
        MyDataX.reshape(Number,4)[:,3]=1
511 170 equemene
        glVertexPointerf(MyDataX.reshape(Number,4))
512 170 equemene
513 170 equemene
    if Verbose:
514 170 equemene
        print("Positions for #%s iteration: %s" % (Iterations,MyDataX))
515 170 equemene
    else:
516 170 equemene
        sys.stdout.write('.')
517 170 equemene
        sys.stdout.flush()
518 170 equemene
    Durations=np.append(Durations,MainOpenCL(clDataX,clDataV,Step,Method))
519 170 equemene
    glEnableClientState(GL_VERTEX_ARRAY)
520 170 equemene
    glDrawArrays(GL_POINTS, 0, Number)
521 170 equemene
    glDisableClientState(GL_VERTEX_ARRAY)
522 170 equemene
    glFlush()
523 170 equemene
    Iterations+=1
524 170 equemene
    glutSwapBuffers()
525 170 equemene
526 170 equemene
def halt():
527 170 equemene
    pass
528 170 equemene
529 170 equemene
def keyboard(k,x,y):
530 174 equemene
    global ViewRZ,SpeedRendering
531 170 equemene
    LC_Z = as_8_bit( 'z' )
532 170 equemene
    UC_Z = as_8_bit( 'Z' )
533 170 equemene
    Plus = as_8_bit( '+' )
534 170 equemene
    Minus = as_8_bit( '-' )
535 170 equemene
    Switch = as_8_bit( 's' )
536 170 equemene
537 170 equemene
    Zoom=1
538 175 equemene
    if k == LC_Z:
539 170 equemene
        ViewRZ += 1.0
540 170 equemene
    elif k == UC_Z:
541 170 equemene
        ViewRZ -= 1.0
542 170 equemene
    elif k == Plus:
543 170 equemene
        Zoom *= 2.0
544 170 equemene
    elif k == Minus:
545 170 equemene
        Zoom /= 2.0
546 170 equemene
    elif k == Switch:
547 170 equemene
        if SpeedRendering:
548 170 equemene
            SpeedRendering=False
549 170 equemene
        else:
550 170 equemene
            SpeedRendering=True
551 170 equemene
    elif ord(k) == 27: # Escape
552 170 equemene
        glutLeaveMainLoop()
553 170 equemene
        return(False)
554 170 equemene
    else:
555 170 equemene
        return
556 170 equemene
    glRotatef(ViewRZ, 0.0, 0.0, 1.0)
557 170 equemene
    glScalef(Zoom,Zoom,Zoom)
558 170 equemene
    glutPostRedisplay()
559 170 equemene
560 170 equemene
def special(k,x,y):
561 174 equemene
    global ViewRX, ViewRY
562 170 equemene
563 170 equemene
    if k == GLUT_KEY_UP:
564 170 equemene
        ViewRX += 1.0
565 170 equemene
    elif k == GLUT_KEY_DOWN:
566 170 equemene
        ViewRX -= 1.0
567 170 equemene
    elif k == GLUT_KEY_LEFT:
568 170 equemene
        ViewRY += 1.0
569 170 equemene
    elif k == GLUT_KEY_RIGHT:
570 170 equemene
        ViewRY -= 1.0
571 170 equemene
    else:
572 170 equemene
        return
573 170 equemene
    glRotatef(ViewRX, 1.0, 0.0, 0.0)
574 170 equemene
    glRotatef(ViewRY, 0.0, 1.0, 0.0)
575 170 equemene
    glutPostRedisplay()
576 170 equemene
577 170 equemene
def setup_viewport():
578 170 equemene
    global SizeOfBox
579 170 equemene
    glMatrixMode(GL_PROJECTION)
580 170 equemene
    glLoadIdentity()
581 170 equemene
    glOrtho(-SizeOfBox, SizeOfBox, -SizeOfBox, SizeOfBox, -SizeOfBox, SizeOfBox)
582 170 equemene
    glutPostRedisplay()
583 170 equemene
584 170 equemene
def reshape(w, h):
585 170 equemene
    glViewport(0, 0, w, h)
586 170 equemene
    setup_viewport()
587 170 equemene
588 116 equemene
if __name__=='__main__':
589 170 equemene
590 170 equemene
    global Number,Step,clDataX,clDataV,MyDataX,MyDataV,Method,SizeOfBox,Iterations,Verbose,Durations
591 116 equemene
592 132 equemene
    # ValueType
593 132 equemene
    ValueType='FP32'
594 132 equemene
    class MyFloat(np.float32):pass
595 160 equemene
    #    clType8=cl_array.vec.float8
596 116 equemene
    # Set defaults values
597 118 equemene
    np.set_printoptions(precision=2)
598 116 equemene
    # Id of Device : 1 is for first find !
599 160 equemene
    Device=0
600 170 equemene
    # Number of bodies is integer
601 160 equemene
    Number=2
602 170 equemene
    # Number of iterations (for standalone execution)
603 170 equemene
    Iterations=10
604 170 equemene
    # Size of shape
605 170 equemene
    SizeOfShape=MyFloat(1.)
606 116 equemene
    # Initial velocity of particules
607 132 equemene
    Velocity=MyFloat(1.)
608 116 equemene
    # Step
609 170 equemene
    Step=MyFloat(1./32)
610 121 equemene
    # Method of integration
611 150 equemene
    Method='ImplicitEuler'
612 132 equemene
    # InitialRandom
613 132 equemene
    InitialRandom=False
614 132 equemene
    # RNG Marsaglia Method
615 132 equemene
    RNG='MWC'
616 139 equemene
    # Viriel Distribution of stress
617 139 equemene
    VirielStress=True
618 170 equemene
    # Verbose
619 170 equemene
    Verbose=False
620 170 equemene
    # OpenGL real time rendering
621 170 equemene
    OpenGL=False
622 170 equemene
    # Speed rendering
623 170 equemene
    SpeedRendering=False
624 171 equemene
    # Counter ArtEvasions Measures (artefact evasion)
625 171 equemene
    CoArEv='None'
626 170 equemene
    # Shape to distribute
627 170 equemene
    Shape='Ball'
628 170 equemene
    # Type of Interaction
629 172 equemene
    InterType='Force'
630 132 equemene
631 175 equemene
    HowToUse='%s -h [Help] -r [InitialRandom] -g [OpenGL] -e [VirielStress] -o [Verbose] -p [Potential] -x <None|NegExp|CorRad> -d <DeviceId> -n <NumberOfParticules> -i <Iterations> -z <SizeOfBoxOrBall> -v <Velocity> -s <Step> -b <Ball|Box> -m <ImplicitEuler|RungeKutta|ExplicitEuler|Heun> -t <FP32|FP64>'
632 116 equemene
633 116 equemene
    try:
634 175 equemene
        opts, args = getopt.getopt(sys.argv[1:],"rpgehod:n:i:z:v:s:m:t:b:x:",["random","potential","coarev","opengl","viriel","verbose","device=","number=","iterations=","size=","velocity=","step=","method=","valuetype=","shape="])
635 116 equemene
    except getopt.GetoptError:
636 128 equemene
        print(HowToUse % sys.argv[0])
637 116 equemene
        sys.exit(2)
638 116 equemene
639 116 equemene
    for opt, arg in opts:
640 116 equemene
        if opt == '-h':
641 128 equemene
            print(HowToUse % sys.argv[0])
642 116 equemene
643 128 equemene
            print("\nInformations about devices detected under OpenCL:")
644 116 equemene
            try:
645 132 equemene
                Id=0
646 116 equemene
                for platform in cl.get_platforms():
647 116 equemene
                    for device in platform.get_devices():
648 170 equemene
                        # Failed now because of POCL implementation
649 137 equemene
                        #deviceType=cl.device_type.to_string(device.type)
650 149 equemene
                        deviceType="xPU"
651 128 equemene
                        print("Device #%i from %s of type %s : %s" % (Id,platform.vendor.lstrip(),deviceType,device.name.lstrip()))
652 116 equemene
                        Id=Id+1
653 116 equemene
                sys.exit()
654 116 equemene
            except ImportError:
655 128 equemene
                print("Your platform does not seem to support OpenCL")
656 116 equemene
                sys.exit()
657 116 equemene
658 132 equemene
        elif opt in ("-t", "--valuetype"):
659 132 equemene
            if arg=='FP64':
660 132 equemene
                class MyFloat(np.float64): pass
661 132 equemene
            else:
662 132 equemene
                class MyFloat(np.float32):pass
663 132 equemene
            ValueType = arg
664 116 equemene
        elif opt in ("-d", "--device"):
665 116 equemene
            Device=int(arg)
666 121 equemene
        elif opt in ("-m", "--method"):
667 121 equemene
            Method=arg
668 170 equemene
        elif opt in ("-b", "--shape"):
669 170 equemene
            Shape=arg
670 175 equemene
            if Shape!='Ball' or Shape!='Box':
671 175 equemene
                print('Wrong argument: set to Ball')
672 116 equemene
        elif opt in ("-n", "--number"):
673 116 equemene
            Number=int(arg)
674 170 equemene
        elif opt in ("-i", "--iterations"):
675 170 equemene
            Iterations=int(arg)
676 116 equemene
        elif opt in ("-z", "--size"):
677 170 equemene
            SizeOfShape=MyFloat(arg)
678 116 equemene
        elif opt in ("-v", "--velocity"):
679 132 equemene
            Velocity=MyFloat(arg)
680 139 equemene
            VirielStress=False
681 116 equemene
        elif opt in ("-s", "--step"):
682 132 equemene
            Step=MyFloat(arg)
683 132 equemene
        elif opt in ("-r", "--random"):
684 132 equemene
            InitialRandom=True
685 133 equemene
        elif opt in ("-c", "--check"):
686 133 equemene
            CheckEnergies=True
687 139 equemene
        elif opt in ("-e", "--viriel"):
688 139 equemene
            VirielStress=True
689 170 equemene
        elif opt in ("-g", "--opengl"):
690 170 equemene
            OpenGL=True
691 172 equemene
        elif opt in ("-p", "--potential"):
692 172 equemene
            InterType='Potential'
693 175 equemene
        elif opt in ("-x", "--coarev"):
694 175 equemene
            CoArEv=arg
695 170 equemene
        elif opt in ("-o", "--verbose"):
696 170 equemene
            Verbose=True
697 170 equemene
698 175 equemene
    SizeOfShape=np.sqrt(MyFloat(SizeOfShape*Number))
699 132 equemene
    Velocity=MyFloat(Velocity)
700 132 equemene
    Step=MyFloat(Step)
701 132 equemene
702 128 equemene
    print("Device choosed : %s" % Device)
703 128 equemene
    print("Number of particules : %s" % Number)
704 170 equemene
    print("Size of Shape : %s" % SizeOfShape)
705 160 equemene
    print("Initial velocity : %s" % Velocity)
706 170 equemene
    print("Step of iteration : %s" % Step)
707 160 equemene
    print("Number of iterations : %s" % Iterations)
708 160 equemene
    print("Method of resolution : %s" % Method)
709 160 equemene
    print("Initial Random for RNG Seed : %s" % InitialRandom)
710 160 equemene
    print("ValueType is : %s" % ValueType)
711 170 equemene
    print("Viriel distribution of stress : %s" % VirielStress)
712 170 equemene
    print("OpenGL real time rendering : %s" % OpenGL)
713 170 equemene
    print("Speed rendering : %s" % SpeedRendering)
714 170 equemene
    print("Interaction type : %s" % InterType)
715 171 equemene
    print("Counter Artevasion type : %s" % CoArEv)
716 116 equemene
717 132 equemene
    # Create Numpy array of CL vector with 8 FP32
718 170 equemene
    MyCoM = np.zeros(4,dtype=MyFloat)
719 170 equemene
    MyDataX = np.zeros(Number*4, dtype=MyFloat)
720 170 equemene
    MyDataV = np.zeros(Number*4, dtype=MyFloat)
721 133 equemene
    MyPotential = np.zeros(Number, dtype=MyFloat)
722 133 equemene
    MyKinetic = np.zeros(Number, dtype=MyFloat)
723 132 equemene
724 171 equemene
    Marsaglia,Computing,Interaction,Artevasion=DictionariesAPI()
725 132 equemene
726 132 equemene
    # Scan the OpenCL arrays
727 132 equemene
    Id=0
728 116 equemene
    HasXPU=False
729 116 equemene
    for platform in cl.get_platforms():
730 116 equemene
        for device in platform.get_devices():
731 116 equemene
            if Id==Device:
732 116 equemene
                PlatForm=platform
733 116 equemene
                XPU=device
734 128 equemene
                print("CPU/GPU selected: ",device.name.lstrip())
735 151 equemene
                print("Platform selected: ",platform.name)
736 116 equemene
                HasXPU=True
737 116 equemene
            Id+=1
738 116 equemene
739 116 equemene
    if HasXPU==False:
740 128 equemene
        print("No XPU #%i found in all of %i devices, sorry..." % (Device,Id-1))
741 116 equemene
        sys.exit()
742 116 equemene
743 132 equemene
    # Create Context
744 116 equemene
    try:
745 116 equemene
        ctx = cl.Context([XPU])
746 116 equemene
        queue = cl.CommandQueue(ctx,properties=cl.command_queue_properties.PROFILING_ENABLE)
747 116 equemene
    except:
748 128 equemene
        print("Crash during context creation")
749 116 equemene
750 132 equemene
    # Build all routines used for the computing
751 170 equemene
752 170 equemene
    #BuildOptions="-cl-mad-enable -cl-kernel-arg-info -cl-fast-relaxed-math -cl-std=CL1.2 -DTRNG=%i -DTYPE=%i" % (Marsaglia[RNG],Computing[ValueType])
753 171 equemene
    BuildOptions="-cl-mad-enable -cl-fast-relaxed-math -DTRNG=%i -DTYPE=%i -DINTERACTION=%i -DARTEVASION=%i" % (Marsaglia[RNG],Computing[ValueType],Interaction[InterType],Artevasion[CoArEv])
754 170 equemene
755 170 equemene
    if 'Intel' in PlatForm.name or 'Experimental' in PlatForm.name or 'Clover' in PlatForm.name or 'Portable' in PlatForm.name :
756 151 equemene
        MyRoutines = cl.Program(ctx, BlobOpenCL).build(options = BuildOptions)
757 151 equemene
    else:
758 151 equemene
        MyRoutines = cl.Program(ctx, BlobOpenCL).build(options = BuildOptions+" -cl-strict-aliasing")
759 170 equemene
760 170 equemene
    mf = cl.mem_flags
761 170 equemene
    # Read/Write approach for buffering
762 170 equemene
    clDataX = cl.Buffer(ctx, mf.READ_WRITE, MyDataX.nbytes)
763 170 equemene
    clDataV = cl.Buffer(ctx, mf.READ_WRITE, MyDataV.nbytes)
764 170 equemene
    clPotential = cl.Buffer(ctx, mf.READ_WRITE, MyPotential.nbytes)
765 170 equemene
    clKinetic = cl.Buffer(ctx, mf.READ_WRITE, MyKinetic.nbytes)
766 170 equemene
    clCoM = cl.Buffer(ctx, mf.READ_WRITE, MyCoM.nbytes)
767 160 equemene
768 170 equemene
    # Write/HostPointer approach for buffering
769 170 equemene
    # clDataX = cl.Buffer(ctx, mf.WRITE_ONLY|mf.COPY_HOST_PTR,hostbuf=MyDataX)
770 170 equemene
    # clDataV = cl.Buffer(ctx, mf.WRITE_ONLY|mf.COPY_HOST_PTR,hostbuf=MyDataV)
771 170 equemene
    # clPotential = cl.Buffer(ctx, mf.WRITE_ONLY|mf.COPY_HOST_PTR,hostbuf=MyPotential)
772 170 equemene
    # clKinetic = cl.Buffer(ctx, mf.WRITE_ONLY|mf.COPY_HOST_PTR,hostbuf=MyKinetic)
773 170 equemene
    # clCoM = cl.Buffer(ctx, mf.WRITE_ONLY|mf.COPY_HOST_PTR,hostbuf=MyCoM)
774 116 equemene
775 134 equemene
    print('All particles superimposed.')
776 116 equemene
777 132 equemene
    # Set particles to RNG points
778 132 equemene
    if InitialRandom:
779 170 equemene
        seed_w=np.uint32(nprnd(2**32))
780 170 equemene
        seed_z=np.uint32(nprnd(2**32))
781 132 equemene
    else:
782 170 equemene
        seed_w=np.uint32(19710211)
783 170 equemene
        seed_z=np.uint32(20081010)
784 170 equemene
785 170 equemene
    if Shape=='Ball':
786 170 equemene
        MyRoutines.InBallSplutterPoints(queue,(Number,1),None,clDataX,SizeOfShape,seed_w,seed_z)
787 170 equemene
    else:
788 170 equemene
        MyRoutines.InBoxSplutterPoints(queue,(Number,1),None,clDataX,SizeOfShape,seed_w,seed_z)
789 116 equemene
790 132 equemene
    print('All particules distributed')
791 139 equemene
792 160 equemene
    CLLaunch=MyRoutines.CenterOfMass(queue,(1,1),None,clDataX,clCoM,np.int32(Number))
793 160 equemene
    CLLaunch.wait()
794 142 equemene
    cl.enqueue_copy(queue,MyCoM,clCoM)
795 170 equemene
    print('Center Of Mass estimated: (%s,%s,%s)' % (MyCoM[0],MyCoM[1],MyCoM[2]))
796 139 equemene
797 139 equemene
    if VirielStress:
798 160 equemene
        CLLaunch=MyRoutines.SplutterStress(queue,(Number,1),None,clDataX,clDataV,clCoM,MyFloat(0.),np.uint32(110271),np.uint32(250173))
799 139 equemene
    else:
800 160 equemene
        CLLaunch=MyRoutines.SplutterStress(queue,(Number,1),None,clDataX,clDataV,clCoM,Velocity,np.uint32(110271),np.uint32(250173))
801 160 equemene
    CLLaunch.wait()
802 139 equemene
803 170 equemene
    print('All particules stressed')
804 170 equemene
805 160 equemene
    CLLaunch=MyRoutines.Potential(queue,(Number,1),None,clDataX,clPotential)
806 160 equemene
    CLLaunch=MyRoutines.Kinetic(queue,(Number,1),None,clDataV,clKinetic)
807 133 equemene
    CLLaunch.wait()
808 141 equemene
    cl.enqueue_copy(queue,MyPotential,clPotential)
809 141 equemene
    cl.enqueue_copy(queue,MyKinetic,clKinetic)
810 170 equemene
    print('Energy estimated: Viriel=%s Potential=%s Kinetic=%s\n'% (np.sum(MyPotential)+2*np.sum(MyKinetic),np.sum(MyPotential),np.sum(MyKinetic)))
811 116 equemene
812 170 equemene
    if SpeedRendering:
813 170 equemene
        SizeOfBox=max(2*MyKinetic)
814 170 equemene
    else:
815 170 equemene
        SizeOfBox=SizeOfShape
816 170 equemene
817 174 equemene
    if OpenGL:
818 174 equemene
        print('\tTiny documentation to interact OpenGL rendering:\n')
819 174 equemene
        print('\t<Left|Right> Rotate around X axis')
820 174 equemene
        print('\t  <Up|Down>  Rotate around Y axis')
821 174 equemene
        print('\t   <z|Z>     Rotate around Z axis')
822 174 equemene
        print('\t   <-|+>     Unzoom/Zoom')
823 174 equemene
        print('\t    <s>      Toggle to display Positions or Velocities')
824 174 equemene
        print('\t   <Esc>     Quit\n')
825 174 equemene
826 170 equemene
    wall_time_start=time.time()
827 160 equemene
828 170 equemene
    Durations=np.array([],dtype=MyFloat)
829 170 equemene
    print('Starting!')
830 170 equemene
    if OpenGL:
831 170 equemene
        global ViewRX,ViewRY,ViewRZ
832 170 equemene
        Iterations=0
833 170 equemene
        ViewRX,ViewRY,ViewRZ = 0.,0.,0.
834 170 equemene
        # Launch OpenGL Loop
835 170 equemene
        glutInit(sys.argv)
836 170 equemene
        glutInitDisplayMode(GLUT_DOUBLE | GLUT_RGB)
837 170 equemene
        glutSetOption(GLUT_ACTION_ON_WINDOW_CLOSE,GLUT_ACTION_CONTINUE_EXECUTION)
838 170 equemene
        glutInitWindowSize(512,512)
839 170 equemene
        glutCreateWindow(b'NBodyGL')
840 170 equemene
        setup_viewport()
841 170 equemene
        glutReshapeFunc(reshape)
842 170 equemene
        glutDisplayFunc(display)
843 170 equemene
        glutIdleFunc(display)
844 170 equemene
        #   glutMouseFunc(mouse)
845 170 equemene
        glutSpecialFunc(special)
846 170 equemene
        glutKeyboardFunc(keyboard)
847 170 equemene
        glutMainLoop()
848 170 equemene
    else:
849 170 equemene
        for iteration in range(Iterations):
850 170 equemene
            Elapsed=MainOpenCL(clDataX,clDataV,Step,Method)
851 170 equemene
            if Verbose:
852 170 equemene
                # print("Duration of #%s iteration: %s" % (iteration,Elapsed))
853 170 equemene
                cl.enqueue_copy(queue, MyDataX, clDataX)
854 170 equemene
                print("Positions for #%s iteration: %s" % (iteration,MyDataX))
855 170 equemene
            else:
856 170 equemene
                sys.stdout.write('.')
857 170 equemene
                sys.stdout.flush()
858 170 equemene
            Durations=np.append(Durations,Elapsed)
859 133 equemene
860 170 equemene
    print('\nEnding!')
861 139 equemene
862 160 equemene
    MyRoutines.CenterOfMass(queue,(1,1),None,clDataX,clCoM,np.int32(Number))
863 160 equemene
    CLLaunch=MyRoutines.Potential(queue,(Number,1),None,clDataX,clPotential)
864 160 equemene
    CLLaunch=MyRoutines.Kinetic(queue,(Number,1),None,clDataV,clKinetic)
865 141 equemene
    CLLaunch.wait()
866 160 equemene
    cl.enqueue_copy(queue,MyCoM,clCoM)
867 141 equemene
    cl.enqueue_copy(queue,MyPotential,clPotential)
868 141 equemene
    cl.enqueue_copy(queue,MyKinetic,clKinetic)
869 170 equemene
    print('\nCenter Of Mass estimated: (%s,%s,%s)' % (MyCoM[0],MyCoM[1],MyCoM[2]))
870 170 equemene
    print('Energy estimated: Viriel=%s Potential=%s Kinetic=%s\n'% (np.sum(MyPotential)+2.*np.sum(MyKinetic),np.sum(MyPotential),np.sum(MyKinetic)))
871 135 equemene
872 170 equemene
    print("Duration stats on device %s with %s iterations :\n\tMean:\t%s\n\tMedian:\t%s\n\tStddev:\t%s\n\tMin:\t%s\n\tMax:\t%s\n\n\tVariability:\t%s\n" % (Device,Iterations,np.mean(Durations),np.median(Durations),np.std(Durations),np.min(Durations),np.max(Durations),np.std(Durations)/np.median(Durations)))
873 170 equemene
874 171 equemene
    # FPS: 1/Elapsed
875 171 equemene
    FPS=np.ones(len(Durations))
876 171 equemene
    FPS/=Durations
877 171 equemene
878 171 equemene
    print("FPS stats on device %s with %s iterations :\n\tMean:\t%s\n\tMedian:\t%s\n\tStddev:\t%s\n\tMin:\t%s\n\tMax:\t%s\n" % (Device,Iterations,np.mean(FPS),np.median(FPS),np.std(FPS),np.min(FPS),np.max(FPS)))
879 171 equemene
880 170 equemene
    # Contraction of Square*Size*Hertz: Size*Size/Elapsed
881 170 equemene
    Squertz=np.ones(len(Durations))
882 170 equemene
    Squertz*=Number*Number
883 170 equemene
    Squertz/=Durations
884 170 equemene
885 172 equemene
    print("Squertz in log10 & complete stats on device %s with %s iterations :\n\tMean:\t%s\t%s\n\tMedian:\t%s\t%s\n\tStddev:\t%s\t%s\n\tMin:\t%s\t%s\n\tMax:\t%s\t%s\n" % (Device,Iterations,np.log10(np.mean(Squertz)),np.mean(Squertz),np.log10(np.median(Squertz)),np.median(Squertz),np.log10(np.std(Squertz)),np.std(Squertz),np.log10(np.min(Squertz)),np.min(Squertz),np.log10(np.max(Squertz)),np.max(Squertz)))
886 170 equemene
887 160 equemene
    clDataX.release()
888 160 equemene
    clDataV.release()
889 133 equemene
    clKinetic.release()
890 133 equemene
    clPotential.release()