Statistiques
| Révision :

root / NBody / NBodyGL.py @ 162

Historique | Voir | Annoter | Télécharger (23,02 ko)

1 161 equemene
#!/usr/bin/env python3
2 161 equemene
# -*- coding: utf-8 -*-
3 161 equemene
"""
4 161 equemene
Demonstrateur OpenCL d'interaction NCorps
5 161 equemene

6 161 equemene
Emmanuel QUEMENER <emmanuel.quemener@ens-lyon.fr> CeCILLv2
7 161 equemene
"""
8 161 equemene
import getopt
9 161 equemene
import sys
10 161 equemene
import time
11 161 equemene
import numpy as np
12 161 equemene
import pyopencl as cl
13 161 equemene
import pyopencl.array as cl_array
14 161 equemene
from numpy.random import randint as nprnd
15 161 equemene
import string, sys
16 161 equemene
from OpenGL.GL import *
17 161 equemene
from OpenGL.GLUT import *
18 161 equemene
19 161 equemene
def DictionariesAPI():
20 161 equemene
    Marsaglia={'CONG':0,'SHR3':1,'MWC':2,'KISS':3}
21 161 equemene
    Computing={'FP32':0,'FP64':1}
22 161 equemene
    return(Marsaglia,Computing)
23 161 equemene
24 161 equemene
BlobOpenCL= """
25 161 equemene
#define znew  ((z=36969*(z&65535)+(z>>16))<<16)
26 161 equemene
#define wnew  ((w=18000*(w&65535)+(w>>16))&65535)
27 161 equemene
#define MWC   (znew+wnew)
28 161 equemene
#define SHR3  (jsr=(jsr=(jsr=jsr^(jsr<<17))^(jsr>>13))^(jsr<<5))
29 161 equemene
#define CONG  (jcong=69069*jcong+1234567)
30 161 equemene
#define KISS  ((MWC^CONG)+SHR3)
31 161 equemene

32 161 equemene
#define TFP32 0
33 161 equemene
#define TFP64 1
34 161 equemene

35 161 equemene
#define MWCfp MWC * 2.3283064365386963e-10f
36 161 equemene
#define KISSfp KISS * 2.3283064365386963e-10f
37 161 equemene
#define SHR3fp SHR3 * 2.3283064365386963e-10f
38 161 equemene
#define CONGfp CONG * 2.3283064365386963e-10f
39 161 equemene

40 161 equemene
#define PI 3.141592653589793238462643197169399375105820974944592307816406286e0f
41 161 equemene

42 161 equemene
#define SMALL_NUM 1.e-9f
43 161 equemene

44 161 equemene
#define LENGTH 1.e0f
45 161 equemene

46 161 equemene
#if TYPE == TFP32
47 161 equemene
#define MYFLOAT4 float4
48 161 equemene
#define MYFLOAT8 float8
49 161 equemene
#define MYFLOAT float
50 161 equemene
#define DISTANCE fast_distance
51 161 equemene
#else
52 161 equemene
#if defined(cl_khr_fp64)  // Khronos extension available?
53 161 equemene
#pragma OPENCL EXTENSION cl_khr_fp64 : enable
54 161 equemene
#define DOUBLE_SUPPORT_AVAILABLE
55 161 equemene
#elif defined(cl_amd_fp64)  // AMD extension available?
56 161 equemene
#pragma OPENCL EXTENSION cl_amd_fp64 : enable
57 161 equemene
#define DOUBLE_SUPPORT_AVAILABLE
58 161 equemene
#endif
59 161 equemene
#define MYFLOAT4 double4
60 161 equemene
#define MYFLOAT8 double8
61 161 equemene
#define MYFLOAT double
62 161 equemene
#define DISTANCE distance
63 161 equemene
#endif
64 161 equemene

65 161 equemene

66 161 equemene
MYFLOAT4 Interaction(MYFLOAT4 m,MYFLOAT4 n)
67 161 equemene
{
68 161 equemene
    private MYFLOAT r=DISTANCE(n,m);
69 161 equemene

70 161 equemene
    return((n-m)/(MYFLOAT)(r*r*r));
71 161 equemene
}
72 161 equemene

73 161 equemene
MYFLOAT4 InteractionCore(MYFLOAT4 m,MYFLOAT4 n)
74 161 equemene
{
75 161 equemene
    private MYFLOAT core=(MYFLOAT)1.e5f;
76 161 equemene
    private MYFLOAT r=DISTANCE(n,m);
77 161 equemene
    private MYFLOAT d=r*r+core*core;
78 161 equemene

79 161 equemene
    return(core*(n-m)/(MYFLOAT)(d*d));
80 161 equemene
}
81 161 equemene

82 161 equemene
MYFLOAT PairPotential(MYFLOAT4 m,MYFLOAT4 n)
83 161 equemene
{
84 161 equemene
    return((MYFLOAT)(-1.e0f)/(DISTANCE(n,m)));
85 161 equemene
}
86 161 equemene

87 161 equemene
MYFLOAT AtomicPotential(__global MYFLOAT4* clDataX,int gid)
88 161 equemene
{
89 161 equemene
    private MYFLOAT potential=(MYFLOAT)0.e0f;
90 161 equemene
    private MYFLOAT4 x=clDataX[gid];
91 161 equemene

92 161 equemene
    for (int i=0;i<get_global_size(0);i++)
93 161 equemene
    {
94 161 equemene
        if (gid != i)
95 161 equemene
        potential+=PairPotential(x,clDataX[i]);
96 161 equemene
    }
97 161 equemene

98 161 equemene
    barrier(CLK_GLOBAL_MEM_FENCE);
99 161 equemene
    return(potential);
100 161 equemene
}
101 161 equemene

102 161 equemene
MYFLOAT AtomicPotentialCoM(__global MYFLOAT4* clDataX,__global MYFLOAT4* clCoM,int gid)
103 161 equemene
{
104 161 equemene
    return(PairPotential(clDataX[gid],clCoM[0]));
105 161 equemene
}
106 161 equemene

107 161 equemene
MYFLOAT8 AtomicRungeKutta(__global MYFLOAT4* clDataInX,__global MYFLOAT4* clDataInV,int gid,MYFLOAT dt)
108 161 equemene
{
109 161 equemene
    private MYFLOAT4 a0,v0,x0,a1,v1,x1,a2,v2,x2,a3,v3,x3,a4,v4,x4,xf,vf;
110 161 equemene

111 161 equemene
    a0=(MYFLOAT4)(0.e0f,0.e0f,0.e0f,0.e0f);
112 161 equemene
    v0=(MYFLOAT4)clDataInV[gid];
113 161 equemene
    x0=(MYFLOAT4)clDataInX[gid];
114 161 equemene
    int N = get_global_size(0);
115 161 equemene

116 161 equemene
    for (int i=0;i<N;i++)
117 161 equemene
    {
118 161 equemene
        if (gid != i)
119 161 equemene
        a0+=Interaction(x0,clDataInX[i]);
120 161 equemene
    }
121 161 equemene

122 161 equemene
    a1=(MYFLOAT4)(0.e0f,0.e0f,0.e0f,0.e0f);
123 161 equemene
    v1=v0+a0*dt;
124 161 equemene
    x1=x0+v0*dt;
125 161 equemene
    for (int i=0;i<N;i++)
126 161 equemene
    {
127 161 equemene
        if (gid != i)
128 161 equemene
        a1+=Interaction(x1,clDataInX[i]);
129 161 equemene
    }
130 161 equemene

131 161 equemene
    a2=(MYFLOAT4)(0.e0f,0.e0f,0.e0f,0.e0f);
132 161 equemene
    v2=v0+a1*dt*(MYFLOAT)5.e-1f;
133 161 equemene
    x2=x0+v1*dt*(MYFLOAT)5.e-1f;
134 161 equemene
    for (int i=0;i<N;i++)
135 161 equemene
    {
136 161 equemene
        if (gid != i)
137 161 equemene
        a2+=Interaction(x2,clDataInX[i]);
138 161 equemene
    }
139 161 equemene

140 161 equemene
    a3=(MYFLOAT4)(0.e0f,0.e0f,0.e0f,0.e0f);
141 161 equemene
    v3=v0+a2*dt*(MYFLOAT)5.e-1f;
142 161 equemene
    x3=x0+v2*dt*(MYFLOAT)5.e-1f;
143 161 equemene
    for (int i=0;i<N;i++)
144 161 equemene
    {
145 161 equemene
        if (gid != i)
146 161 equemene
        a3+=Interaction(x3,clDataInX[i]);
147 161 equemene
    }
148 161 equemene

149 161 equemene
    a4=(MYFLOAT4)(0.e0f,0.e0f,0.e0f,0.e0f);
150 161 equemene
    v4=v0+a3*dt;
151 161 equemene
    x4=x0+v3*dt;
152 161 equemene
    for (int i=0;i<N;i++)
153 161 equemene
    {
154 161 equemene
        if (gid != i)
155 161 equemene
        a4+=Interaction(x4,clDataInX[i]);
156 161 equemene
    }
157 161 equemene

158 161 equemene
    xf=x0+dt*(v1+(MYFLOAT)2.e0f*(v2+v3)+v4)/(MYFLOAT)6.e0f;
159 161 equemene
    vf=v0+dt*(a1+(MYFLOAT)2.e0f*(a2+a3)+a4)/(MYFLOAT)6.e0f;
160 161 equemene

161 162 equemene
    return((MYFLOAT8)(xf.s0,xf.s1,xf.s2,1.e0f,vf.s0,vf.s1,vf.s2,0.e0f));
162 161 equemene
}
163 161 equemene

164 161 equemene
// Elements from : http://doswa.com/2009/01/02/fourth-order-runge-kutta-numerical-integration.html
165 161 equemene

166 161 equemene
MYFLOAT8 AtomicHeun(__global MYFLOAT4* clDataInX,__global MYFLOAT4* clDataInV,int gid,MYFLOAT dt)
167 161 equemene
{
168 161 equemene
    private MYFLOAT4 x,v,a,xi,vi,ai,xf,vf;
169 161 equemene

170 161 equemene
    x=(MYFLOAT4)clDataInX[gid];
171 161 equemene
    v=(MYFLOAT4)clDataInV[gid];
172 161 equemene
    a=(MYFLOAT4)(0.e0f,0.e0f,0.e0f,0.e0f);
173 161 equemene

174 161 equemene
    for (int i=0;i<get_global_size(0);i++)
175 161 equemene
    {
176 161 equemene
        if (gid != i)
177 161 equemene
        a+=Interaction(x,clDataInX[i]);
178 161 equemene
    }
179 161 equemene

180 161 equemene
    vi=v+dt*a;
181 161 equemene
    xi=x+dt*vi;
182 161 equemene
    ai=(MYFLOAT4)(0.e0f,0.e0f,0.e0f,0.e0f);
183 161 equemene

184 161 equemene
    for (int i=0;i<get_global_size(0);i++)
185 161 equemene
    {
186 161 equemene
        if (gid != i)
187 161 equemene
        ai+=Interaction(xi,clDataInX[i]);
188 161 equemene
    }
189 161 equemene

190 161 equemene
    vf=v+dt*(a+ai)/(MYFLOAT)2.e0f;
191 161 equemene
    xf=x+dt*(v+vi)/(MYFLOAT)2.e0f;
192 161 equemene

193 161 equemene
    return((MYFLOAT8)(xf.s0,xf.s1,xf.s2,1.e0f,vf.s0,vf.s1,vf.s2,0.e0f));
194 161 equemene
}
195 161 equemene

196 161 equemene
MYFLOAT8 AtomicImplicitEuler(__global MYFLOAT4* clDataInX,__global MYFLOAT4* clDataInV,int gid,MYFLOAT dt)
197 161 equemene
{
198 161 equemene
    private MYFLOAT4 x,v,a,xf,vf;
199 161 equemene

200 161 equemene
    x=(MYFLOAT4)clDataInX[gid];
201 161 equemene
    v=(MYFLOAT4)clDataInV[gid];
202 161 equemene
    a=(MYFLOAT4)(0.e0f,0.e0f,0.e0f,0.e0f);
203 161 equemene

204 161 equemene
    for (int i=0;i<get_global_size(0);i++)
205 161 equemene
    {
206 161 equemene
        if (gid != i)
207 161 equemene
        a+=Interaction(x,clDataInX[i]);
208 161 equemene
    }
209 161 equemene

210 161 equemene
    vf=v+dt*a;
211 161 equemene
    xf=x+dt*vf;
212 161 equemene

213 161 equemene
    return((MYFLOAT8)(xf.s0,xf.s1,xf.s2,1.e0f,vf.s0,vf.s1,vf.s2,0.e0f));
214 161 equemene
}
215 161 equemene

216 161 equemene
MYFLOAT8 AtomicExplicitEuler(__global MYFLOAT4* clDataInX,__global MYFLOAT4* clDataInV,int gid,MYFLOAT dt)
217 161 equemene
{
218 161 equemene
    MYFLOAT4 x,v,a,xf,vf;
219 161 equemene

220 161 equemene
    x=(MYFLOAT4)clDataInX[gid];
221 161 equemene
    v=(MYFLOAT4)clDataInV[gid];
222 161 equemene
    a=(MYFLOAT4)(0.e0f,0.e0f,0.e0f,0.e0f);
223 161 equemene

224 161 equemene
    for (int i=0;i<get_global_size(0);i++)
225 161 equemene
    {
226 161 equemene
        if (gid != i)
227 161 equemene
        a+=Interaction(x,clDataInX[i]);
228 161 equemene
    }
229 161 equemene

230 161 equemene
    vf=v+dt*a;
231 161 equemene
    xf=x+dt*v;
232 161 equemene

233 161 equemene
    return((MYFLOAT8)(xf.s0,xf.s1,xf.s2,1.e0f,vf.s0,vf.s1,vf.s2,0.e0f));
234 161 equemene
}
235 161 equemene

236 161 equemene
__kernel void SplutterPoints(__global MYFLOAT4* clDataX, MYFLOAT box,
237 161 equemene
                             uint seed_z,uint seed_w)
238 161 equemene
{
239 162 equemene
    int gid=get_global_id(0);
240 162 equemene
    uint z=seed_z+gid;
241 161 equemene
    uint w=seed_w+gid;
242 162 equemene

243 162 equemene
    for (int i=0;i<gid;i++)
244 162 equemene
    {
245 162 equemene
        private MYFLOAT heat=MWCfp;
246 162 equemene
    }
247 162 equemene

248 162 equemene
    // Distribute in sphere
249 161 equemene
    MYFLOAT radius=MWCfp*box;
250 161 equemene
    MYFLOAT theta=MWCfp*PI;
251 161 equemene
    MYFLOAT phi=MWCfp*PI*(MYFLOAT)2.e0f;
252 162 equemene
    // cast to float for sin,cos are NEEDED by Mesa FP64 implementation!
253 162 equemene
    MYFLOAT sinTheta=sin((float)theta);
254 161 equemene
    clDataX[gid].s0=radius*sinTheta*cos((float)phi)/2.e0f;
255 161 equemene
    clDataX[gid].s1=radius*sinTheta*sin((float)phi)/2.e0f;
256 161 equemene
    clDataX[gid].s2=radius*cos((float)theta)/2.e0f;
257 162 equemene
    clDataX[gid].s3=(MYFLOAT)0.e0f;
258 161 equemene
    barrier(CLK_GLOBAL_MEM_FENCE);
259 161 equemene

260 162 equemene
    // Distribute in box
261 162 equemene
    // MYFLOAT x0=box*(MYFLOAT)(MWCfp-0.5);
262 162 equemene
    // MYFLOAT y0=box*(MYFLOAT)(MWCfp-0.5);
263 162 equemene
    // MYFLOAT z0=box*(MYFLOAT)(MWCfp-0.5);
264 162 equemene
    // clDataX[gid].s0123 = (MYFLOAT4) (x0,y0,z0,0.e0f);
265 161 equemene
}
266 161 equemene

267 161 equemene
__kernel void SplutterStress(__global MYFLOAT4* clDataX,__global MYFLOAT4* clDataV,__global MYFLOAT4* clCoM, MYFLOAT velocity,uint seed_z,uint seed_w)
268 161 equemene
{
269 161 equemene
    int gid = get_global_id(0);
270 161 equemene
    MYFLOAT N = (MYFLOAT)get_global_size(0);
271 161 equemene
    uint z=seed_z+(uint)gid;
272 161 equemene
    uint w=seed_w-(uint)gid;
273 161 equemene

274 161 equemene
    if (velocity<SMALL_NUM) {
275 161 equemene
       MYFLOAT4 SpeedVector=(MYFLOAT4)normalize(cross(clDataX[gid],clCoM[0]))*sqrt(-AtomicPotential(clDataX,gid)/(MYFLOAT)2.e0f);
276 161 equemene
       clDataV[gid]=SpeedVector;
277 161 equemene
    }
278 161 equemene
    else
279 161 equemene
    {
280 161 equemene
       // cast to float for sin,cos are NEEDED by Mesa FP64 implementation!
281 161 equemene
       MYFLOAT theta=MWCfp*PI;
282 161 equemene
       MYFLOAT phi=MWCfp*PI*(MYFLOAT)2.e0f;
283 161 equemene
       MYFLOAT sinTheta=sin((float)theta);
284 161 equemene

285 161 equemene
       clDataV[gid].s0=velocity*sinTheta*cos((float)phi);
286 161 equemene
       clDataV[gid].s1=velocity*sinTheta*sin((float)phi);
287 161 equemene
       clDataV[gid].s2=velocity*cos((float)theta);
288 162 equemene
       clDataV[gid].s3=(MYFLOAT)0.e0f;
289 161 equemene
    }
290 161 equemene
    barrier(CLK_GLOBAL_MEM_FENCE);
291 161 equemene
}
292 161 equemene

293 161 equemene
__kernel void RungeKutta(__global MYFLOAT4* clDataX,__global MYFLOAT4* clDataV,MYFLOAT h)
294 161 equemene
{
295 161 equemene
    int gid = get_global_id(0);
296 161 equemene

297 161 equemene
    MYFLOAT8 clDataGid=AtomicRungeKutta(clDataX,clDataV,gid,h);
298 161 equemene
    barrier(CLK_GLOBAL_MEM_FENCE);
299 161 equemene
    clDataX[gid]=clDataGid.lo;
300 161 equemene
    clDataV[gid]=clDataGid.hi;
301 161 equemene
}
302 161 equemene

303 161 equemene
__kernel void ImplicitEuler(__global MYFLOAT4* clDataX,__global MYFLOAT4* clDataV,MYFLOAT h)
304 161 equemene
{
305 161 equemene
    int gid = get_global_id(0);
306 161 equemene

307 161 equemene
    MYFLOAT8 clDataGid=AtomicImplicitEuler(clDataX,clDataV,gid,h);
308 161 equemene
    barrier(CLK_GLOBAL_MEM_FENCE);
309 161 equemene
    clDataX[gid]=clDataGid.lo;
310 161 equemene
    clDataV[gid]=clDataGid.hi;
311 161 equemene
}
312 161 equemene

313 161 equemene
__kernel void Heun(__global MYFLOAT4* clDataX,__global MYFLOAT4* clDataV,MYFLOAT h)
314 161 equemene
{
315 161 equemene
    int gid = get_global_id(0);
316 161 equemene

317 161 equemene
    MYFLOAT8 clDataGid=AtomicHeun(clDataX,clDataV,gid,h);
318 161 equemene
    barrier(CLK_GLOBAL_MEM_FENCE);
319 161 equemene
    clDataX[gid]=clDataGid.lo;
320 161 equemene
    clDataV[gid]=clDataGid.hi;
321 161 equemene
}
322 161 equemene

323 161 equemene
__kernel void ExplicitEuler(__global MYFLOAT4* clDataX,__global MYFLOAT4* clDataV,MYFLOAT h)
324 161 equemene
{
325 161 equemene
    int gid = get_global_id(0);
326 161 equemene

327 161 equemene
    MYFLOAT8 clDataGid=AtomicExplicitEuler(clDataX,clDataV,gid,h);
328 161 equemene
    barrier(CLK_GLOBAL_MEM_FENCE);
329 161 equemene
    clDataX[gid]=clDataGid.lo;
330 161 equemene
    clDataV[gid]=clDataGid.hi;
331 161 equemene
}
332 161 equemene

333 161 equemene
__kernel void CoMPotential(__global MYFLOAT4* clDataX,__global MYFLOAT4* clCoM,__global MYFLOAT* clPotential)
334 161 equemene
{
335 161 equemene
    int gid = get_global_id(0);
336 161 equemene

337 161 equemene
    clPotential[gid]=PairPotential(clDataX[gid],clCoM[0]);
338 161 equemene
}
339 161 equemene

340 161 equemene
__kernel void Potential(__global MYFLOAT4* clDataX,__global MYFLOAT* clPotential)
341 161 equemene
{
342 161 equemene
    int gid = get_global_id(0);
343 161 equemene

344 161 equemene
    MYFLOAT potential=(MYFLOAT)0.e0f;
345 161 equemene
    MYFLOAT4 x=clDataX[gid];
346 161 equemene

347 161 equemene
    for (int i=0;i<get_global_size(0);i++)
348 161 equemene
    {
349 161 equemene
        if (gid != i)
350 161 equemene
        potential+=PairPotential(x,clDataX[i]);
351 161 equemene
    }
352 161 equemene

353 161 equemene
    barrier(CLK_GLOBAL_MEM_FENCE);
354 161 equemene
    clPotential[gid]=potential*(MYFLOAT)5.e-1f;
355 161 equemene
}
356 161 equemene

357 161 equemene
__kernel void CenterOfMass(__global MYFLOAT4* clDataX,__global MYFLOAT4* clCoM,int Size)
358 161 equemene
{
359 161 equemene
    MYFLOAT4 CoM=clDataX[0];
360 161 equemene

361 161 equemene
    for (int i=1;i<Size;i++)
362 161 equemene
    {
363 161 equemene
        CoM+=clDataX[i];
364 161 equemene
    }
365 161 equemene

366 161 equemene
    barrier(CLK_GLOBAL_MEM_FENCE);
367 161 equemene
    clCoM[0]=(MYFLOAT4)(CoM.s0,CoM.s1,CoM.s2,1.e0f)/(MYFLOAT)Size;
368 161 equemene
}
369 161 equemene

370 161 equemene
__kernel void Kinetic(__global MYFLOAT4* clDataV,__global MYFLOAT* clKinetic)
371 161 equemene
{
372 161 equemene
    int gid = get_global_id(0);
373 161 equemene

374 161 equemene
    barrier(CLK_GLOBAL_MEM_FENCE);
375 161 equemene
    MYFLOAT d=(MYFLOAT)length(clDataV[gid]);
376 161 equemene
    clKinetic[gid]=(MYFLOAT)5.e-1f*(MYFLOAT)(d*d);
377 161 equemene
}
378 161 equemene
"""
379 161 equemene
380 161 equemene
def display(*args):
381 161 equemene
382 162 equemene
    global MyDataX,MyDataV,clDataX,clDataV,Step,Method,Number,Iterations
383 161 equemene
384 161 equemene
    glClearColor(0.0, 0.0, 0.0, 0.0)
385 161 equemene
    glClear(GL_COLOR_BUFFER_BIT)
386 162 equemene
    glColor3f(1.0,1.0,1.0)
387 162 equemene
388 161 equemene
    time_start=time.time()
389 161 equemene
    if Method=="RungeKutta":
390 161 equemene
        CLLaunch=MyRoutines.RungeKutta(queue,(Number,1),None,clDataX,clDataV,Step)
391 161 equemene
    elif Method=="ExplicitEuler":
392 161 equemene
        CLLaunch=MyRoutines.ExplicitEuler(queue,(Number,1),None,clDataX,clDataV,Step)
393 161 equemene
    elif Method=="Heun":
394 161 equemene
        CLLaunch=MyRoutines.Heun(queue,(Number,1),None,clDataX,clDataV,Step)
395 161 equemene
    else:
396 161 equemene
        CLLaunch=MyRoutines.ImplicitEuler(queue,(Number,1),None,clDataX,clDataV,Step)
397 161 equemene
    CLLaunch.wait()
398 162 equemene
    print("Duration of #%s iteration: %s" % (Iterations,(time.time()-time_start)))
399 161 equemene
400 161 equemene
    cl.enqueue_copy(queue, MyDataX, clDataX)
401 162 equemene
    # print(MyDataX.reshape(Number,4))
402 162 equemene
    MyDataX.reshape(Number,4)[:,3]=1
403 161 equemene
    glVertexPointerf(MyDataX.reshape(Number,4))
404 162 equemene
    # cl.enqueue_copy(queue, MyDataV, clDataV)
405 162 equemene
    # print(MyDataV.reshape(Number,4))
406 162 equemene
    # MyDataV.reshape(Number,4)[:,3]=1
407 162 equemene
    # glVertexPointerf(MyDataV.reshape(Number,4))
408 161 equemene
    glEnableClientState(GL_VERTEX_ARRAY)
409 161 equemene
    glDrawArrays(GL_POINTS, 0, Number)
410 161 equemene
    glDisableClientState(GL_VERTEX_ARRAY)
411 161 equemene
    glFlush()
412 162 equemene
    Iterations+=1
413 161 equemene
    glutSwapBuffers()
414 161 equemene
415 161 equemene
def halt():
416 161 equemene
    pass
417 161 equemene
418 162 equemene
def keyboard(k, x, y):
419 162 equemene
    global view_rotz
420 162 equemene
    LC_Z = as_8_bit( 'z' )
421 162 equemene
    UC_Z = as_8_bit( 'Z' )
422 161 equemene
423 162 equemene
    if k == LC_Z:
424 162 equemene
        view_rotz += 1.0
425 162 equemene
    elif k == UC_Z:
426 162 equemene
        view_rotz -= 1.0
427 162 equemene
    elif ord(k) == 27: # Escape
428 162 equemene
        glutSetOption(GLUT_ACTION_ON_WINDOW_CLOSE,GLUT_ACTION_CONTINUE_EXECUTION)
429 162 equemene
        glutSetOption(GLUT_ACTION_GLUTMAINLOOP_RETURNS,GLUT_ACTION_CONTINUE_EXECUTION)
430 162 equemene
        glutLeaveMainLoop()
431 162 equemene
        return(False)
432 162 equemene
    else:
433 162 equemene
        return
434 162 equemene
    glRotatef(view_rotz, 0.0, 0.0, 1.0)
435 162 equemene
    glutPostRedisplay()
436 162 equemene
437 162 equemene
def special(k, x, y):
438 162 equemene
    global view_rotx, view_roty, view_rotz
439 162 equemene
440 162 equemene
    if k == GLUT_KEY_UP:
441 162 equemene
        view_rotx += 1.0
442 162 equemene
    elif k == GLUT_KEY_DOWN:
443 162 equemene
        view_rotx -= 1.0
444 162 equemene
    elif k == GLUT_KEY_LEFT:
445 162 equemene
        view_roty += 1.0
446 162 equemene
    elif k == GLUT_KEY_RIGHT:
447 162 equemene
        view_roty -= 1.0
448 162 equemene
    else:
449 162 equemene
        return
450 162 equemene
    glRotatef(view_rotx, 1.0, 0.0, 0.0)
451 162 equemene
    glRotatef(view_roty, 0.0, 1.0, 0.0)
452 162 equemene
    glutPostRedisplay()
453 162 equemene
454 161 equemene
def mouse(button, state, x, y):
455 161 equemene
    global angle, delta_angle, halted
456 161 equemene
    if button == GLUT_LEFT_BUTTON:
457 161 equemene
        angle = angle + delta_angle
458 161 equemene
    elif button == GLUT_RIGHT_BUTTON:
459 161 equemene
        angle = angle - delta_angle
460 161 equemene
    elif button == GLUT_MIDDLE_BUTTON and state == GLUT_DOWN:
461 161 equemene
        if halted:
462 161 equemene
            glutIdleFunc(display)
463 161 equemene
            halted = 0
464 161 equemene
        else:
465 161 equemene
            glutIdleFunc(halt)
466 161 equemene
            halted = 1
467 161 equemene
468 161 equemene
def setup_viewport():
469 161 equemene
    global SizeOfBox
470 161 equemene
    glMatrixMode(GL_PROJECTION)
471 161 equemene
    glLoadIdentity()
472 161 equemene
    glOrtho(-SizeOfBox, SizeOfBox, -SizeOfBox, SizeOfBox, -SizeOfBox, SizeOfBox)
473 162 equemene
474 161 equemene
def reshape(w, h):
475 161 equemene
    glViewport(0, 0, w, h)
476 161 equemene
    setup_viewport()
477 161 equemene
478 161 equemene
if __name__=='__main__':
479 161 equemene
480 161 equemene
    global Number,Step,clDataX,clDataV,MyDataX,MyDataV,Method,SizeOfBox
481 161 equemene
482 161 equemene
    # ValueType
483 161 equemene
    ValueType='FP32'
484 161 equemene
    class MyFloat(np.float32):pass
485 161 equemene
    #    clType8=cl_array.vec.float8
486 161 equemene
    clType4=cl_array.vec.float4
487 161 equemene
    # Set defaults values
488 161 equemene
    np.set_printoptions(precision=2)
489 161 equemene
    # Id of Device : 1 is for first find !
490 161 equemene
    Device=0
491 161 equemene
    # Iterations is integer
492 161 equemene
    Number=2
493 161 equemene
    # Size of box
494 161 equemene
    SizeOfBox=MyFloat(1.)
495 161 equemene
    # Initial velocity of particules
496 161 equemene
    Velocity=MyFloat(1.)
497 161 equemene
    # Redo the last process
498 161 equemene
    Iterations=int(np.pi*1024)
499 161 equemene
    # Step
500 161 equemene
    Step=MyFloat(1./256)
501 161 equemene
    # Method of integration
502 161 equemene
    Method='ImplicitEuler'
503 161 equemene
    # InitialRandom
504 161 equemene
    InitialRandom=False
505 161 equemene
    # RNG Marsaglia Method
506 161 equemene
    RNG='MWC'
507 161 equemene
    # CheckEnergies
508 161 equemene
    CheckEnergies=False
509 161 equemene
    # Display samples in 3D
510 161 equemene
    GraphSamples=False
511 161 equemene
    # Viriel Distribution of stress
512 161 equemene
    VirielStress=True
513 161 equemene
514 161 equemene
    HowToUse='%s -h [Help] -r [InitialRandom] -e [VirielStress] -g [GraphSamples] -c [CheckEnergies] -d <DeviceId> -n <NumberOfParticules> -z <SizeOfBox> -v <Velocity> -s <Step> -i <Iterations> -m <ImplicitEuler|RungeKutta|ExplicitEuler|Heun> -t <FP32|FP64>'
515 161 equemene
516 161 equemene
    try:
517 161 equemene
        opts, args = getopt.getopt(sys.argv[1:],"rehgcd:n:z:v:i:s:m:t:",["random","viriel","graph","check","device=","number=","size=","velocity=","iterations=","step=","method=","valuetype="])
518 161 equemene
    except getopt.GetoptError:
519 161 equemene
        print(HowToUse % sys.argv[0])
520 161 equemene
        sys.exit(2)
521 161 equemene
522 161 equemene
    for opt, arg in opts:
523 161 equemene
        if opt == '-h':
524 161 equemene
            print(HowToUse % sys.argv[0])
525 161 equemene
526 161 equemene
            print("\nInformations about devices detected under OpenCL:")
527 161 equemene
            try:
528 161 equemene
                Id=0
529 161 equemene
                for platform in cl.get_platforms():
530 161 equemene
                    for device in platform.get_devices():
531 161 equemene
                        #deviceType=cl.device_type.to_string(device.type)
532 161 equemene
                        deviceType="xPU"
533 161 equemene
                        print("Device #%i from %s of type %s : %s" % (Id,platform.vendor.lstrip(),deviceType,device.name.lstrip()))
534 161 equemene
                        Id=Id+1
535 161 equemene
                sys.exit()
536 161 equemene
            except ImportError:
537 161 equemene
                print("Your platform does not seem to support OpenCL")
538 161 equemene
                sys.exit()
539 161 equemene
540 161 equemene
        elif opt in ("-t", "--valuetype"):
541 161 equemene
            if arg=='FP64':
542 161 equemene
                class MyFloat(np.float64): pass
543 161 equemene
                clType4=cl_array.vec.double4
544 161 equemene
            else:
545 161 equemene
                class MyFloat(np.float32):pass
546 161 equemene
                clType4=cl_array.vec.float4
547 161 equemene
            ValueType = arg
548 161 equemene
        elif opt in ("-d", "--device"):
549 161 equemene
            Device=int(arg)
550 161 equemene
        elif opt in ("-m", "--method"):
551 161 equemene
            Method=arg
552 161 equemene
        elif opt in ("-n", "--number"):
553 161 equemene
            Number=int(arg)
554 161 equemene
        elif opt in ("-z", "--size"):
555 161 equemene
            SizeOfBox=MyFloat(arg)
556 161 equemene
        elif opt in ("-v", "--velocity"):
557 161 equemene
            Velocity=MyFloat(arg)
558 161 equemene
            VirielStress=False
559 161 equemene
        elif opt in ("-s", "--step"):
560 161 equemene
            Step=MyFloat(arg)
561 161 equemene
        elif opt in ("-i", "--iterations"):
562 161 equemene
            Iterations=int(arg)
563 161 equemene
        elif opt in ("-r", "--random"):
564 161 equemene
            InitialRandom=True
565 161 equemene
        elif opt in ("-c", "--check"):
566 161 equemene
            CheckEnergies=True
567 161 equemene
        elif opt in ("-g", "--graph"):
568 161 equemene
            GraphSamples=True
569 161 equemene
        elif opt in ("-e", "--viriel"):
570 161 equemene
            VirielStress=True
571 161 equemene
572 161 equemene
    SizeOfBox=MyFloat(SizeOfBox*Number)
573 161 equemene
    Velocity=MyFloat(Velocity)
574 161 equemene
    Step=MyFloat(Step)
575 161 equemene
576 161 equemene
    print("Device choosed : %s" % Device)
577 161 equemene
    print("Number of particules : %s" % Number)
578 161 equemene
    print("Size of Box : %s" % SizeOfBox)
579 161 equemene
    print("Initial velocity : %s" % Velocity)
580 161 equemene
    print("Number of iterations : %s" % Iterations)
581 161 equemene
    print("Step of iteration : %s" % Step)
582 161 equemene
    print("Method of resolution : %s" % Method)
583 161 equemene
    print("Initial Random for RNG Seed : %s" % InitialRandom)
584 161 equemene
    print("Check for Energies : %s" % CheckEnergies)
585 161 equemene
    print("Graph for Samples : %s" % GraphSamples)
586 161 equemene
    print("ValueType is : %s" % ValueType)
587 161 equemene
    print("Viriel distribution of stress %s" % VirielStress)
588 161 equemene
589 161 equemene
    # Create Numpy array of CL vector with 8 FP32
590 161 equemene
    MyCoM = np.zeros(4,dtype=MyFloat)
591 161 equemene
    MyDataX = np.zeros(Number*4, dtype=MyFloat)
592 161 equemene
    MyDataV = np.zeros(Number*4, dtype=MyFloat)
593 161 equemene
    # MyCoM = np.zeros(1,dtype=clType4)
594 161 equemene
    # MyDataX = np.zeros(Number, dtype=clType4)
595 161 equemene
    # MyDataV = np.zeros(Number, dtype=clType4)
596 161 equemene
    MyPotential = np.zeros(Number, dtype=MyFloat)
597 161 equemene
    MyKinetic = np.zeros(Number, dtype=MyFloat)
598 161 equemene
599 161 equemene
    Marsaglia,Computing=DictionariesAPI()
600 161 equemene
601 161 equemene
    # Scan the OpenCL arrays
602 161 equemene
    Id=0
603 161 equemene
    HasXPU=False
604 161 equemene
    for platform in cl.get_platforms():
605 161 equemene
        for device in platform.get_devices():
606 161 equemene
            if Id==Device:
607 161 equemene
                PlatForm=platform
608 161 equemene
                XPU=device
609 161 equemene
                print("CPU/GPU selected: ",device.name.lstrip())
610 161 equemene
                print("Platform selected: ",platform.name)
611 161 equemene
                HasXPU=True
612 161 equemene
            Id+=1
613 161 equemene
614 161 equemene
    if HasXPU==False:
615 161 equemene
        print("No XPU #%i found in all of %i devices, sorry..." % (Device,Id-1))
616 161 equemene
        sys.exit()
617 161 equemene
618 161 equemene
    # Create Context
619 161 equemene
    try:
620 161 equemene
        ctx = cl.Context([XPU])
621 161 equemene
        queue = cl.CommandQueue(ctx,properties=cl.command_queue_properties.PROFILING_ENABLE)
622 161 equemene
    except:
623 161 equemene
        print("Crash during context creation")
624 161 equemene
625 161 equemene
    print(Marsaglia[RNG],Computing[ValueType])
626 161 equemene
    # Build all routines used for the computing
627 161 equemene
    BuildOptions="-cl-mad-enable -cl-kernel-arg-info -cl-fast-relaxed-math -cl-std=CL1.2 -DTRNG=%i -DTYPE=%i" % (Marsaglia[RNG],Computing[ValueType])
628 161 equemene
629 162 equemene
    if 'Intel' in PlatForm.name or 'Experimental' in PlatForm.name or 'Clover' in PlatForm.name or 'Portable' in PlatForm.name :
630 161 equemene
        MyRoutines = cl.Program(ctx, BlobOpenCL).build(options = BuildOptions)
631 161 equemene
    else:
632 161 equemene
        MyRoutines = cl.Program(ctx, BlobOpenCL).build(options = BuildOptions+" -cl-strict-aliasing")
633 161 equemene
634 161 equemene
    mf = cl.mem_flags
635 162 equemene
    # Read/Write approach for buffering
636 161 equemene
    # clDataX = cl.Buffer(ctx, mf.READ_WRITE, MyDataX.nbytes)
637 161 equemene
    # clDataV = cl.Buffer(ctx, mf.READ_WRITE, MyDataV.nbytes)
638 161 equemene
    # clPotential = cl.Buffer(ctx, mf.READ_WRITE, MyPotential.nbytes)
639 161 equemene
    # clKinetic = cl.Buffer(ctx, mf.READ_WRITE, MyKinetic.nbytes)
640 161 equemene
    # clCoM = cl.Buffer(ctx, mf.READ_WRITE, MyCoM.nbytes)
641 161 equemene
642 162 equemene
    # Write/HostPoniter approach for buffering
643 161 equemene
    clDataX = cl.Buffer(ctx, mf.WRITE_ONLY|mf.COPY_HOST_PTR,hostbuf=MyDataX)
644 161 equemene
    clDataV = cl.Buffer(ctx, mf.WRITE_ONLY|mf.COPY_HOST_PTR,hostbuf=MyDataV)
645 161 equemene
    clPotential = cl.Buffer(ctx, mf.WRITE_ONLY|mf.COPY_HOST_PTR,hostbuf=MyPotential)
646 161 equemene
    clKinetic = cl.Buffer(ctx, mf.WRITE_ONLY|mf.COPY_HOST_PTR,hostbuf=MyKinetic)
647 161 equemene
    clCoM = cl.Buffer(ctx, mf.WRITE_ONLY|mf.COPY_HOST_PTR,hostbuf=MyCoM)
648 161 equemene
649 161 equemene
    print('All particles superimposed.')
650 161 equemene
651 161 equemene
    # Set particles to RNG points
652 161 equemene
    if InitialRandom:
653 161 equemene
        MyRoutines.SplutterPoints(queue,(Number,1),None,clDataX,SizeOfBox,np.uint32(nprnd(2**32)),np.uint32(nprnd(2**32)))
654 161 equemene
    else:
655 161 equemene
        MyRoutines.SplutterPoints(queue,(Number,1),None,clDataX,SizeOfBox,np.uint32(110271),np.uint32(250173))
656 161 equemene
657 161 equemene
    print('All particules distributed')
658 161 equemene
659 161 equemene
    CLLaunch=MyRoutines.CenterOfMass(queue,(1,1),None,clDataX,clCoM,np.int32(Number))
660 161 equemene
    CLLaunch.wait()
661 161 equemene
    cl.enqueue_copy(queue,MyCoM,clCoM)
662 161 equemene
    print('Center Of Mass: (%s,%s,%s)' % (MyCoM[0],MyCoM[1],MyCoM[2]))
663 161 equemene
664 161 equemene
    if VirielStress:
665 161 equemene
        CLLaunch=MyRoutines.SplutterStress(queue,(Number,1),None,clDataX,clDataV,clCoM,MyFloat(0.),np.uint32(110271),np.uint32(250173))
666 161 equemene
    else:
667 161 equemene
        CLLaunch=MyRoutines.SplutterStress(queue,(Number,1),None,clDataX,clDataV,clCoM,Velocity,np.uint32(110271),np.uint32(250173))
668 161 equemene
    CLLaunch.wait()
669 161 equemene
670 161 equemene
    if GraphSamples:
671 161 equemene
        cl.enqueue_copy(queue, MyDataX, clDataX)
672 161 equemene
        # t0=np.array([[MyDataX[0][0],MyDataX[0][1],MyDataX[0][2]]])
673 161 equemene
        # t1=np.array([[MyDataX[1][0],MyDataX[1][1],MyDataX[1][2]]])
674 161 equemene
        # tL=np.array([[MyDataX[-1][0],MyDataX[-1][1],MyDataX[-1][2]]])
675 161 equemene
676 161 equemene
    CLLaunch=MyRoutines.Potential(queue,(Number,1),None,clDataX,clPotential)
677 161 equemene
    CLLaunch=MyRoutines.Kinetic(queue,(Number,1),None,clDataV,clKinetic)
678 161 equemene
    CLLaunch.wait()
679 161 equemene
    cl.enqueue_copy(queue,MyPotential,clPotential)
680 161 equemene
    cl.enqueue_copy(queue,MyKinetic,clKinetic)
681 161 equemene
    print('Viriel=%s Potential=%s Kinetic=%s'% (np.sum(MyPotential)+2*np.sum(MyKinetic),np.sum(MyPotential),np.sum(MyKinetic)))
682 161 equemene
683 161 equemene
    if GraphSamples:
684 161 equemene
        cl.enqueue_copy(queue, MyDataX, clDataX)
685 161 equemene
        # t0=np.array([[MyDataX[0][0],MyDataX[0][1],MyDataX[0][2]]])
686 161 equemene
        # t1=np.array([[MyDataX[1][0],MyDataX[1][1],MyDataX[1][2]]])
687 161 equemene
        # tL=np.array([[MyDataX[-1][0],MyDataX[-1][1],MyDataX[-1][2]]])
688 161 equemene
689 162 equemene
    wall_time_start=time.time()
690 162 equemene
691 161 equemene
    print("Use the mouse buttons to control some of the dots.")
692 161 equemene
    print("Hit any key to quit.")
693 162 equemene
    global view_rotx,view_roty,view_rotz,Iterations
694 162 equemene
    (view_rotx,view_roty,view_rotz)=(0.0, 0.0, 0.0)
695 162 equemene
    Iterations=0
696 162 equemene
    glutInit(sys.argv)
697 162 equemene
    glutInitDisplayMode(GLUT_DOUBLE | GLUT_RGB)
698 162 equemene
    glutInitWindowSize(512,512)
699 162 equemene
    glutCreateWindow(b'NBodyGL')
700 162 equemene
    setup_viewport()
701 162 equemene
    glutReshapeFunc(reshape)
702 162 equemene
    glutDisplayFunc(display)
703 162 equemene
    glutIdleFunc(display)
704 162 equemene
#   glutMouseFunc(mouse)
705 162 equemene
    glutSpecialFunc(special)
706 162 equemene
    Loop=glutKeyboardFunc(keyboard)
707 162 equemene
    glutMainLoop()
708 161 equemene
709 162 equemene
    print("\nWall Duration on %s for each %s\n" % (Device,(time.time()-wall_time_start)/Iterations))
710 162 equemene
711 161 equemene
    MyRoutines.CenterOfMass(queue,(1,1),None,clDataX,clCoM,np.int32(Number))
712 161 equemene
    CLLaunch=MyRoutines.Potential(queue,(Number,1),None,clDataX,clPotential)
713 161 equemene
    CLLaunch=MyRoutines.Kinetic(queue,(Number,1),None,clDataV,clKinetic)
714 161 equemene
    CLLaunch.wait()
715 161 equemene
    cl.enqueue_copy(queue,MyCoM,clCoM)
716 161 equemene
    cl.enqueue_copy(queue,MyPotential,clPotential)
717 161 equemene
    cl.enqueue_copy(queue,MyKinetic,clKinetic)
718 161 equemene
    print('Center Of Mass: (%s,%s,%s)' % (MyCoM[0],MyCoM[1],MyCoM[2]))
719 161 equemene
    print('Viriel=%s Potential=%s Kinetic=%s'% (np.sum(MyPotential)+2.*np.sum(MyKinetic),np.sum(MyPotential),np.sum(MyKinetic)))
720 161 equemene
721 161 equemene
    if GraphSamples:
722 161 equemene
        t0=np.transpose(np.reshape(t0,(Iterations+1,3)))
723 161 equemene
        t1=np.transpose(np.reshape(t1,(Iterations+1,3)))
724 161 equemene
        tL=np.transpose(np.reshape(tL,(Iterations+1,3)))
725 161 equemene
726 161 equemene
        import matplotlib.pyplot as plt
727 161 equemene
        from mpl_toolkits.mplot3d import Axes3D
728 161 equemene
729 161 equemene
        fig = plt.figure()
730 161 equemene
        ax = fig.gca(projection='3d')
731 161 equemene
        ax.scatter(t0[0],t0[1],t0[2], marker='^',color='blue')
732 161 equemene
        ax.scatter(t1[0],t1[1],t1[2], marker='o',color='red')
733 161 equemene
        ax.scatter(tL[0],tL[1],tL[2], marker='D',color='green')
734 161 equemene
735 161 equemene
        ax.set_xlabel('X Label')
736 161 equemene
        ax.set_ylabel('Y Label')
737 161 equemene
        ax.set_zlabel('Z Label')
738 161 equemene
739 161 equemene
        plt.show()
740 161 equemene
741 161 equemene
    clDataX.release()
742 161 equemene
    clDataV.release()
743 161 equemene
    clKinetic.release()
744 161 equemene
    clPotential.release()