Statistiques
| Révision :

root / NBody / NBody.py @ 189

Historique | Voir | Annoter | Télécharger (29,02 ko)

1
#!/usr/bin/env python3
2
# -*- coding: utf-8 -*-
3
"""
4
NBody Demonstrator implemented in OpenCL, rendering OpenGL
5

6
CC BY-NC-SA 2011 : Emmanuel QUEMENER <emmanuel.quemener@gmail.com> 
7
Cecill v2 : Emmanuel QUEMENER <emmanuel.quemener@gmail.com>
8

9
Thanks to Andreas Klockner for PyOpenCL:
10
http://mathema.tician.de/software/pyopencl
11
 
12
"""
13
import getopt
14
import sys
15
import time
16
import numpy as np
17
import pyopencl as cl
18
import pyopencl.array as cl_array
19
from numpy.random import randint as nprnd
20
import string, sys
21
from OpenGL.GL import *
22
from OpenGL.GLUT import *
23

    
24
def DictionariesAPI():
25
    Marsaglia={'CONG':0,'SHR3':1,'MWC':2,'KISS':3}
26
    Computing={'FP32':0,'FP64':1}
27
    Interaction={'Force':0,'Potential':1}
28
    Artevasion={'None':0,'NegExp':1,'CorRad':2}
29
    return(Marsaglia,Computing,Interaction,Artevasion)
30

    
31
BlobOpenCL= """
32
#define TFP32 0
33
#define TFP64 1
34

35
#define TFORCE 0
36
#define TPOTENTIAL 1
37

38
#define NONE 0
39
#define NEGEXP 1
40
#define CORRAD 2
41

42
#if TYPE == TFP32
43
#define MYFLOAT4 float4
44
#define MYFLOAT8 float8
45
#define MYFLOAT float
46
#define DISTANCE fast_distance
47
#else
48
#define MYFLOAT4 double4
49
#define MYFLOAT8 double8
50
#define MYFLOAT double
51
#define DISTANCE distance
52
#if defined(cl_khr_fp64)  // Khronos extension available?
53
#pragma OPENCL EXTENSION cl_khr_fp64 : enable
54
#endif
55
#endif
56

57
#define znew  ((zmwc=36969*(zmwc&65535)+(zmwc>>16))<<16)
58
#define wnew  ((wmwc=18000*(wmwc&65535)+(wmwc>>16))&65535)
59
#define MWC   (znew+wnew)
60
#define SHR3  (jsr=(jsr=(jsr=jsr^(jsr<<17))^(jsr>>13))^(jsr<<5))
61
#define CONG  (jcong=69069*jcong+1234567)
62
#define KISS  ((MWC^CONG)+SHR3)
63

64
#define MWCfp (MYFLOAT)(MWC * 2.3283064365386963e-10f)
65
#define KISSfp (MYFLOAT)(KISS * 2.3283064365386963e-10f)
66
#define SHR3fp (MYFLOAT)(SHR3 * 2.3283064365386963e-10f)
67
#define CONGfp (MYFLOAT)(CONG * 2.3283064365386963e-10f)
68

69
#define PI (MYFLOAT)3.141592653589793238e0f
70

71
#define SMALL_NUM (MYFLOAT)1.e-9f
72

73
#define CoreRadius (MYFLOAT)(1.e0f)
74

75
// Create my own Distance implementation: distance buggy on Oland AMD chipset
76

77
MYFLOAT MyDistance(MYFLOAT4 n,MYFLOAT4 m)
78
{
79
    private MYFLOAT x2,y2,z2;
80
    x2=n.s0-m.s0;
81
    x2*=x2;
82
    y2=n.s1-m.s1;
83
    y2*=y2;
84
    z2=n.s2-m.s2;
85
    z2*=z2;
86
    return(sqrt(x2+y2+z2));
87
}
88

89
// Potential between 2 m,n bodies
90
MYFLOAT PairPotential(MYFLOAT4 m,MYFLOAT4 n)
91
#if ARTEVASION == NEGEXP
92
// Add exp(-r) to numerator to avoid divergence for low distances
93
{
94
    MYFLOAT r=DISTANCE(n,m);
95
    return((-1.e0f+exp(-r))/r);
96
}
97
#elif ARTEVASION == CORRAD
98
// Add Core Radius to avoid divergence for low distances
99
{
100
    MYFLOAT r=DISTANCE(n,m);
101
    return(-1.e0f/sqrt(r*r+CoreRadius*CoreRadius));
102
}
103
#else
104
// Classical potential in 1/r
105
{
106
//    return((MYFLOAT)(-1.e0f)/(MyDistance(m,n)));
107
    return((MYFLOAT)(-1.e0f)/(DISTANCE(n,m)));
108
}
109
#endif
110

111
// Interaction based of Force as gradient of Potential
112
MYFLOAT4 Interaction(MYFLOAT4 m,MYFLOAT4 n)
113
#if INTERACTION == TFORCE
114
#if ARTEVASION == NEGEXP
115
// Force gradient of potential, set as (1-exp(-r))/r 
116
{
117
    private MYFLOAT r=MyDistance(n,m);
118
    private MYFLOAT num=1.e0f+exp(-r)*(r-1.e0f);
119
    return((n-m)*num/(MYFLOAT)(r*r*r));
120
}
121
#elif ARTEVASION == CORRAD
122
// Force gradient of potential, (Core Radius) set as 1/sqrt(r**2+CoreRadius**2) 
123
{
124
    private MYFLOAT r=MyDistance(n,m);
125
    private MYFLOAT den=sqrt(r*r+CoreRadius*CoreRadius);
126
    return((n-m)/(MYFLOAT)(den*den*den));
127
}
128
#else
129
// Simplest implementation of force (equals to acceleration)
130
// seems to bo bad (numerous artevasions)
131
// MYFLOAT4 InteractionForce(MYFLOAT4 m,MYFLOAT4 n)
132
{
133
    private MYFLOAT r=MyDistance(n,m);
134
    return((n-m)/(MYFLOAT)(r*r*r));
135
}
136
#endif
137
#else
138
// Force definited as gradient of potential
139
// Estimate potential and proximate potential to estimate force
140
{
141
    // 1/1024 seems to be a good factor: larger one provides bad results
142
    private MYFLOAT epsilon=(MYFLOAT)(1.e0f/1024);
143
    private MYFLOAT4 er=normalize(n-m);
144
    private MYFLOAT4 dr=er*(MYFLOAT)epsilon;
145

146
    return(er/epsilon*(PairPotential(m,n)-PairPotential(m+dr,n)));
147
}
148
#endif
149

150
MYFLOAT AtomicPotential(__global MYFLOAT4* clDataX,int gid)
151
{
152
    private MYFLOAT potential=(MYFLOAT)0.e0f;
153
    private MYFLOAT4 x=clDataX[gid]; 
154
    
155
    for (int i=0;i<get_global_size(0);i++)
156
    {
157
        if (gid != i)
158
        potential+=PairPotential(x,clDataX[i]);
159
    }
160

161
    barrier(CLK_GLOBAL_MEM_FENCE);
162
    return(potential);
163
}
164

165
MYFLOAT AtomicPotentialCoM(__global MYFLOAT4* clDataX,__global MYFLOAT4* clCoM,int gid)
166
{
167
    return(PairPotential(clDataX[gid],clCoM[0]));
168
}
169

170
// Elements from : http://doswa.com/2009/01/02/fourth-order-runge-kutta-numerical-integration.html
171

172
MYFLOAT8 AtomicRungeKutta(__global MYFLOAT4* clDataInX,__global MYFLOAT4* clDataInV,int gid,MYFLOAT dt)
173
{
174
    private MYFLOAT4 a0,v0,x0,a1,v1,x1,a2,v2,x2,a3,v3,x3,a4,v4,x4,xf,vf;
175
    MYFLOAT4 DT=dt*(MYFLOAT4)(1.e0f,1.e0f,1.e0f,1.e0f);
176

177
    a0=(MYFLOAT4)(0.e0f,0.e0f,0.e0f,0.e0f);
178
    v0=(MYFLOAT4)clDataInV[gid];
179
    x0=(MYFLOAT4)clDataInX[gid];
180
    int N = get_global_size(0);    
181
    
182
    for (private int i=0;i<N;i++)
183
    {
184
        if (gid != i)
185
        a0+=Interaction(x0,clDataInX[i]);
186
    }
187
        
188
    a1=(MYFLOAT4)(0.e0f,0.e0f,0.e0f,0.e0f);
189
    v1=a0*dt+v0;
190
    x1=v0*dt+x0;
191
    for (private int j=0;j<N;j++)
192
    {
193
        if (gid != j)
194
        a1+=Interaction(x1,clDataInX[j]);
195
    }
196

197
    a2=(MYFLOAT4)(0.e0f,0.e0f,0.e0f,0.e0f);
198
    v2=a1*(MYFLOAT)(dt/2.e0f)+v0;
199
    x2=v1*(MYFLOAT)(dt/2.e0f)+x0;
200
    for (private int k=0;k<N;k++)
201
    {
202
        if (gid != k)
203
        a2+=Interaction(x2,clDataInX[k]);
204
    }
205
    
206
    a3=(MYFLOAT4)(0.e0f,0.e0f,0.e0f,0.e0f);
207
    v3=a2*(MYFLOAT)(dt/2.e0f)+v0;
208
    x3=v2*(MYFLOAT)(dt/2.e0f)+x0;
209
    for (private int l=0;l<N;l++)
210
    {
211
        if (gid != l)
212
        a3+=Interaction(x3,clDataInX[l]);
213
    }
214
    
215
    a4=(MYFLOAT4)(0.e0f,0.e0f,0.e0f,0.e0f);
216
    v4=a3*dt+v0;
217
    x4=v3*dt+x0;
218
    for (private int m=0;m<N;m++)
219
    {
220
        if (gid != m)
221
        a4+=Interaction(x4,clDataInX[m]);
222
    }
223
    
224
    xf=x0+dt*(v1+(MYFLOAT)2.e0f*(v2+v3)+v4)/(MYFLOAT)6.e0f;
225
    vf=v0+dt*(a1+(MYFLOAT)2.e0f*(a2+a3)+a4)/(MYFLOAT)6.e0f;
226
     
227
    return((MYFLOAT8)(xf.s0,xf.s1,xf.s2,0.e0f,vf.s0,vf.s1,vf.s2,0.e0f));
228
}
229

230
MYFLOAT8 AtomicHeun(__global MYFLOAT4* clDataInX,__global MYFLOAT4* clDataInV,int gid,MYFLOAT dt)
231
{
232
    private MYFLOAT4 x0,v0,a0,x1,v1,a1,xf,vf;
233
    MYFLOAT4 Dt=dt*(MYFLOAT4)(1.e0f,1.e0f,1.e0f,1.e0f);
234

235
    x0=(MYFLOAT4)clDataInX[gid];
236
    v0=(MYFLOAT4)clDataInV[gid];
237
    a0=(MYFLOAT4)(0.e0f,0.e0f,0.e0f,0.e0f);
238

239
    for (private int i=0;i<get_global_size(0);i++)
240
    {
241
        if (gid != i)
242
        a0+=Interaction(x0,clDataInX[i]);
243
    }
244

245
    a1=(MYFLOAT4)(0.e0f,0.e0f,0.e0f,0.e0f);
246
    //v1=v0+dt*a0;
247
    //x1=x0+dt*v0;
248
    v1=dt*a0+v0;
249
    x1=dt*v0+x0;
250

251
    for (private int j=0;j<get_global_size(0);j++)
252
    {
253
        if (gid != j)
254
        a1+=Interaction(x1,clDataInX[j]);
255
    }
256

257
    vf=v0+dt*(a0+a1)/(MYFLOAT)2.e0f;
258
    xf=x0+dt*(v0+v1)/(MYFLOAT)2.e0f;
259

260
    return((MYFLOAT8)(xf.s0,xf.s1,xf.s2,0.e0f,vf.s0,vf.s1,vf.s2,0.e0f));
261
}
262

263
MYFLOAT8 AtomicImplicitEuler(__global MYFLOAT4* clDataInX,__global MYFLOAT4* clDataInV,int gid,MYFLOAT dt)
264
{
265
    MYFLOAT4 x0,v0,a,xf,vf;
266

267
    x0=(MYFLOAT4)clDataInX[gid];
268
    v0=(MYFLOAT4)clDataInV[gid];
269
    a=(MYFLOAT4)(0.e0f,0.e0f,0.e0f,0.e0f);
270

271
    for (private int i=0;i<get_global_size(0);i++)
272
    {
273
        if (gid != i)
274
          a+=Interaction(x0,clDataInX[i]);
275
    }
276
       
277
    vf=v0+dt*a;
278
    xf=x0+dt*vf;
279

280
    return((MYFLOAT8)(xf.s0,xf.s1,xf.s2,0.e0f,vf.s0,vf.s1,vf.s2,0.e0f));
281
}
282

283
MYFLOAT8 AtomicExplicitEuler(__global MYFLOAT4* clDataInX,__global MYFLOAT4* clDataInV,int gid,MYFLOAT dt)
284
{
285
    MYFLOAT4 x0,v0,a,xf,vf;
286

287
    x0=(MYFLOAT4)clDataInX[gid];
288
    v0=(MYFLOAT4)clDataInV[gid];
289
    a=(MYFLOAT4)(0.e0f,0.e0f,0.e0f,0.e0f);
290

291
    for (private int i=0;i<get_global_size(0);i++)
292
    {
293
        if (gid != i)
294
        a+=Interaction(x0,clDataInX[i]);
295
    }
296
       
297
    vf=v0+dt*a;
298
    xf=x0+dt*v0;
299
 
300
    return((MYFLOAT8)(xf.s0,xf.s1,xf.s2,0.e0f,vf.s0,vf.s1,vf.s2,0.e0f));
301
}
302

303
__kernel void InBallSplutterPoints(__global MYFLOAT4* clDataX, 
304
                                   MYFLOAT diameter,uint seed_z,uint seed_w)
305
{
306
    private int gid=get_global_id(0);
307
    private uint zmwc=seed_z+gid;
308
    private uint wmwc=seed_w+(gid+1)%2;
309
    private MYFLOAT Heat;
310
 
311
    for (int i=0;i<gid;i++)
312
    {
313
        Heat=MWCfp;
314
    }
315

316
// More accurate distribution based on spherical coordonates
317
// Disactivated because of AMD Oland GPU crash on launch
318
//     private MYFLOAT Radius,Theta,Phi,PosX,PosY,PosZ,SinTheta;
319
//     Radius=MWCfp*diameter/2.e0f;
320
//     Theta=(MYFLOAT)acos((float)(-2.e0f*MWCfp+1.0e0f));
321
//     Phi=(MYFLOAT)(2.e0f*PI*MWCfp);
322
//     SinTheta=sin((float)Theta);
323
//     PosX=cos((float)Phi)*Radius*SinTheta;
324
//     PosY=sin((float)Phi)*Radius*SinTheta;
325
//     PosZ=cos((float)Theta)*Radius;
326
//     clDataX[gid]=(MYFLOAT4)(PosX,PosY,PosZ,0.e0f);
327

328
    private MYFLOAT Radius=diameter/2.e0f;
329
    private MYFLOAT Length=diameter;
330
    private MYFLOAT4 Position;
331
    while (Length>Radius) {
332
       Position=(MYFLOAT4)((MWCfp-0.5e0f)*diameter,(MWCfp-0.5e0f)*diameter,(MWCfp-0.5e0f)*diameter,0.e0f);
333
       Length=(MYFLOAT)length((MYFLOAT4)Position);
334
    }
335

336
    clDataX[gid]=Position;    
337

338
    barrier(CLK_GLOBAL_MEM_FENCE);
339
}
340

341
__kernel void InBoxSplutterPoints(__global MYFLOAT4* clDataX, MYFLOAT box, 
342
                             uint seed_z,uint seed_w)
343
{
344
    int gid=get_global_id(0);
345
    uint zmwc=seed_z+gid;
346
    uint wmwc=seed_w-gid;
347
    private MYFLOAT Heat;
348
 
349
    for (int i=0;i<gid;i++)
350
    {
351
        Heat=MWCfp;
352
    }
353

354
    clDataX[gid]=(MYFLOAT4)((MWCfp-0.5e0f)*box,(MWCfp-0.5e0f)*box,(MWCfp-0.5e0f)*box,0.e0f);
355

356
    barrier(CLK_GLOBAL_MEM_FENCE);
357
}
358

359
__kernel void SplutterStress(__global MYFLOAT4* clDataX,__global MYFLOAT4* clDataV,__global MYFLOAT4* clCoM, MYFLOAT velocity,uint seed_z,uint seed_w)
360
{
361
    int gid = get_global_id(0);
362
    MYFLOAT N = (MYFLOAT)get_global_size(0);
363
    uint zmwc=seed_z+(uint)gid;
364
    uint wmwc=seed_w-(uint)gid;
365
    MYFLOAT4 CrossVector,SpeedVector,FromCoM;
366
    MYFLOAT Heat,ThetaA,PhiA,ThetaB,PhiB,Length,tA,tB,Polar;
367

368
    for (int i=0;i<gid;i++)
369
    {
370
        Heat=MWCfp;
371
    }
372

373
    // cast to float for sin,cos are NEEDED by Mesa FP64 implementation!
374
    // Implemention on AMD Oland are probably broken in float
375

376
    FromCoM=(MYFLOAT4)(clDataX[gid]-clCoM[0]);
377
    Length=length(FromCoM);
378
    //Theta=acos(FromCoM.z/Length);
379
    //Phi=atan(FromCoM.y/FromCoM.x);
380
    // First tangential vector to sphere of length radius
381
    ThetaA=acos(FromCoM.x/Length)+5.e-1f*PI;
382
    PhiA=atan(FromCoM.y/FromCoM.z);
383
    // Second tangential vector to sphere of length radius
384
    ThetaB=acos((float)(FromCoM.x/Length));
385
    PhiB=atan((float)(FromCoM.y/FromCoM.z))+5.e-1f*PI;
386
    // (x,y) random coordonates to plane tangential to sphere
387
    Polar=MWCfp*2.e0f*PI;
388
    tA=cos((float)Polar);
389
    tB=sin((float)Polar);
390

391
    // Exception for 2 particules to ovoid shifting
392
    if (get_global_size(0)==2) {
393
       CrossVector=(MYFLOAT4)(1.e0f,1.e0f,1.e0f,0.e0f);
394
    } else {
395
       CrossVector.s0=tA*cos((float)ThetaA)+tB*cos((float)ThetaB);
396
       CrossVector.s1=tA*sin((float)ThetaA)*sin((float)PhiA)+tB*sin((float)ThetaB)*sin((float)PhiB);
397
       CrossVector.s2=tA*sin((float)ThetaA)*cos((float)PhiA)+tB*sin((float)ThetaB)*cos((float)PhiB);
398
       CrossVector.s3=0.e0f;
399
    }
400

401
    if (velocity<SMALL_NUM) {
402
       SpeedVector=(MYFLOAT4)normalize(cross(FromCoM,CrossVector))*sqrt((-AtomicPotential(clDataX,gid)/(MYFLOAT)2.e0f));
403
    }
404
    else
405
    {
406

407
       SpeedVector=(MYFLOAT4)((MWCfp-5e-1f)*velocity,(MWCfp-5e-1f)*velocity,
408
                              (MWCfp-5e-1f)*velocity,0.e0f);
409
    }
410
    clDataV[gid]=SpeedVector;
411
    barrier(CLK_GLOBAL_MEM_FENCE);
412
}
413

414
__kernel void RungeKutta(__global MYFLOAT4* clDataX,__global MYFLOAT4* clDataV,MYFLOAT h)
415
{
416
    private int gid = get_global_id(0);
417
    private MYFLOAT8 clDataGid;
418
    
419
    clDataGid=AtomicRungeKutta(clDataX,clDataV,gid,h);
420
    barrier(CLK_GLOBAL_MEM_FENCE);
421
    clDataX[gid]=clDataGid.s0123;
422
    clDataV[gid]=clDataGid.s4567;
423
}
424

425
__kernel void Heun(__global MYFLOAT4* clDataX,__global MYFLOAT4* clDataV,MYFLOAT h)
426
{
427
    private int gid = get_global_id(0);
428
    private MYFLOAT8 clDataGid;
429
    
430
    clDataGid=AtomicHeun(clDataX,clDataV,gid,h);
431
    barrier(CLK_GLOBAL_MEM_FENCE);
432
    clDataX[gid]=clDataGid.s0123;
433
    clDataV[gid]=clDataGid.s4567;
434
}
435

436
__kernel void ImplicitEuler(__global MYFLOAT4* clDataX,__global MYFLOAT4* clDataV,MYFLOAT h)
437
{
438
    private int gid = get_global_id(0);
439
    private MYFLOAT8 clDataGid;
440
    
441
    clDataGid=AtomicImplicitEuler(clDataX,clDataV,gid,h);
442
    barrier(CLK_GLOBAL_MEM_FENCE);
443
    clDataX[gid]=clDataGid.s0123;
444
    clDataV[gid]=clDataGid.s4567;
445
}
446

447
__kernel void ExplicitEuler(__global MYFLOAT4* clDataX,__global MYFLOAT4* clDataV,MYFLOAT h)
448
{
449
    private int gid = get_global_id(0);
450
    private MYFLOAT8 clDataGid;    
451

452
    clDataGid=AtomicExplicitEuler(clDataX,clDataV,gid,h);
453
    barrier(CLK_GLOBAL_MEM_FENCE);
454
    clDataX[gid]=clDataGid.s0123;
455
    clDataV[gid]=clDataGid.s4567;
456
}
457

458
__kernel void CoMPotential(__global MYFLOAT4* clDataX,__global MYFLOAT4* clCoM,__global MYFLOAT* clPotential)
459
{
460
    int gid = get_global_id(0);
461

462
    clPotential[gid]=PairPotential(clDataX[gid],clCoM[0]);
463
}
464

465
__kernel void Potential(__global MYFLOAT4* clDataX,__global MYFLOAT* clPotential)
466
{
467
    int gid = get_global_id(0);
468

469
    MYFLOAT potential=(MYFLOAT)0.e0f;
470
    MYFLOAT4 x=clDataX[gid]; 
471
    
472
    for (int i=0;i<get_global_size(0);i++)
473
    {
474
        if (gid != i)
475
        potential+=PairPotential(x,clDataX[i]);
476
    }
477
                 
478
    barrier(CLK_GLOBAL_MEM_FENCE);
479
    clPotential[gid]=potential*(MYFLOAT)5.e-1f;
480
}
481

482
__kernel void CenterOfMass(__global MYFLOAT4* clDataX,__global MYFLOAT4* clCoM,int Size)
483
{
484
    MYFLOAT4 CoM=clDataX[0]; 
485

486
    for (int i=1;i<Size;i++)
487
    {
488
        CoM+=clDataX[i];
489
    }
490

491
    barrier(CLK_GLOBAL_MEM_FENCE);
492
    clCoM[0]=(MYFLOAT4)(CoM.s0,CoM.s1,CoM.s2,0.e0f)/(MYFLOAT)Size;
493
}
494

495
__kernel void Kinetic(__global MYFLOAT4* clDataV,__global MYFLOAT* clKinetic)
496
{
497
    int gid = get_global_id(0);
498
    
499
    barrier(CLK_GLOBAL_MEM_FENCE);
500
    MYFLOAT d=(MYFLOAT)length(clDataV[gid]);
501
    clKinetic[gid]=(MYFLOAT)5.e-1f*(MYFLOAT)(d*d);
502
}
503

504
"""
505

    
506
def MainOpenCL(clDataX,clDataV,Step,Method):
507
    time_start=time.time()
508
    if Method=="RungeKutta":
509
        CLLaunch=MyRoutines.RungeKutta(queue,(Number,1),None,clDataX,clDataV,Step)
510
    elif Method=="ExplicitEuler":
511
        CLLaunch=MyRoutines.ExplicitEuler(queue,(Number,1),None,clDataX,clDataV,Step)
512
    elif Method=="Heun":
513
        CLLaunch=MyRoutines.Heun(queue,(Number,1),None,clDataX,clDataV,Step)
514
    else:
515
        CLLaunch=MyRoutines.ImplicitEuler(queue,(Number,1),None,clDataX,clDataV,Step)
516
    CLLaunch.wait()
517
    Elapsed=time.time()-time_start
518
    return(Elapsed)
519
    
520
def display(*args):
521

    
522
    global MyDataX,MyDataV,clDataX,clDataV,Step,Method,Number,Iterations,Durations,Verbose,SpeedRendering
523
    
524
    glClearColor(0.0, 0.0, 0.0, 0.0)
525
    glClear(GL_COLOR_BUFFER_BIT)
526
    glColor3f(1.0,1.0,1.0)
527
    
528
    Elapsed=MainOpenCL(clDataX,clDataV,Step,Method)
529
    if SpeedRendering:
530
        cl.enqueue_copy(queue, MyDataV, clDataV)
531
        MyDataV.reshape(Number,4)[:,3]=1
532
        glVertexPointerf(MyDataV.reshape(Number,4))
533
    else:
534
        cl.enqueue_copy(queue, MyDataX, clDataX)
535
        MyDataX.reshape(Number,4)[:,3]=1
536
        glVertexPointerf(MyDataX.reshape(Number,4))
537

    
538
    if Verbose:
539
        print("Positions for #%s iteration: %s" % (Iterations,MyDataX))
540
    else:
541
        sys.stdout.write('.')
542
        sys.stdout.flush()
543
    Durations=np.append(Durations,MainOpenCL(clDataX,clDataV,Step,Method))    
544
    glEnableClientState(GL_VERTEX_ARRAY)
545
    glDrawArrays(GL_POINTS, 0, Number)
546
    glDisableClientState(GL_VERTEX_ARRAY)
547
    glFlush()
548
    Iterations+=1
549
    glutSwapBuffers()
550

    
551
def halt():
552
    pass
553

    
554
def keyboard(k,x,y):
555
    global ViewRZ,SpeedRendering
556
    LC_Z = as_8_bit( 'z' )
557
    UC_Z = as_8_bit( 'Z' )
558
    Plus = as_8_bit( '+' )
559
    Minus = as_8_bit( '-' )
560
    Switch = as_8_bit( 's' )
561

    
562
    Zoom=1
563
    if k == LC_Z:
564
        ViewRZ += 1.0
565
    elif k == UC_Z:
566
        ViewRZ -= 1.0
567
    elif k == Plus:
568
        Zoom *= 2.0
569
    elif k == Minus:
570
        Zoom /= 2.0
571
    elif k == Switch:
572
        if SpeedRendering:
573
            SpeedRendering=False
574
        else:
575
            SpeedRendering=True
576
    elif ord(k) == 27: # Escape
577
        glutLeaveMainLoop()
578
        return(False)
579
    else:
580
        return
581
    glRotatef(ViewRZ, 0.0, 0.0, 1.0)
582
    glScalef(Zoom,Zoom,Zoom)
583
    glutPostRedisplay()
584

    
585
def special(k,x,y):
586
    global ViewRX, ViewRY
587

    
588
    Step=1.
589
    if k == GLUT_KEY_UP:
590
        ViewRX += Step
591
    elif k == GLUT_KEY_DOWN:
592
        ViewRX -= Step
593
    elif k == GLUT_KEY_LEFT:
594
        ViewRY += Step
595
    elif k == GLUT_KEY_RIGHT:
596
        ViewRY -= Step
597
    else:
598
        return
599
    glRotatef(ViewRX, 1.0, 0.0, 0.0)
600
    glRotatef(ViewRY, 0.0, 1.0, 0.0)
601
    glutPostRedisplay()
602

    
603
def setup_viewport():
604
    global SizeOfBox
605
    glMatrixMode(GL_PROJECTION)
606
    glLoadIdentity()
607
    glOrtho(-SizeOfBox, SizeOfBox, -SizeOfBox, SizeOfBox, -SizeOfBox, SizeOfBox)
608
    glutPostRedisplay()
609
    
610
def reshape(w, h):
611
    glViewport(0, 0, w, h)
612
    setup_viewport()
613

    
614
if __name__=='__main__':
615

    
616
    global Number,Step,clDataX,clDataV,MyDataX,MyDataV,Method,SizeOfBox,Iterations,Verbose,Durations
617
    
618
    # ValueType
619
    ValueType='FP32'
620
    class MyFloat(np.float32):pass
621
    #    clType8=cl_array.vec.float8
622
    # Set defaults values
623
    np.set_printoptions(precision=2)  
624
    # Id of Device : 1 is for first find !
625
    Device=0
626
    # Number of bodies is integer
627
    Number=2
628
    # Number of iterations (for standalone execution)
629
    Iterations=10
630
    # Size of shape
631
    SizeOfShape=MyFloat(1.)
632
    # Initial velocity of particules
633
    Velocity=MyFloat(1.)
634
    # Step
635
    Step=MyFloat(1./32)
636
    # Method of integration
637
    Method='ImplicitEuler'
638
    # InitialRandom
639
    InitialRandom=False
640
    # RNG Marsaglia Method
641
    RNG='MWC'
642
    # Viriel Distribution of stress
643
    VirielStress=True
644
    # Verbose
645
    Verbose=False
646
    # OpenGL real time rendering
647
    OpenGL=False
648
    # Speed rendering
649
    SpeedRendering=False
650
    # Counter ArtEvasions Measures (artefact evasion)
651
    CoArEv='None'
652
    # Shape to distribute
653
    Shape='Ball'
654
    # Type of Interaction
655
    InterType='Force'
656
    
657
    HowToUse='%s -h [Help] -r [InitialRandom] -g [OpenGL] -e [VirielStress] -o [Verbose] -p [Potential] -x <None|NegExp|CorRad> -d <DeviceId> -n <NumberOfParticules> -i <Iterations> -z <SizeOfBoxOrBall> -v <Velocity> -s <Step> -b <Ball|Box> -m <ImplicitEuler|RungeKutta|ExplicitEuler|Heun> -t <FP32|FP64>'
658

    
659
    try:
660
        opts, args = getopt.getopt(sys.argv[1:],"rpgehod:n:i:z:v:s:m:t:b:x:",["random","potential","coarev","opengl","viriel","verbose","device=","number=","iterations=","size=","velocity=","step=","method=","valuetype=","shape="])
661
    except getopt.GetoptError:
662
        print(HowToUse % sys.argv[0])
663
        sys.exit(2)
664

    
665
    for opt, arg in opts:
666
        if opt == '-h':
667
            print(HowToUse % sys.argv[0])
668

    
669
            print("\nInformations about devices detected under OpenCL:")
670
            try:
671
                Id=0
672
                for platform in cl.get_platforms():
673
                    for device in platform.get_devices():
674
                        # Failed now because of POCL implementation
675
                        #deviceType=cl.device_type.to_string(device.type)
676
                        deviceType="xPU"
677
                        print("Device #%i from %s of type %s : %s" % (Id,platform.vendor.lstrip(),deviceType,device.name.lstrip()))
678
                        Id=Id+1
679
                sys.exit()
680
            except ImportError:
681
                print("Your platform does not seem to support OpenCL")
682
                sys.exit()
683

    
684
        elif opt in ("-t", "--valuetype"):
685
            if arg=='FP64':
686
                class MyFloat(np.float64): pass
687
            else:
688
                class MyFloat(np.float32):pass
689
            ValueType = arg
690
        elif opt in ("-d", "--device"):
691
            Device=int(arg)
692
        elif opt in ("-m", "--method"):
693
            Method=arg
694
        elif opt in ("-b", "--shape"):
695
            Shape=arg
696
            if Shape!='Ball' or Shape!='Box':
697
                print('Wrong argument: set to Ball')
698
        elif opt in ("-n", "--number"):
699
            Number=int(arg)
700
        elif opt in ("-i", "--iterations"):
701
            Iterations=int(arg)
702
        elif opt in ("-z", "--size"):
703
            SizeOfShape=MyFloat(arg)
704
        elif opt in ("-v", "--velocity"):
705
            Velocity=MyFloat(arg)
706
            VirielStress=False
707
        elif opt in ("-s", "--step"):
708
            Step=MyFloat(arg)
709
        elif opt in ("-r", "--random"):
710
            InitialRandom=True
711
        elif opt in ("-c", "--check"):
712
            CheckEnergies=True
713
        elif opt in ("-e", "--viriel"):
714
            VirielStress=True
715
        elif opt in ("-g", "--opengl"):
716
            OpenGL=True
717
        elif opt in ("-p", "--potential"):
718
            InterType='Potential'
719
        elif opt in ("-x", "--coarev"):
720
            CoArEv=arg
721
        elif opt in ("-o", "--verbose"):
722
            Verbose=True
723

    
724
    SizeOfShape=np.sqrt(MyFloat(SizeOfShape*Number))
725
    Velocity=MyFloat(Velocity)
726
    Step=MyFloat(Step)
727
                
728
    print("Device choosed : %s" % Device)
729
    print("Number of particules : %s" % Number)
730
    print("Size of Shape : %s" % SizeOfShape)
731
    print("Initial velocity : %s" % Velocity)
732
    print("Step of iteration : %s" % Step)
733
    print("Number of iterations : %s" % Iterations)
734
    print("Method of resolution : %s" % Method)
735
    print("Initial Random for RNG Seed : %s" % InitialRandom)
736
    print("ValueType is : %s" % ValueType)
737
    print("Viriel distribution of stress : %s" % VirielStress)
738
    print("OpenGL real time rendering : %s" % OpenGL)
739
    print("Speed rendering : %s" % SpeedRendering)
740
    print("Interaction type : %s" % InterType)
741
    print("Counter Artevasion type : %s" % CoArEv)
742

    
743
    # Create Numpy array of CL vector with 8 FP32    
744
    MyCoM = np.zeros(4,dtype=MyFloat)
745
    MyDataX = np.zeros(Number*4, dtype=MyFloat)
746
    MyDataV = np.zeros(Number*4, dtype=MyFloat)
747
    MyPotential = np.zeros(Number, dtype=MyFloat)
748
    MyKinetic = np.zeros(Number, dtype=MyFloat)
749

    
750
    Marsaglia,Computing,Interaction,Artevasion=DictionariesAPI()
751

    
752
    # Scan the OpenCL arrays
753
    Id=0
754
    HasXPU=False
755
    for platform in cl.get_platforms():
756
        for device in platform.get_devices():
757
            if Id==Device:
758
                PlatForm=platform
759
                XPU=device
760
                print("CPU/GPU selected: ",device.name.lstrip())
761
                print("Platform selected: ",platform.name)
762
                HasXPU=True
763
            Id+=1
764

    
765
    if HasXPU==False:
766
        print("No XPU #%i found in all of %i devices, sorry..." % (Device,Id-1))
767
        sys.exit()      
768

    
769
    # Create Context
770
    try:
771
        ctx = cl.Context([XPU])
772
        queue = cl.CommandQueue(ctx,properties=cl.command_queue_properties.PROFILING_ENABLE)
773
    except:
774
        print("Crash during context creation")
775

    
776
    # Build all routines used for the computing
777

    
778
    #BuildOptions="-cl-mad-enable -cl-kernel-arg-info -cl-fast-relaxed-math -cl-std=CL1.2 -DTRNG=%i -DTYPE=%i" % (Marsaglia[RNG],Computing[ValueType])
779
    BuildOptions="-cl-mad-enable -cl-fast-relaxed-math -DTRNG=%i -DTYPE=%i -DINTERACTION=%i -DARTEVASION=%i" % (Marsaglia[RNG],Computing[ValueType],Interaction[InterType],Artevasion[CoArEv])
780

    
781
    if 'Intel' in PlatForm.name or 'Experimental' in PlatForm.name or 'Clover' in PlatForm.name or 'Portable' in PlatForm.name :
782
        MyRoutines = cl.Program(ctx, BlobOpenCL).build(options = BuildOptions)
783
    else:
784
        MyRoutines = cl.Program(ctx, BlobOpenCL).build(options = BuildOptions+" -cl-strict-aliasing")
785
        
786
    mf = cl.mem_flags
787
    # Read/Write approach for buffering
788
    clDataX = cl.Buffer(ctx, mf.READ_WRITE, MyDataX.nbytes)
789
    clDataV = cl.Buffer(ctx, mf.READ_WRITE, MyDataV.nbytes)
790
    clPotential = cl.Buffer(ctx, mf.READ_WRITE, MyPotential.nbytes)
791
    clKinetic = cl.Buffer(ctx, mf.READ_WRITE, MyKinetic.nbytes)
792
    clCoM = cl.Buffer(ctx, mf.READ_WRITE, MyCoM.nbytes)
793
    
794
    # Write/HostPointer approach for buffering
795
    # clDataX = cl.Buffer(ctx, mf.WRITE_ONLY|mf.COPY_HOST_PTR,hostbuf=MyDataX)
796
    # clDataV = cl.Buffer(ctx, mf.WRITE_ONLY|mf.COPY_HOST_PTR,hostbuf=MyDataV)
797
    # clPotential = cl.Buffer(ctx, mf.WRITE_ONLY|mf.COPY_HOST_PTR,hostbuf=MyPotential)
798
    # clKinetic = cl.Buffer(ctx, mf.WRITE_ONLY|mf.COPY_HOST_PTR,hostbuf=MyKinetic)
799
    # clCoM = cl.Buffer(ctx, mf.WRITE_ONLY|mf.COPY_HOST_PTR,hostbuf=MyCoM)
800

    
801
    print('All particles superimposed.')
802

    
803
    # Set particles to RNG points
804
    if InitialRandom:
805
        seed_w=np.uint32(nprnd(2**32))
806
        seed_z=np.uint32(nprnd(2**32))
807
    else:
808
        seed_w=np.uint32(19710211)
809
        seed_z=np.uint32(20081010)
810
            
811
    if Shape=='Ball':
812
        MyRoutines.InBallSplutterPoints(queue,(Number,1),None,clDataX,SizeOfShape,seed_w,seed_z)
813
    else:
814
        MyRoutines.InBoxSplutterPoints(queue,(Number,1),None,clDataX,SizeOfShape,seed_w,seed_z)
815

    
816
    print('All particules distributed')
817

    
818
    CLLaunch=MyRoutines.CenterOfMass(queue,(1,1),None,clDataX,clCoM,np.int32(Number))
819
    CLLaunch.wait()
820
    cl.enqueue_copy(queue,MyCoM,clCoM)
821
    print('Center Of Mass estimated: (%s,%s,%s)' % (MyCoM[0],MyCoM[1],MyCoM[2]))
822
    
823
    if VirielStress:
824
        CLLaunch=MyRoutines.SplutterStress(queue,(Number,1),None,clDataX,clDataV,clCoM,MyFloat(0.),np.uint32(110271),np.uint32(250173))
825
    else:
826
        CLLaunch=MyRoutines.SplutterStress(queue,(Number,1),None,clDataX,clDataV,clCoM,Velocity,np.uint32(110271),np.uint32(250173))
827
    CLLaunch.wait()
828

    
829
    print('All particules stressed')
830

    
831
    CLLaunch=MyRoutines.Potential(queue,(Number,1),None,clDataX,clPotential)
832
    CLLaunch=MyRoutines.Kinetic(queue,(Number,1),None,clDataV,clKinetic)
833
    CLLaunch.wait()
834
    cl.enqueue_copy(queue,MyPotential,clPotential)
835
    cl.enqueue_copy(queue,MyKinetic,clKinetic)
836
    print('Energy estimated: Viriel=%s Potential=%s Kinetic=%s\n'% (np.sum(MyPotential)+2*np.sum(MyKinetic),np.sum(MyPotential),np.sum(MyKinetic)))
837

    
838
    if SpeedRendering:
839
        SizeOfBox=max(2*MyKinetic)
840
    else:
841
        SizeOfBox=SizeOfShape        
842
    
843
    if OpenGL:
844
        print('\tTiny documentation to interact OpenGL rendering:\n')
845
        print('\t<Left|Right> Rotate around X axis')
846
        print('\t  <Up|Down>  Rotate around Y axis')
847
        print('\t   <z|Z>     Rotate around Z axis')
848
        print('\t   <-|+>     Unzoom/Zoom')
849
        print('\t    <s>      Toggle to display Positions or Velocities')
850
        print('\t   <Esc>     Quit\n')
851
    
852
    wall_time_start=time.time()
853

    
854
    Durations=np.array([],dtype=MyFloat)
855
    print('Starting!')
856
    if OpenGL:
857
        global ViewRX,ViewRY,ViewRZ
858
        Iterations=0
859
        ViewRX,ViewRY,ViewRZ = 0.,0.,0.
860
        # Launch OpenGL Loop
861
        glutInit(sys.argv)
862
        glutInitDisplayMode(GLUT_DOUBLE | GLUT_RGB)
863
        glutSetOption(GLUT_ACTION_ON_WINDOW_CLOSE,GLUT_ACTION_CONTINUE_EXECUTION)
864
        glutInitWindowSize(512,512)
865
        glutCreateWindow(b'NBodyGL')
866
        setup_viewport()
867
        glutReshapeFunc(reshape)
868
        glutDisplayFunc(display)
869
        glutIdleFunc(display)
870
        #   glutMouseFunc(mouse)
871
        glutSpecialFunc(special)
872
        glutKeyboardFunc(keyboard)
873
        glutMainLoop()
874
    else:
875
        for iteration in range(Iterations):
876
            Elapsed=MainOpenCL(clDataX,clDataV,Step,Method)
877
            if Verbose:
878
                # print("Duration of #%s iteration: %s" % (iteration,Elapsed))
879
                cl.enqueue_copy(queue, MyDataX, clDataX)
880
                print("Positions for #%s iteration: %s" % (iteration,MyDataX))
881
            else:
882
                sys.stdout.write('.')
883
                sys.stdout.flush()
884
            Durations=np.append(Durations,Elapsed)
885

    
886
    print('\nEnding!')
887
            
888
    MyRoutines.CenterOfMass(queue,(1,1),None,clDataX,clCoM,np.int32(Number))
889
    CLLaunch=MyRoutines.Potential(queue,(Number,1),None,clDataX,clPotential)
890
    CLLaunch=MyRoutines.Kinetic(queue,(Number,1),None,clDataV,clKinetic)
891
    CLLaunch.wait()
892
    cl.enqueue_copy(queue,MyCoM,clCoM)
893
    cl.enqueue_copy(queue,MyPotential,clPotential)
894
    cl.enqueue_copy(queue,MyKinetic,clKinetic)
895
    print('\nCenter Of Mass estimated: (%s,%s,%s)' % (MyCoM[0],MyCoM[1],MyCoM[2]))
896
    print('Energy estimated: Viriel=%s Potential=%s Kinetic=%s\n'% (np.sum(MyPotential)+2.*np.sum(MyKinetic),np.sum(MyPotential),np.sum(MyKinetic)))
897

    
898
    print("Duration stats on device %s with %s iterations :\n\tMean:\t%s\n\tMedian:\t%s\n\tStddev:\t%s\n\tMin:\t%s\n\tMax:\t%s\n\n\tVariability:\t%s\n" % (Device,Iterations,np.mean(Durations),np.median(Durations),np.std(Durations),np.min(Durations),np.max(Durations),np.std(Durations)/np.median(Durations)))
899

    
900
    # FPS: 1/Elapsed
901
    FPS=np.ones(len(Durations))
902
    FPS/=Durations
903

    
904
    print("FPS stats on device %s with %s iterations :\n\tMean:\t%s\n\tMedian:\t%s\n\tStddev:\t%s\n\tMin:\t%s\n\tMax:\t%s\n" % (Device,Iterations,np.mean(FPS),np.median(FPS),np.std(FPS),np.min(FPS),np.max(FPS)))
905

    
906
    # Contraction of Square*Size*Hertz: Size*Size/Elapsed
907
    Squertz=np.ones(len(Durations))
908
    Squertz*=Number*Number
909
    Squertz/=Durations
910

    
911
    print("Squertz in log10 & complete stats on device %s with %s iterations :\n\tMean:\t%s\t%s\n\tMedian:\t%s\t%s\n\tStddev:\t%s\t%s\n\tMin:\t%s\t%s\n\tMax:\t%s\t%s\n" % (Device,Iterations,np.log10(np.mean(Squertz)),np.mean(Squertz),np.log10(np.median(Squertz)),np.median(Squertz),np.log10(np.std(Squertz)),np.std(Squertz),np.log10(np.min(Squertz)),np.min(Squertz),np.log10(np.max(Squertz)),np.max(Squertz)))
912
        
913
    clDataX.release()
914
    clDataV.release()
915
    clKinetic.release()
916
    clPotential.release()
917