Statistiques
| Révision :

root / NBody / NBody.py @ 279

Historique | Voir | Annoter | Télécharger (29,1 ko)

1
#!/usr/bin/env python3
2
# -*- coding: utf-8 -*-
3
"""
4
NBody Demonstrator implemented in OpenCL, rendering OpenGL
5

6
By default, rendering in OpenGL is disabled. Add -g option to activate.
7

8
Part of matrix programs from: https://forge.cbp.ens-lyon.fr/svn/bench4gpu/
9

10
CC BY-NC-SA 2011 : Emmanuel QUEMENER <emmanuel.quemener@ens-lyon.fr> 
11

12
Thanks to Andreas Klockner for PyOpenCL:
13
http://mathema.tician.de/software/pyopencl
14
 
15
"""
16
import getopt
17
import sys
18
import time
19
import numpy as np
20
import pyopencl as cl
21
import pyopencl.array as cl_array
22
from numpy.random import randint as nprnd
23
import string, sys
24
from OpenGL.GL import *
25
from OpenGL.GLUT import *
26

    
27
def DictionariesAPI():
28
    Marsaglia={'CONG':0,'SHR3':1,'MWC':2,'KISS':3}
29
    Computing={'FP32':0,'FP64':1}
30
    Interaction={'Force':0,'Potential':1}
31
    Artevasion={'None':0,'NegExp':1,'CorRad':2}
32
    return(Marsaglia,Computing,Interaction,Artevasion)
33

    
34
BlobOpenCL= """
35
#define TFP32 0
36
#define TFP64 1
37

38
#define TFORCE 0
39
#define TPOTENTIAL 1
40

41
#define NONE 0
42
#define NEGEXP 1
43
#define CORRAD 2
44

45
#if TYPE == TFP32
46
#define MYFLOAT4 float4
47
#define MYFLOAT8 float8
48
#define MYFLOAT float
49
#define DISTANCE fast_distance
50
#else
51
#define MYFLOAT4 double4
52
#define MYFLOAT8 double8
53
#define MYFLOAT double
54
#define DISTANCE distance
55
#if defined(cl_khr_fp64)  // Khronos extension available?
56
#pragma OPENCL EXTENSION cl_khr_fp64 : enable
57
#endif
58
#endif
59

60
#define znew  ((zmwc=36969*(zmwc&65535)+(zmwc>>16))<<16)
61
#define wnew  ((wmwc=18000*(wmwc&65535)+(wmwc>>16))&65535)
62
#define MWC   (znew+wnew)
63
#define SHR3  (jsr=(jsr=(jsr=jsr^(jsr<<17))^(jsr>>13))^(jsr<<5))
64
#define CONG  (jcong=69069*jcong+1234567)
65
#define KISS  ((MWC^CONG)+SHR3)
66

67
#define MWCfp (MYFLOAT)(MWC * 2.3283064365386963e-10f)
68
#define KISSfp (MYFLOAT)(KISS * 2.3283064365386963e-10f)
69
#define SHR3fp (MYFLOAT)(SHR3 * 2.3283064365386963e-10f)
70
#define CONGfp (MYFLOAT)(CONG * 2.3283064365386963e-10f)
71

72
#define PI (MYFLOAT)3.141592653589793238e0f
73

74
#define SMALL_NUM (MYFLOAT)1.e-9f
75

76
#define CoreRadius (MYFLOAT)(1.e0f)
77

78
// Create my own Distance implementation: distance buggy on Oland AMD chipset
79

80
MYFLOAT MyDistance(MYFLOAT4 n,MYFLOAT4 m)
81
{
82
    private MYFLOAT x2,y2,z2;
83
    x2=n.s0-m.s0;
84
    x2*=x2;
85
    y2=n.s1-m.s1;
86
    y2*=y2;
87
    z2=n.s2-m.s2;
88
    z2*=z2;
89
    return(sqrt(x2+y2+z2));
90
}
91

92
// Potential between 2 m,n bodies
93
MYFLOAT PairPotential(MYFLOAT4 m,MYFLOAT4 n)
94
#if ARTEVASION == NEGEXP
95
// Add exp(-r) to numerator to avoid divergence for low distances
96
{
97
    MYFLOAT r=DISTANCE(n,m);
98
    return((-1.e0f+exp(-r))/r);
99
}
100
#elif ARTEVASION == CORRAD
101
// Add Core Radius to avoid divergence for low distances
102
{
103
    MYFLOAT r=DISTANCE(n,m);
104
    return(-1.e0f/sqrt(r*r+CoreRadius*CoreRadius));
105
}
106
#else
107
// Classical potential in 1/r
108
{
109
//    return((MYFLOAT)(-1.e0f)/(MyDistance(m,n)));
110
    return((MYFLOAT)(-1.e0f)/(DISTANCE(n,m)));
111
}
112
#endif
113

114
// Interaction based of Force as gradient of Potential
115
MYFLOAT4 Interaction(MYFLOAT4 m,MYFLOAT4 n)
116
#if INTERACTION == TFORCE
117
#if ARTEVASION == NEGEXP
118
// Force gradient of potential, set as (1-exp(-r))/r 
119
{
120
    private MYFLOAT r=MyDistance(n,m);
121
    private MYFLOAT num=1.e0f+exp(-r)*(r-1.e0f);
122
    return((n-m)*num/(MYFLOAT)(r*r*r));
123
}
124
#elif ARTEVASION == CORRAD
125
// Force gradient of potential, (Core Radius) set as 1/sqrt(r**2+CoreRadius**2) 
126
{
127
    private MYFLOAT r=MyDistance(n,m);
128
    private MYFLOAT den=sqrt(r*r+CoreRadius*CoreRadius);
129
    return((n-m)/(MYFLOAT)(den*den*den));
130
}
131
#else
132
// Simplest implementation of force (equals to acceleration)
133
// seems to bo bad (numerous artevasions)
134
// MYFLOAT4 InteractionForce(MYFLOAT4 m,MYFLOAT4 n)
135
{
136
    private MYFLOAT r=MyDistance(n,m);
137
    return((n-m)/(MYFLOAT)(r*r*r));
138
}
139
#endif
140
#else
141
// Force definited as gradient of potential
142
// Estimate potential and proximate potential to estimate force
143
{
144
    // 1/1024 seems to be a good factor: larger one provides bad results
145
    private MYFLOAT epsilon=(MYFLOAT)(1.e0f/1024);
146
    private MYFLOAT4 er=normalize(n-m);
147
    private MYFLOAT4 dr=er*(MYFLOAT)epsilon;
148

149
    return(er/epsilon*(PairPotential(m,n)-PairPotential(m+dr,n)));
150
}
151
#endif
152

153
MYFLOAT AtomicPotential(__global MYFLOAT4* clDataX,int gid)
154
{
155
    private MYFLOAT potential=(MYFLOAT)0.e0f;
156
    private MYFLOAT4 x=clDataX[gid]; 
157
    
158
    for (int i=0;i<get_global_size(0);i++)
159
    {
160
        if (gid != i)
161
        potential+=PairPotential(x,clDataX[i]);
162
    }
163

164
    barrier(CLK_GLOBAL_MEM_FENCE);
165
    return(potential);
166
}
167

168
MYFLOAT AtomicPotentialCoM(__global MYFLOAT4* clDataX,__global MYFLOAT4* clCoM,int gid)
169
{
170
    return(PairPotential(clDataX[gid],clCoM[0]));
171
}
172

173
// Elements from : http://doswa.com/2009/01/02/fourth-order-runge-kutta-numerical-integration.html
174

175
MYFLOAT8 AtomicRungeKutta(__global MYFLOAT4* clDataInX,__global MYFLOAT4* clDataInV,int gid,MYFLOAT dt)
176
{
177
    private MYFLOAT4 a0,v0,x0,a1,v1,x1,a2,v2,x2,a3,v3,x3,a4,v4,x4,xf,vf;
178
    MYFLOAT4 DT=dt*(MYFLOAT4)(1.e0f,1.e0f,1.e0f,1.e0f);
179

180
    a0=(MYFLOAT4)(0.e0f,0.e0f,0.e0f,0.e0f);
181
    v0=(MYFLOAT4)clDataInV[gid];
182
    x0=(MYFLOAT4)clDataInX[gid];
183
    int N = get_global_size(0);    
184
    
185
    for (private int i=0;i<N;i++)
186
    {
187
        if (gid != i)
188
        a0+=Interaction(x0,clDataInX[i]);
189
    }
190
        
191
    a1=(MYFLOAT4)(0.e0f,0.e0f,0.e0f,0.e0f);
192
    v1=a0*dt+v0;
193
    x1=v0*dt+x0;
194
    for (private int j=0;j<N;j++)
195
    {
196
        if (gid != j)
197
        a1+=Interaction(x1,clDataInX[j]);
198
    }
199

200
    a2=(MYFLOAT4)(0.e0f,0.e0f,0.e0f,0.e0f);
201
    v2=a1*(MYFLOAT)(dt/2.e0f)+v0;
202
    x2=v1*(MYFLOAT)(dt/2.e0f)+x0;
203
    for (private int k=0;k<N;k++)
204
    {
205
        if (gid != k)
206
        a2+=Interaction(x2,clDataInX[k]);
207
    }
208
    
209
    a3=(MYFLOAT4)(0.e0f,0.e0f,0.e0f,0.e0f);
210
    v3=a2*(MYFLOAT)(dt/2.e0f)+v0;
211
    x3=v2*(MYFLOAT)(dt/2.e0f)+x0;
212
    for (private int l=0;l<N;l++)
213
    {
214
        if (gid != l)
215
        a3+=Interaction(x3,clDataInX[l]);
216
    }
217
    
218
    a4=(MYFLOAT4)(0.e0f,0.e0f,0.e0f,0.e0f);
219
    v4=a3*dt+v0;
220
    x4=v3*dt+x0;
221
    for (private int m=0;m<N;m++)
222
    {
223
        if (gid != m)
224
        a4+=Interaction(x4,clDataInX[m]);
225
    }
226
    
227
    xf=x0+dt*(v1+(MYFLOAT)2.e0f*(v2+v3)+v4)/(MYFLOAT)6.e0f;
228
    vf=v0+dt*(a1+(MYFLOAT)2.e0f*(a2+a3)+a4)/(MYFLOAT)6.e0f;
229
     
230
    return((MYFLOAT8)(xf.s0,xf.s1,xf.s2,0.e0f,vf.s0,vf.s1,vf.s2,0.e0f));
231
}
232

233
MYFLOAT8 AtomicHeun(__global MYFLOAT4* clDataInX,__global MYFLOAT4* clDataInV,int gid,MYFLOAT dt)
234
{
235
    private MYFLOAT4 x0,v0,a0,x1,v1,a1,xf,vf;
236
    MYFLOAT4 Dt=dt*(MYFLOAT4)(1.e0f,1.e0f,1.e0f,1.e0f);
237

238
    x0=(MYFLOAT4)clDataInX[gid];
239
    v0=(MYFLOAT4)clDataInV[gid];
240
    a0=(MYFLOAT4)(0.e0f,0.e0f,0.e0f,0.e0f);
241

242
    for (private int i=0;i<get_global_size(0);i++)
243
    {
244
        if (gid != i)
245
        a0+=Interaction(x0,clDataInX[i]);
246
    }
247

248
    a1=(MYFLOAT4)(0.e0f,0.e0f,0.e0f,0.e0f);
249
    //v1=v0+dt*a0;
250
    //x1=x0+dt*v0;
251
    v1=dt*a0+v0;
252
    x1=dt*v0+x0;
253

254
    for (private int j=0;j<get_global_size(0);j++)
255
    {
256
        if (gid != j)
257
        a1+=Interaction(x1,clDataInX[j]);
258
    }
259

260
    vf=v0+dt*(a0+a1)/(MYFLOAT)2.e0f;
261
    xf=x0+dt*(v0+v1)/(MYFLOAT)2.e0f;
262

263
    return((MYFLOAT8)(xf.s0,xf.s1,xf.s2,0.e0f,vf.s0,vf.s1,vf.s2,0.e0f));
264
}
265

266
MYFLOAT8 AtomicImplicitEuler(__global MYFLOAT4* clDataInX,__global MYFLOAT4* clDataInV,int gid,MYFLOAT dt)
267
{
268
    MYFLOAT4 x0,v0,a,xf,vf;
269

270
    x0=(MYFLOAT4)clDataInX[gid];
271
    v0=(MYFLOAT4)clDataInV[gid];
272
    a=(MYFLOAT4)(0.e0f,0.e0f,0.e0f,0.e0f);
273

274
    for (private int i=0;i<get_global_size(0);i++)
275
    {
276
        if (gid != i)
277
          a+=Interaction(x0,clDataInX[i]);
278
    }
279
       
280
    vf=v0+dt*a;
281
    xf=x0+dt*vf;
282

283
    return((MYFLOAT8)(xf.s0,xf.s1,xf.s2,0.e0f,vf.s0,vf.s1,vf.s2,0.e0f));
284
}
285

286
MYFLOAT8 AtomicExplicitEuler(__global MYFLOAT4* clDataInX,__global MYFLOAT4* clDataInV,int gid,MYFLOAT dt)
287
{
288
    MYFLOAT4 x0,v0,a,xf,vf;
289

290
    x0=(MYFLOAT4)clDataInX[gid];
291
    v0=(MYFLOAT4)clDataInV[gid];
292
    a=(MYFLOAT4)(0.e0f,0.e0f,0.e0f,0.e0f);
293

294
    for (private int i=0;i<get_global_size(0);i++)
295
    {
296
        if (gid != i)
297
        a+=Interaction(x0,clDataInX[i]);
298
    }
299
       
300
    vf=v0+dt*a;
301
    xf=x0+dt*v0;
302
 
303
    return((MYFLOAT8)(xf.s0,xf.s1,xf.s2,0.e0f,vf.s0,vf.s1,vf.s2,0.e0f));
304
}
305

306
__kernel void InBallSplutterPoints(__global MYFLOAT4* clDataX, 
307
                                   MYFLOAT diameter,uint seed_z,uint seed_w)
308
{
309
    private int gid=get_global_id(0);
310
    private uint zmwc=seed_z+gid;
311
    private uint wmwc=seed_w+(gid+1)%2;
312
    private MYFLOAT Heat;
313
 
314
    for (int i=0;i<gid;i++)
315
    {
316
        Heat=MWCfp;
317
    }
318

319
// More accurate distribution based on spherical coordonates
320
// Disactivated because of AMD Oland GPU crash on launch
321
//     private MYFLOAT Radius,Theta,Phi,PosX,PosY,PosZ,SinTheta;
322
//     Radius=MWCfp*diameter/2.e0f;
323
//     Theta=(MYFLOAT)acos((float)(-2.e0f*MWCfp+1.0e0f));
324
//     Phi=(MYFLOAT)(2.e0f*PI*MWCfp);
325
//     SinTheta=sin((float)Theta);
326
//     PosX=cos((float)Phi)*Radius*SinTheta;
327
//     PosY=sin((float)Phi)*Radius*SinTheta;
328
//     PosZ=cos((float)Theta)*Radius;
329
//     clDataX[gid]=(MYFLOAT4)(PosX,PosY,PosZ,0.e0f);
330

331
    private MYFLOAT Radius=diameter/2.e0f;
332
    private MYFLOAT Length=diameter;
333
    private MYFLOAT4 Position;
334
    while (Length>Radius) {
335
       Position=(MYFLOAT4)((MWCfp-0.5e0f)*diameter,(MWCfp-0.5e0f)*diameter,(MWCfp-0.5e0f)*diameter,0.e0f);
336
       Length=(MYFLOAT)length((MYFLOAT4)Position);
337
    }
338

339
    clDataX[gid]=Position;    
340

341
    barrier(CLK_GLOBAL_MEM_FENCE);
342
}
343

344
__kernel void InBoxSplutterPoints(__global MYFLOAT4* clDataX, MYFLOAT box, 
345
                             uint seed_z,uint seed_w)
346
{
347
    int gid=get_global_id(0);
348
    uint zmwc=seed_z+gid;
349
    uint wmwc=seed_w-gid;
350
    private MYFLOAT Heat;
351
 
352
    for (int i=0;i<gid;i++)
353
    {
354
        Heat=MWCfp;
355
    }
356

357
    clDataX[gid]=(MYFLOAT4)((MWCfp-0.5e0f)*box,(MWCfp-0.5e0f)*box,(MWCfp-0.5e0f)*box,0.e0f);
358

359
    barrier(CLK_GLOBAL_MEM_FENCE);
360
}
361

362
__kernel void SplutterStress(__global MYFLOAT4* clDataX,__global MYFLOAT4* clDataV,__global MYFLOAT4* clCoM, MYFLOAT velocity,uint seed_z,uint seed_w)
363
{
364
    int gid = get_global_id(0);
365
    MYFLOAT N = (MYFLOAT)get_global_size(0);
366
    uint zmwc=seed_z+(uint)gid;
367
    uint wmwc=seed_w-(uint)gid;
368
    MYFLOAT4 CrossVector,SpeedVector,FromCoM;
369
    MYFLOAT Heat,ThetaA,PhiA,ThetaB,PhiB,Length,tA,tB,Polar;
370

371
    for (int i=0;i<gid;i++)
372
    {
373
        Heat=MWCfp;
374
    }
375

376
    // cast to float for sin,cos are NEEDED by Mesa FP64 implementation!
377
    // Implemention on AMD Oland are probably broken in float
378

379
    FromCoM=(MYFLOAT4)(clDataX[gid]-clCoM[0]);
380
    Length=length(FromCoM);
381
    //Theta=acos(FromCoM.z/Length);
382
    //Phi=atan(FromCoM.y/FromCoM.x);
383
    // First tangential vector to sphere of length radius
384
    ThetaA=acos(FromCoM.x/Length)+5.e-1f*PI;
385
    PhiA=atan(FromCoM.y/FromCoM.z);
386
    // Second tangential vector to sphere of length radius
387
    ThetaB=acos((float)(FromCoM.x/Length));
388
    PhiB=atan((float)(FromCoM.y/FromCoM.z))+5.e-1f*PI;
389
    // (x,y) random coordonates to plane tangential to sphere
390
    Polar=MWCfp*2.e0f*PI;
391
    tA=cos((float)Polar);
392
    tB=sin((float)Polar);
393

394
    // Exception for 2 particules to ovoid shifting
395
    if (get_global_size(0)==2) {
396
       CrossVector=(MYFLOAT4)(1.e0f,1.e0f,1.e0f,0.e0f);
397
    } else {
398
       CrossVector.s0=tA*cos((float)ThetaA)+tB*cos((float)ThetaB);
399
       CrossVector.s1=tA*sin((float)ThetaA)*sin((float)PhiA)+tB*sin((float)ThetaB)*sin((float)PhiB);
400
       CrossVector.s2=tA*sin((float)ThetaA)*cos((float)PhiA)+tB*sin((float)ThetaB)*cos((float)PhiB);
401
       CrossVector.s3=0.e0f;
402
    }
403

404
    if (velocity<SMALL_NUM) {
405
       SpeedVector=(MYFLOAT4)normalize(cross(FromCoM,CrossVector))*sqrt((-AtomicPotential(clDataX,gid)/(MYFLOAT)2.e0f));
406
    }
407
    else
408
    {
409

410
       SpeedVector=(MYFLOAT4)((MWCfp-5e-1f)*velocity,(MWCfp-5e-1f)*velocity,
411
                              (MWCfp-5e-1f)*velocity,0.e0f);
412
    }
413
    clDataV[gid]=SpeedVector;
414
    barrier(CLK_GLOBAL_MEM_FENCE);
415
}
416

417
__kernel void RungeKutta(__global MYFLOAT4* clDataX,__global MYFLOAT4* clDataV,MYFLOAT h)
418
{
419
    private int gid = get_global_id(0);
420
    private MYFLOAT8 clDataGid;
421
    
422
    clDataGid=AtomicRungeKutta(clDataX,clDataV,gid,h);
423
    barrier(CLK_GLOBAL_MEM_FENCE);
424
    clDataX[gid]=clDataGid.s0123;
425
    clDataV[gid]=clDataGid.s4567;
426
}
427

428
__kernel void Heun(__global MYFLOAT4* clDataX,__global MYFLOAT4* clDataV,MYFLOAT h)
429
{
430
    private int gid = get_global_id(0);
431
    private MYFLOAT8 clDataGid;
432
    
433
    clDataGid=AtomicHeun(clDataX,clDataV,gid,h);
434
    barrier(CLK_GLOBAL_MEM_FENCE);
435
    clDataX[gid]=clDataGid.s0123;
436
    clDataV[gid]=clDataGid.s4567;
437
}
438

439
__kernel void ImplicitEuler(__global MYFLOAT4* clDataX,__global MYFLOAT4* clDataV,MYFLOAT h)
440
{
441
    private int gid = get_global_id(0);
442
    private MYFLOAT8 clDataGid;
443
    
444
    clDataGid=AtomicImplicitEuler(clDataX,clDataV,gid,h);
445
    barrier(CLK_GLOBAL_MEM_FENCE);
446
    clDataX[gid]=clDataGid.s0123;
447
    clDataV[gid]=clDataGid.s4567;
448
}
449

450
__kernel void ExplicitEuler(__global MYFLOAT4* clDataX,__global MYFLOAT4* clDataV,MYFLOAT h)
451
{
452
    private int gid = get_global_id(0);
453
    private MYFLOAT8 clDataGid;    
454

455
    clDataGid=AtomicExplicitEuler(clDataX,clDataV,gid,h);
456
    barrier(CLK_GLOBAL_MEM_FENCE);
457
    clDataX[gid]=clDataGid.s0123;
458
    clDataV[gid]=clDataGid.s4567;
459
}
460

461
__kernel void CoMPotential(__global MYFLOAT4* clDataX,__global MYFLOAT4* clCoM,__global MYFLOAT* clPotential)
462
{
463
    int gid = get_global_id(0);
464

465
    clPotential[gid]=PairPotential(clDataX[gid],clCoM[0]);
466
}
467

468
__kernel void Potential(__global MYFLOAT4* clDataX,__global MYFLOAT* clPotential)
469
{
470
    int gid = get_global_id(0);
471

472
    MYFLOAT potential=(MYFLOAT)0.e0f;
473
    MYFLOAT4 x=clDataX[gid]; 
474
    
475
    for (int i=0;i<get_global_size(0);i++)
476
    {
477
        if (gid != i)
478
        potential+=PairPotential(x,clDataX[i]);
479
    }
480
                 
481
    barrier(CLK_GLOBAL_MEM_FENCE);
482
    clPotential[gid]=potential*(MYFLOAT)5.e-1f;
483
}
484

485
__kernel void CenterOfMass(__global MYFLOAT4* clDataX,__global MYFLOAT4* clCoM,int Size)
486
{
487
    MYFLOAT4 CoM=clDataX[0]; 
488

489
    for (int i=1;i<Size;i++)
490
    {
491
        CoM+=clDataX[i];
492
    }
493

494
    barrier(CLK_GLOBAL_MEM_FENCE);
495
    clCoM[0]=(MYFLOAT4)(CoM.s0,CoM.s1,CoM.s2,0.e0f)/(MYFLOAT)Size;
496
}
497

498
__kernel void Kinetic(__global MYFLOAT4* clDataV,__global MYFLOAT* clKinetic)
499
{
500
    int gid = get_global_id(0);
501
    
502
    barrier(CLK_GLOBAL_MEM_FENCE);
503
    MYFLOAT d=(MYFLOAT)length(clDataV[gid]);
504
    clKinetic[gid]=(MYFLOAT)5.e-1f*(MYFLOAT)(d*d);
505
}
506

507
"""
508

    
509
def MainOpenCL(clDataX,clDataV,Step,Method):
510
    time_start=time.time()
511
    if Method=="RungeKutta":
512
        CLLaunch=MyRoutines.RungeKutta(queue,(Number,1),None,clDataX,clDataV,Step)
513
    elif Method=="ExplicitEuler":
514
        CLLaunch=MyRoutines.ExplicitEuler(queue,(Number,1),None,clDataX,clDataV,Step)
515
    elif Method=="Heun":
516
        CLLaunch=MyRoutines.Heun(queue,(Number,1),None,clDataX,clDataV,Step)
517
    else:
518
        CLLaunch=MyRoutines.ImplicitEuler(queue,(Number,1),None,clDataX,clDataV,Step)
519
    CLLaunch.wait()
520
    Elapsed=time.time()-time_start
521
    return(Elapsed)
522
    
523
def display(*args):
524

    
525
    global MyDataX,MyDataV,clDataX,clDataV,Step,Method,Number,Iterations,Durations,Verbose,SpeedRendering
526
    
527
    glClearColor(0.0, 0.0, 0.0, 0.0)
528
    glClear(GL_COLOR_BUFFER_BIT)
529
    glColor3f(1.0,1.0,1.0)
530
    
531
    Elapsed=MainOpenCL(clDataX,clDataV,Step,Method)
532
    if SpeedRendering:
533
        cl.enqueue_copy(queue, MyDataV, clDataV)
534
        MyDataV.reshape(Number,4)[:,3]=1
535
        glVertexPointerf(MyDataV.reshape(Number,4))
536
    else:
537
        cl.enqueue_copy(queue, MyDataX, clDataX)
538
        MyDataX.reshape(Number,4)[:,3]=1
539
        glVertexPointerf(MyDataX.reshape(Number,4))
540

    
541
    if Verbose:
542
        print("Positions for #%s iteration: %s" % (Iterations,MyDataX))
543
    else:
544
        sys.stdout.write('.')
545
        sys.stdout.flush()
546
    Durations=np.append(Durations,MainOpenCL(clDataX,clDataV,Step,Method))    
547
    glEnableClientState(GL_VERTEX_ARRAY)
548
    glDrawArrays(GL_POINTS, 0, Number)
549
    glDisableClientState(GL_VERTEX_ARRAY)
550
    glFlush()
551
    Iterations+=1
552
    glutSwapBuffers()
553

    
554
def halt():
555
    pass
556

    
557
def keyboard(k,x,y):
558
    global ViewRZ,SpeedRendering
559
    LC_Z = as_8_bit( 'z' )
560
    UC_Z = as_8_bit( 'Z' )
561
    Plus = as_8_bit( '+' )
562
    Minus = as_8_bit( '-' )
563
    Switch = as_8_bit( 's' )
564

    
565
    Zoom=1
566
    if k == LC_Z:
567
        ViewRZ += 1.0
568
    elif k == UC_Z:
569
        ViewRZ -= 1.0
570
    elif k == Plus:
571
        Zoom *= 2.0
572
    elif k == Minus:
573
        Zoom /= 2.0
574
    elif k == Switch:
575
        if SpeedRendering:
576
            SpeedRendering=False
577
        else:
578
            SpeedRendering=True
579
    elif ord(k) == 27: # Escape
580
        glutLeaveMainLoop()
581
        return(False)
582
    else:
583
        return
584
    glRotatef(ViewRZ, 0.0, 0.0, 1.0)
585
    glScalef(Zoom,Zoom,Zoom)
586
    glutPostRedisplay()
587

    
588
def special(k,x,y):
589
    global ViewRX, ViewRY
590

    
591
    Step=1.
592
    if k == GLUT_KEY_UP:
593
        ViewRX += Step
594
    elif k == GLUT_KEY_DOWN:
595
        ViewRX -= Step
596
    elif k == GLUT_KEY_LEFT:
597
        ViewRY += Step
598
    elif k == GLUT_KEY_RIGHT:
599
        ViewRY -= Step
600
    else:
601
        return
602
    glRotatef(ViewRX, 1.0, 0.0, 0.0)
603
    glRotatef(ViewRY, 0.0, 1.0, 0.0)
604
    glutPostRedisplay()
605

    
606
def setup_viewport():
607
    global SizeOfBox
608
    glMatrixMode(GL_PROJECTION)
609
    glLoadIdentity()
610
    glOrtho(-SizeOfBox, SizeOfBox, -SizeOfBox, SizeOfBox, -SizeOfBox, SizeOfBox)
611
    glutPostRedisplay()
612
    
613
def reshape(w, h):
614
    glViewport(0, 0, w, h)
615
    setup_viewport()
616

    
617
if __name__=='__main__':
618

    
619
    global Number,Step,clDataX,clDataV,MyDataX,MyDataV,Method,SizeOfBox,Iterations,Verbose,Durations
620
    
621
    # ValueType
622
    ValueType='FP32'
623
    class MyFloat(np.float32):pass
624
    #    clType8=cl_array.vec.float8
625
    # Set defaults values
626
    np.set_printoptions(precision=2)  
627
    # Id of Device : 1 is for first find !
628
    Device=0
629
    # Number of bodies is integer
630
    Number=2
631
    # Number of iterations (for standalone execution)
632
    Iterations=10
633
    # Size of shape
634
    SizeOfShape=MyFloat(1.)
635
    # Initial velocity of particules
636
    Velocity=MyFloat(1.)
637
    # Step
638
    Step=MyFloat(1./32)
639
    # Method of integration
640
    Method='ImplicitEuler'
641
    # InitialRandom
642
    InitialRandom=False
643
    # RNG Marsaglia Method
644
    RNG='MWC'
645
    # Viriel Distribution of stress
646
    VirielStress=True
647
    # Verbose
648
    Verbose=False
649
    # OpenGL real time rendering
650
    OpenGL=False
651
    # Speed rendering
652
    SpeedRendering=False
653
    # Counter ArtEvasions Measures (artefact evasion)
654
    CoArEv='None'
655
    # Shape to distribute
656
    Shape='Ball'
657
    # Type of Interaction
658
    InterType='Force'
659
    
660
    HowToUse='%s -h [Help] -r [InitialRandom] -g [OpenGL] -e [VirielStress] -o [Verbose] -p [Potential] -x <None|NegExp|CorRad> -d <DeviceId> -n <NumberOfParticules> -i <Iterations> -z <SizeOfBoxOrBall> -v <Velocity> -s <Step> -b <Ball|Box> -m <ImplicitEuler|RungeKutta|ExplicitEuler|Heun> -t <FP32|FP64>'
661

    
662
    try:
663
        opts, args = getopt.getopt(sys.argv[1:],"rpgehod:n:i:z:v:s:m:t:b:x:",["random","potential","coarev","opengl","viriel","verbose","device=","number=","iterations=","size=","velocity=","step=","method=","valuetype=","shape="])
664
    except getopt.GetoptError:
665
        print(HowToUse % sys.argv[0])
666
        sys.exit(2)
667

    
668
    for opt, arg in opts:
669
        if opt == '-h':
670
            print(HowToUse % sys.argv[0])
671

    
672
            print("\nInformations about devices detected under OpenCL:")
673
            try:
674
                Id=0
675
                for platform in cl.get_platforms():
676
                    for device in platform.get_devices():
677
                        # Failed now because of POCL implementation
678
                        #deviceType=cl.device_type.to_string(device.type)
679
                        deviceType="xPU"
680
                        print("Device #%i from %s of type %s : %s" % (Id,platform.vendor.lstrip(),deviceType,device.name.lstrip()))
681
                        Id=Id+1
682
                sys.exit()
683
            except ImportError:
684
                print("Your platform does not seem to support OpenCL")
685
                sys.exit()
686

    
687
        elif opt in ("-t", "--valuetype"):
688
            if arg=='FP64':
689
                class MyFloat(np.float64): pass
690
            else:
691
                class MyFloat(np.float32):pass
692
            ValueType = arg
693
        elif opt in ("-d", "--device"):
694
            Device=int(arg)
695
        elif opt in ("-m", "--method"):
696
            Method=arg
697
        elif opt in ("-b", "--shape"):
698
            Shape=arg
699
            if Shape!='Ball' or Shape!='Box':
700
                print('Wrong argument: set to Ball')
701
        elif opt in ("-n", "--number"):
702
            Number=int(arg)
703
        elif opt in ("-i", "--iterations"):
704
            Iterations=int(arg)
705
        elif opt in ("-z", "--size"):
706
            SizeOfShape=MyFloat(arg)
707
        elif opt in ("-v", "--velocity"):
708
            Velocity=MyFloat(arg)
709
            VirielStress=False
710
        elif opt in ("-s", "--step"):
711
            Step=MyFloat(arg)
712
        elif opt in ("-r", "--random"):
713
            InitialRandom=True
714
        elif opt in ("-c", "--check"):
715
            CheckEnergies=True
716
        elif opt in ("-e", "--viriel"):
717
            VirielStress=True
718
        elif opt in ("-g", "--opengl"):
719
            OpenGL=True
720
        elif opt in ("-p", "--potential"):
721
            InterType='Potential'
722
        elif opt in ("-x", "--coarev"):
723
            CoArEv=arg
724
        elif opt in ("-o", "--verbose"):
725
            Verbose=True
726

    
727
    SizeOfShape=np.sqrt(MyFloat(SizeOfShape*Number))
728
    Velocity=MyFloat(Velocity)
729
    Step=MyFloat(Step)
730
                
731
    print("Device choosed : %s" % Device)
732
    print("Number of particules : %s" % Number)
733
    print("Size of Shape : %s" % SizeOfShape)
734
    print("Initial velocity : %s" % Velocity)
735
    print("Step of iteration : %s" % Step)
736
    print("Number of iterations : %s" % Iterations)
737
    print("Method of resolution : %s" % Method)
738
    print("Initial Random for RNG Seed : %s" % InitialRandom)
739
    print("ValueType is : %s" % ValueType)
740
    print("Viriel distribution of stress : %s" % VirielStress)
741
    print("OpenGL real time rendering : %s" % OpenGL)
742
    print("Speed rendering : %s" % SpeedRendering)
743
    print("Interaction type : %s" % InterType)
744
    print("Counter Artevasion type : %s" % CoArEv)
745

    
746
    # Create Numpy array of CL vector with 8 FP32    
747
    MyCoM = np.zeros(4,dtype=MyFloat)
748
    MyDataX = np.zeros(Number*4, dtype=MyFloat)
749
    MyDataV = np.zeros(Number*4, dtype=MyFloat)
750
    MyPotential = np.zeros(Number, dtype=MyFloat)
751
    MyKinetic = np.zeros(Number, dtype=MyFloat)
752

    
753
    Marsaglia,Computing,Interaction,Artevasion=DictionariesAPI()
754

    
755
    # Scan the OpenCL arrays
756
    Id=0
757
    HasXPU=False
758
    for platform in cl.get_platforms():
759
        for device in platform.get_devices():
760
            if Id==Device:
761
                PlatForm=platform
762
                XPU=device
763
                print("CPU/GPU selected: ",device.name.lstrip())
764
                print("Platform selected: ",platform.name)
765
                HasXPU=True
766
            Id+=1
767

    
768
    if HasXPU==False:
769
        print("No XPU #%i found in all of %i devices, sorry..." % (Device,Id-1))
770
        sys.exit()      
771

    
772
    # Create Context
773
    try:
774
        ctx = cl.Context([XPU])
775
        queue = cl.CommandQueue(ctx,properties=cl.command_queue_properties.PROFILING_ENABLE)
776
    except:
777
        print("Crash during context creation")
778

    
779
    # Build all routines used for the computing
780

    
781
    #BuildOptions="-cl-mad-enable -cl-kernel-arg-info -cl-fast-relaxed-math -cl-std=CL1.2 -DTRNG=%i -DTYPE=%i" % (Marsaglia[RNG],Computing[ValueType])
782
    BuildOptions="-cl-mad-enable -cl-fast-relaxed-math -DTRNG=%i -DTYPE=%i -DINTERACTION=%i -DARTEVASION=%i" % (Marsaglia[RNG],Computing[ValueType],Interaction[InterType],Artevasion[CoArEv])
783

    
784
    if 'Intel' in PlatForm.name or 'Experimental' in PlatForm.name or 'Clover' in PlatForm.name or 'Portable' in PlatForm.name :
785
        MyRoutines = cl.Program(ctx, BlobOpenCL).build(options = BuildOptions)
786
    else:
787
        MyRoutines = cl.Program(ctx, BlobOpenCL).build(options = BuildOptions+" -cl-strict-aliasing")
788
        
789
    mf = cl.mem_flags
790
    # Read/Write approach for buffering
791
    clDataX = cl.Buffer(ctx, mf.READ_WRITE, MyDataX.nbytes)
792
    clDataV = cl.Buffer(ctx, mf.READ_WRITE, MyDataV.nbytes)
793
    clPotential = cl.Buffer(ctx, mf.READ_WRITE, MyPotential.nbytes)
794
    clKinetic = cl.Buffer(ctx, mf.READ_WRITE, MyKinetic.nbytes)
795
    clCoM = cl.Buffer(ctx, mf.READ_WRITE, MyCoM.nbytes)
796
    
797
    # Write/HostPointer approach for buffering
798
    # clDataX = cl.Buffer(ctx, mf.WRITE_ONLY|mf.COPY_HOST_PTR,hostbuf=MyDataX)
799
    # clDataV = cl.Buffer(ctx, mf.WRITE_ONLY|mf.COPY_HOST_PTR,hostbuf=MyDataV)
800
    # clPotential = cl.Buffer(ctx, mf.WRITE_ONLY|mf.COPY_HOST_PTR,hostbuf=MyPotential)
801
    # clKinetic = cl.Buffer(ctx, mf.WRITE_ONLY|mf.COPY_HOST_PTR,hostbuf=MyKinetic)
802
    # clCoM = cl.Buffer(ctx, mf.WRITE_ONLY|mf.COPY_HOST_PTR,hostbuf=MyCoM)
803

    
804
    print('All particles superimposed.')
805

    
806
    # Set particles to RNG points
807
    if InitialRandom:
808
        seed_w=np.uint32(nprnd(2**32))
809
        seed_z=np.uint32(nprnd(2**32))
810
    else:
811
        seed_w=np.uint32(19710211)
812
        seed_z=np.uint32(20081010)
813
            
814
    if Shape=='Ball':
815
        MyRoutines.InBallSplutterPoints(queue,(Number,1),None,clDataX,SizeOfShape,seed_w,seed_z)
816
    else:
817
        MyRoutines.InBoxSplutterPoints(queue,(Number,1),None,clDataX,SizeOfShape,seed_w,seed_z)
818

    
819
    print('All particules distributed')
820

    
821
    CLLaunch=MyRoutines.CenterOfMass(queue,(1,1),None,clDataX,clCoM,np.int32(Number))
822
    CLLaunch.wait()
823
    cl.enqueue_copy(queue,MyCoM,clCoM)
824
    print('Center Of Mass estimated: (%s,%s,%s)' % (MyCoM[0],MyCoM[1],MyCoM[2]))
825
    
826
    if VirielStress:
827
        CLLaunch=MyRoutines.SplutterStress(queue,(Number,1),None,clDataX,clDataV,clCoM,MyFloat(0.),np.uint32(110271),np.uint32(250173))
828
    else:
829
        CLLaunch=MyRoutines.SplutterStress(queue,(Number,1),None,clDataX,clDataV,clCoM,Velocity,np.uint32(110271),np.uint32(250173))
830
    CLLaunch.wait()
831

    
832
    print('All particules stressed')
833

    
834
    CLLaunch=MyRoutines.Potential(queue,(Number,1),None,clDataX,clPotential)
835
    CLLaunch=MyRoutines.Kinetic(queue,(Number,1),None,clDataV,clKinetic)
836
    CLLaunch.wait()
837
    cl.enqueue_copy(queue,MyPotential,clPotential)
838
    cl.enqueue_copy(queue,MyKinetic,clKinetic)
839
    print('Energy estimated: Viriel=%s Potential=%s Kinetic=%s\n'% (np.sum(MyPotential)+2*np.sum(MyKinetic),np.sum(MyPotential),np.sum(MyKinetic)))
840

    
841
    if SpeedRendering:
842
        SizeOfBox=max(2*MyKinetic)
843
    else:
844
        SizeOfBox=SizeOfShape        
845
    
846
    if OpenGL:
847
        print('\tTiny documentation to interact OpenGL rendering:\n')
848
        print('\t<Left|Right> Rotate around X axis')
849
        print('\t  <Up|Down>  Rotate around Y axis')
850
        print('\t   <z|Z>     Rotate around Z axis')
851
        print('\t   <-|+>     Unzoom/Zoom')
852
        print('\t    <s>      Toggle to display Positions or Velocities')
853
        print('\t   <Esc>     Quit\n')
854
    
855
    wall_time_start=time.time()
856

    
857
    Durations=np.array([],dtype=MyFloat)
858
    print('Starting!')
859
    if OpenGL:
860
        global ViewRX,ViewRY,ViewRZ
861
        Iterations=0
862
        ViewRX,ViewRY,ViewRZ = 0.,0.,0.
863
        # Launch OpenGL Loop
864
        glutInit(sys.argv)
865
        glutInitDisplayMode(GLUT_DOUBLE | GLUT_RGB)
866
        glutSetOption(GLUT_ACTION_ON_WINDOW_CLOSE,GLUT_ACTION_CONTINUE_EXECUTION)
867
        glutInitWindowSize(512,512)
868
        glutCreateWindow(b'NBodyGL')
869
        setup_viewport()
870
        glutReshapeFunc(reshape)
871
        glutDisplayFunc(display)
872
        glutIdleFunc(display)
873
        #   glutMouseFunc(mouse)
874
        glutSpecialFunc(special)
875
        glutKeyboardFunc(keyboard)
876
        glutMainLoop()
877
    else:
878
        for iteration in range(Iterations):
879
            Elapsed=MainOpenCL(clDataX,clDataV,Step,Method)
880
            if Verbose:
881
                # print("Duration of #%s iteration: %s" % (iteration,Elapsed))
882
                cl.enqueue_copy(queue, MyDataX, clDataX)
883
                print("Positions for #%s iteration: %s" % (iteration,MyDataX))
884
            else:
885
                sys.stdout.write('.')
886
                sys.stdout.flush()
887
            Durations=np.append(Durations,Elapsed)
888

    
889
    print('\nEnding!')
890
            
891
    MyRoutines.CenterOfMass(queue,(1,1),None,clDataX,clCoM,np.int32(Number))
892
    CLLaunch=MyRoutines.Potential(queue,(Number,1),None,clDataX,clPotential)
893
    CLLaunch=MyRoutines.Kinetic(queue,(Number,1),None,clDataV,clKinetic)
894
    CLLaunch.wait()
895
    cl.enqueue_copy(queue,MyCoM,clCoM)
896
    cl.enqueue_copy(queue,MyPotential,clPotential)
897
    cl.enqueue_copy(queue,MyKinetic,clKinetic)
898
    print('\nCenter Of Mass estimated: (%s,%s,%s)' % (MyCoM[0],MyCoM[1],MyCoM[2]))
899
    print('Energy estimated: Viriel=%s Potential=%s Kinetic=%s\n'% (np.sum(MyPotential)+2.*np.sum(MyKinetic),np.sum(MyPotential),np.sum(MyKinetic)))
900

    
901
    print("Duration stats on device %s with %s iterations :\n\tMean:\t%s\n\tMedian:\t%s\n\tStddev:\t%s\n\tMin:\t%s\n\tMax:\t%s\n\n\tVariability:\t%s\n" % (Device,Iterations,np.mean(Durations),np.median(Durations),np.std(Durations),np.min(Durations),np.max(Durations),np.std(Durations)/np.median(Durations)))
902

    
903
    # FPS: 1/Elapsed
904
    FPS=np.ones(len(Durations))
905
    FPS/=Durations
906

    
907
    print("FPS stats on device %s with %s iterations :\n\tMean:\t%s\n\tMedian:\t%s\n\tStddev:\t%s\n\tMin:\t%s\n\tMax:\t%s\n" % (Device,Iterations,np.mean(FPS),np.median(FPS),np.std(FPS),np.min(FPS),np.max(FPS)))
908

    
909
    # Contraction of Square*Size*Hertz: Size*Size/Elapsed
910
    Squertz=np.ones(len(Durations))
911
    Squertz*=Number*Number
912
    Squertz/=Durations
913

    
914
    print("Squertz in log10 & complete stats on device %s with %s iterations :\n\tMean:\t%s\t%s\n\tMedian:\t%s\t%s\n\tStddev:\t%s\t%s\n\tMin:\t%s\t%s\n\tMax:\t%s\t%s\n" % (Device,Iterations,np.log10(np.mean(Squertz)),np.mean(Squertz),np.log10(np.median(Squertz)),np.median(Squertz),np.log10(np.std(Squertz)),np.std(Squertz),np.log10(np.min(Squertz)),np.min(Squertz),np.log10(np.max(Squertz)),np.max(Squertz)))
915
        
916
    clDataX.release()
917
    clDataV.release()
918
    clKinetic.release()
919
    clPotential.release()
920