Statistiques
| Révision :

root / NBody / NBodyGL.py @ 171

Historique | Voir | Annoter | Télécharger (24,98 ko)

1
#!/usr/bin/env python3
2
# -*- coding: utf-8 -*-
3
"""
4
Demonstrateur OpenCL d'interaction NCorps
5

6
Emmanuel QUEMENER <emmanuel.quemener@ens-lyon.fr> CeCILLv2
7
"""
8
import getopt
9
import sys
10
import time
11
import numpy as np
12
import pyopencl as cl
13
import pyopencl.array as cl_array
14
from numpy.random import randint as nprnd
15
import string, sys
16
from OpenGL.GL import *
17
from OpenGL.GLUT import *
18

    
19
def DictionariesAPI():
20
    Marsaglia={'CONG':0,'SHR3':1,'MWC':2,'KISS':3}
21
    Computing={'FP32':0,'FP64':1}
22
    return(Marsaglia,Computing)
23

    
24

    
25

    
26

    
27

    
28

    
29

    
30

    
31

    
32

    
33

    
34

    
35

    
36

    
37

    
38

    
39

    
40

    
41

    
42

    
43

    
44

    
45

    
46

    
47

    
48

    
49

    
50

    
51

    
52

    
53

    
54

    
55

    
56

    
57

    
58

    
59

    
60

    
61

    
62

    
63

    
64

    
65

    
66

    
67

    
68

    
69

    
70

    
71

    
72

    
73

    
74

    
75

    
76

    
77

    
78

    
79

    
80

    
81

    
82

    
83

    
84

    
85

    
86

    
87
BlobOpenCL= """
88
#define TFP32 0
89
#define TFP64 1
90

91
#if TYPE == TFP32
92
#define MYFLOAT4 float4
93
#define MYFLOAT8 float8
94
#define MYFLOAT float
95
#define DISTANCE fast_distance
96
#else
97
#define MYFLOAT4 double4
98
#define MYFLOAT8 double8
99
#define MYFLOAT double
100
#define DISTANCE distance
101
#if defined(cl_khr_fp64)  // Khronos extension available?
102
#pragma OPENCL EXTENSION cl_khr_fp64 : enable
103
#endif
104
#endif
105

106

107
#define znew  ((zmwc=36969*(zmwc&65535)+(zmwc>>16))<<16)
108
#define wnew  ((wmwc=18000*(wmwc&65535)+(wmwc>>16))&65535)
109
#define MWC   (znew+wnew)
110
#define SHR3  (jsr=(jsr=(jsr=jsr^(jsr<<17))^(jsr>>13))^(jsr<<5))
111
#define CONG  (jcong=69069*jcong+1234567)
112
#define KISS  ((MWC^CONG)+SHR3)
113

114

115
#define MWCfp (MYFLOAT)(MWC * 2.3283064365386963e-10f)
116
#define KISSfp (MYFLOAT)(KISS * 2.3283064365386963e-10f)
117
#define SHR3fp (MYFLOAT)(SHR3 * 2.3283064365386963e-10f)
118
#define CONGfp (MYFLOAT)(CONG * 2.3283064365386963e-10f)
119

120
#define PI (MYFLOAT)3.141592653589793238462643197169399375105820974944592307816406286e0f
121

122
#define SMALL_NUM (MYFLOAT)1.e-9f
123

124
#define LENGTH 1.e0f
125

126
// Create my own Distance implementation: distance buggy on Oland AMD chipset
127

128
MYFLOAT MyDistance(MYFLOAT4 n,MYFLOAT4 m)
129
{
130
    private MYFLOAT x2,y2,z2;
131
    x2=n.s0-m.s0;
132
    x2*=x2;
133
    y2=n.s1-m.s1;
134
    y2*=y2;
135
    z2=n.s2-m.s2;
136
    z2*=z2;
137
    return(sqrt(x2+y2+z2));
138
}
139

140
// MYFLOAT4 Interaction(MYFLOAT4 m,MYFLOAT4 n)
141
// {
142
//     private MYFLOAT r=MyDistance((MYFLOAT4)n,(MYFLOAT4)m);
143
//
144
//     return(((MYFLOAT4)n-(MYFLOAT4)m)/(MYFLOAT)(r*r*r));
145
// }
146

147
MYFLOAT4 Interaction(MYFLOAT4 m,MYFLOAT4 n)
148
{
149
    private MYFLOAT r=MyDistance((MYFLOAT4)n,(MYFLOAT4)m);
150
    private MYFLOAT r2=r*r;
151
    private MYFLOAT c=1.e0f/(MYFLOAT)get_global_size(0);
152

153
    return(((MYFLOAT4)n-(MYFLOAT4)m)*(MYFLOAT)(1.e0f-exp(-c*r2))/(MYFLOAT)(r*r2));
154
}
155

156
MYFLOAT PairPotential(MYFLOAT4 m,MYFLOAT4 n)
157
{
158
    return((MYFLOAT)(-1.e0f)/(DISTANCE(n,m)));
159

160
//    return((MYFLOAT)(-1.e0f)/(MyDistance(n,m)));
161
}
162

163
MYFLOAT AtomicPotential(__global MYFLOAT4* clDataX,int gid)
164
{
165
    private MYFLOAT potential=(MYFLOAT)0.e0f;
166
    private MYFLOAT4 x=clDataX[gid]; 
167
    
168
    for (int i=0;i<get_global_size(0);i++)
169
    {
170
        if (gid != i)
171
        potential+=PairPotential(x,clDataX[i]);
172
    }
173

174
    barrier(CLK_GLOBAL_MEM_FENCE);
175
    return(potential);
176
}
177

178
MYFLOAT AtomicPotentialCoM(__global MYFLOAT4* clDataX,__global MYFLOAT4* clCoM,int gid)
179
{
180
    return(PairPotential(clDataX[gid],clCoM[0]));
181
}
182

183
// Elements from : http://doswa.com/2009/01/02/fourth-order-runge-kutta-numerical-integration.html
184

185
MYFLOAT8 AtomicRungeKutta(__global MYFLOAT4* clDataInX,__global MYFLOAT4* clDataInV,int gid,MYFLOAT dt)
186
{
187
    private MYFLOAT4 a0,v0,x0,a1,v1,x1,a2,v2,x2,a3,v3,x3,a4,v4,x4,xf,vf;
188
    MYFLOAT4 DT=dt*(MYFLOAT4)(1.e0f,1.e0f,1.e0f,1.e0f);
189

190
    a0=(MYFLOAT4)(0.e0f,0.e0f,0.e0f,0.e0f);
191
    v0=(MYFLOAT4)clDataInV[gid];
192
    x0=(MYFLOAT4)clDataInX[gid];
193
    int N = get_global_size(0);    
194
    
195
    for (private int i=0;i<N;i++)
196
    {
197
        if (gid != i)
198
        a0+=Interaction(x0,clDataInX[i]);
199
    }
200
        
201
    a1=(MYFLOAT4)(0.e0f,0.e0f,0.e0f,0.e0f);
202
    v1=a0*dt+v0;
203
    x1=v0*dt+x0;
204
    for (private int j=0;j<N;j++)
205
    {
206
        if (gid != j)
207
        a1+=Interaction(x1,clDataInX[j]);
208
    }
209

210
    a2=(MYFLOAT4)(0.e0f,0.e0f,0.e0f,0.e0f);
211
    v2=a1*(MYFLOAT)(dt/2.e0f)+v0;
212
    x2=v1*(MYFLOAT)(dt/2.e0f)+x0;
213
    for (private int k=0;k<N;k++)
214
    {
215
        if (gid != k)
216
        a2+=Interaction(x2,clDataInX[k]);
217
    }
218
    
219
    a3=(MYFLOAT4)(0.e0f,0.e0f,0.e0f,0.e0f);
220
    v3=a2*(MYFLOAT)(dt/2.e0f)+v0;
221
    x3=v2*(MYFLOAT)(dt/2.e0f)+x0;
222
    for (private int l=0;l<N;l++)
223
    {
224
        if (gid != l)
225
        a3+=Interaction(x3,clDataInX[l]);
226
    }
227
    
228
    a4=(MYFLOAT4)(0.e0f,0.e0f,0.e0f,0.e0f);
229
    v4=a3*dt+v0;
230
    x4=v3*dt+x0;
231
    for (private int m=0;m<N;m++)
232
    {
233
        if (gid != m)
234
        a4+=Interaction(x4,clDataInX[m]);
235
    }
236
    
237
    xf=x0+dt*(v1+(MYFLOAT)2.e0f*(v2+v3)+v4)/(MYFLOAT)6.e0f;
238
    vf=v0+dt*(a1+(MYFLOAT)2.e0f*(a2+a3)+a4)/(MYFLOAT)6.e0f;
239
     
240
    return((MYFLOAT8)(xf.s0,xf.s1,xf.s2,0.e0f,vf.s0,vf.s1,vf.s2,0.e0f));
241
}
242

243
MYFLOAT8 AtomicHeun(__global MYFLOAT4* clDataInX,__global MYFLOAT4* clDataInV,int gid,MYFLOAT dt)
244
{
245
    private MYFLOAT4 x0,v0,a0,x1,v1,a1,xf,vf;
246
    MYFLOAT4 Dt=dt*(MYFLOAT4)(1.e0f,1.e0f,1.e0f,1.e0f);
247

248
    x0=(MYFLOAT4)clDataInX[gid];
249
    v0=(MYFLOAT4)clDataInV[gid];
250
    a0=(MYFLOAT4)(0.e0f,0.e0f,0.e0f,0.e0f);
251

252
    for (private int i=0;i<get_global_size(0);i++)
253
    {
254
        if (gid != i)
255
        a0+=Interaction(x0,clDataInX[i]);
256
    }
257

258
    a1=(MYFLOAT4)(0.e0f,0.e0f,0.e0f,0.e0f);
259
    //v1=v0+dt*a0;
260
    //x1=x0+dt*v0;
261
    v1=dt*a0+v0;
262
    x1=dt*v0+x0;
263

264
    for (private int j=0;j<get_global_size(0);j++)
265
    {
266
        if (gid != j)
267
        a1+=Interaction(x1,clDataInX[j]);
268
    }
269

270
    vf=v0+dt*(a0+a1)/(MYFLOAT)2.e0f;
271
    xf=x0+dt*(v0+v1)/(MYFLOAT)2.e0f;
272

273
    return((MYFLOAT8)(xf.s0,xf.s1,xf.s2,0.e0f,vf.s0,vf.s1,vf.s2,0.e0f));
274
}
275

276
MYFLOAT8 AtomicImplicitEuler(__global MYFLOAT4* clDataInX,__global MYFLOAT4* clDataInV,int gid,MYFLOAT dt)
277
{
278
    MYFLOAT4 x0,v0,a,xf,vf;
279

280
    x0=(MYFLOAT4)clDataInX[gid];
281
    v0=(MYFLOAT4)clDataInV[gid];
282
    a=(MYFLOAT4)(0.e0f,0.e0f,0.e0f,0.e0f);
283

284
    for (private int i=0;i<get_global_size(0);i++)
285
    {
286
        if (gid != i)
287
          a+=Interaction(x0,clDataInX[i]);
288
    }
289
       
290
    vf=v0+dt*a;
291
    xf=x0+dt*vf;
292

293
    return((MYFLOAT8)(xf.s0,xf.s1,xf.s2,0.e0f,vf.s0,vf.s1,vf.s2,0.e0f));
294
}
295

296
MYFLOAT8 AtomicExplicitEuler(__global MYFLOAT4* clDataInX,__global MYFLOAT4* clDataInV,int gid,MYFLOAT dt)
297
{
298
    MYFLOAT4 x0,v0,a,xf,vf;
299

300
    x0=(MYFLOAT4)clDataInX[gid];
301
    v0=(MYFLOAT4)clDataInV[gid];
302
    a=(MYFLOAT4)(0.e0f,0.e0f,0.e0f,0.e0f);
303

304
    for (private int i=0;i<get_global_size(0);i++)
305
    {
306
        if (gid != i)
307
        a+=Interaction(x0,clDataInX[i]);
308
    }
309
       
310
    vf=v0+dt*a;
311
    xf=x0+dt*v0;
312
 
313
    return((MYFLOAT8)(xf.s0,xf.s1,xf.s2,0.e0f,vf.s0,vf.s1,vf.s2,0.e0f));
314
}
315

316
__kernel void InBallSplutterPoints(__global MYFLOAT4* clDataX, MYFLOAT radius, 
317
                             uint seed_z,uint seed_w)
318
{
319
    private int gid=get_global_id(0);
320
    private uint zmwc=seed_z+gid;
321
    private uint wmwc=seed_w+(gid+1)%2;
322
    private MYFLOAT Heat,Radius,Theta,Phi,PosX,PosY,PosZ,SinTheta;
323
 
324
    for (int i=0;i<gid;i++)
325
    {
326
        Heat=MWCfp;
327
    }
328

329
// More accurate distribution based on spherical coordonates
330
// Disactivated because of AMD Oland GPU crash on launch
331
//     Radius=MWCfp*radius;
332
//     Theta=(MYFLOAT)acos((float)(-2.e0f*MWCfp+1.0e0f));
333
//     Phi=(MYFLOAT)(2.e0f*PI*MWCfp);
334
//     SinTheta=sin((float)Theta);
335
//     PosX=cos((float)Phi)*Radius*SinTheta;
336
//     PosY=sin((float)Phi)*Radius*SinTheta;
337
//     PosZ=cos((float)Theta)*Radius;
338
//     clDataX[gid]=(MYFLOAT4)(PosX,PosY,PosZ,0.e0f);
339

340
    MYFLOAT4 Position=(MYFLOAT4)((MWCfp-0.5e0f)*radius,(MWCfp-0.5e0f)*radius,(MWCfp-0.5e0f)*radius,0.e0f);
341
    MYFLOAT Length=(MYFLOAT)length((MYFLOAT4)Position);
342
    clDataX[gid]=Position/Length*fmod(radius,Length);    
343

344
    barrier(CLK_GLOBAL_MEM_FENCE);
345
}
346

347
__kernel void InBoxSplutterPoints(__global MYFLOAT4* clDataX, MYFLOAT box, 
348
                             uint seed_z,uint seed_w)
349
{
350
    int gid=get_global_id(0);
351
    uint zmwc=seed_z+gid;
352
    uint wmwc=seed_w-gid;
353
    private MYFLOAT Heat;
354
 
355
    for (int i=0;i<gid;i++)
356
    {
357
        Heat=MWCfp;
358
    }
359

360
    clDataX[gid]=(MYFLOAT4)((MWCfp-0.5e0f)*box,(MWCfp-0.5e0f)*box,(MWCfp-0.5e0f)*box,0.e0f);
361

362
    barrier(CLK_GLOBAL_MEM_FENCE);
363
}
364

365
__kernel void SplutterStress(__global MYFLOAT4* clDataX,__global MYFLOAT4* clDataV,__global MYFLOAT4* clCoM, MYFLOAT velocity,uint seed_z,uint seed_w)
366
{
367
    int gid = get_global_id(0);
368
    MYFLOAT N = (MYFLOAT)get_global_size(0);
369
    uint zmwc=seed_z+(uint)gid;
370
    uint wmwc=seed_w-(uint)gid;
371
    MYFLOAT4 SpeedVector;
372

373
    if (velocity<SMALL_NUM) {
374
       SpeedVector=(MYFLOAT4)normalize(cross(clDataX[gid],clCoM[0]))*sqrt((-AtomicPotential(clDataX,gid)/(MYFLOAT)2.e0f));
375
    }
376
    else
377
    {    
378
       // cast to float for sin,cos are NEEDED by Mesa FP64 implementation!
379
       // Implemention on AMD Oland are probably broken in float
380

381
       MYFLOAT theta=acos((float)(1.0e0f-2.e0f*MWCfp));
382
       MYFLOAT phi=MWCfp*PI*(MYFLOAT)2.e0f;
383
       MYFLOAT sinTheta=sin((float)theta);
384
       MYFLOAT sinPhi=sin((float)phi);
385

386
       SpeedVector=(MYFLOAT4)((MWCfp-0.5e0f)*velocity,(MWCfp-0.5e0f)*velocity,
387
                              (MWCfp-0.5e0f)*velocity,0.e0f);
388
    }
389
    clDataV[gid]=SpeedVector;
390
    barrier(CLK_GLOBAL_MEM_FENCE);
391
}
392

393
__kernel void RungeKutta(__global MYFLOAT4* clDataX,__global MYFLOAT4* clDataV,MYFLOAT h)
394
{
395
    private int gid = get_global_id(0);
396
    private MYFLOAT8 clDataGid;
397
    
398
    clDataGid=AtomicRungeKutta(clDataX,clDataV,gid,h);
399
    barrier(CLK_GLOBAL_MEM_FENCE);
400
    clDataX[gid]=clDataGid.s0123;
401
    clDataV[gid]=clDataGid.s4567;
402
}
403

404
__kernel void Heun(__global MYFLOAT4* clDataX,__global MYFLOAT4* clDataV,MYFLOAT h)
405
{
406
    private int gid = get_global_id(0);
407
    private MYFLOAT8 clDataGid;
408
    
409
    clDataGid=AtomicHeun(clDataX,clDataV,gid,h);
410
    barrier(CLK_GLOBAL_MEM_FENCE);
411
    clDataX[gid]=clDataGid.s0123;
412
    clDataV[gid]=clDataGid.s4567;
413
}
414

415
__kernel void ImplicitEuler(__global MYFLOAT4* clDataX,__global MYFLOAT4* clDataV,MYFLOAT h)
416
{
417
    private int gid = get_global_id(0);
418
    private MYFLOAT8 clDataGid;
419
    
420
    clDataGid=AtomicImplicitEuler(clDataX,clDataV,gid,h);
421
    barrier(CLK_GLOBAL_MEM_FENCE);
422
    clDataX[gid]=clDataGid.s0123;
423
    clDataV[gid]=clDataGid.s4567;
424
}
425

426
__kernel void ExplicitEuler(__global MYFLOAT4* clDataX,__global MYFLOAT4* clDataV,MYFLOAT h)
427
{
428
    private int gid = get_global_id(0);
429
    private MYFLOAT8 clDataGid;    
430

431
    clDataGid=AtomicExplicitEuler(clDataX,clDataV,gid,h);
432
    barrier(CLK_GLOBAL_MEM_FENCE);
433
    clDataX[gid]=clDataGid.s0123;
434
    clDataV[gid]=clDataGid.s4567;
435
}
436

437
__kernel void CoMPotential(__global MYFLOAT4* clDataX,__global MYFLOAT4* clCoM,__global MYFLOAT* clPotential)
438
{
439
    int gid = get_global_id(0);
440

441
    clPotential[gid]=PairPotential(clDataX[gid],clCoM[0]);
442
}
443

444
__kernel void Potential(__global MYFLOAT4* clDataX,__global MYFLOAT* clPotential)
445
{
446
    int gid = get_global_id(0);
447

448
    MYFLOAT potential=(MYFLOAT)0.e0f;
449
    MYFLOAT4 x=clDataX[gid]; 
450
    
451
    for (int i=0;i<get_global_size(0);i++)
452
    {
453
        if (gid != i)
454
        potential+=PairPotential(x,clDataX[i]);
455
    }
456
                 
457
    barrier(CLK_GLOBAL_MEM_FENCE);
458
    clPotential[gid]=potential*(MYFLOAT)5.e-1f;
459
}
460

461
__kernel void CenterOfMass(__global MYFLOAT4* clDataX,__global MYFLOAT4* clCoM,int Size)
462
{
463
    MYFLOAT4 CoM=clDataX[0]; 
464

465
    for (int i=1;i<Size;i++)
466
    {
467
        CoM+=clDataX[i];
468
    }
469

470
    barrier(CLK_GLOBAL_MEM_FENCE);
471
    clCoM[0]=(MYFLOAT4)(CoM.s0,CoM.s1,CoM.s2,0.e0f)/(MYFLOAT)Size;
472
}
473

474
__kernel void Kinetic(__global MYFLOAT4* clDataV,__global MYFLOAT* clKinetic)
475
{
476
    int gid = get_global_id(0);
477
    
478
    barrier(CLK_GLOBAL_MEM_FENCE);
479
    MYFLOAT d=(MYFLOAT)length(clDataV[gid]);
480
    clKinetic[gid]=(MYFLOAT)5.e-1f*(MYFLOAT)(d*d);
481
}
482

483
"""
484

    
485
def MainOpenCL(clDataX,clDataV,Step,Method):
486
    time_start=time.time()
487
    if Method=="RungeKutta":
488
        CLLaunch=MyRoutines.RungeKutta(queue,(Number,1),None,clDataX,clDataV,Step)
489
    elif Method=="ExplicitEuler":
490
        CLLaunch=MyRoutines.ExplicitEuler(queue,(Number,1),None,clDataX,clDataV,Step)
491
    elif Method=="Heun":
492
        CLLaunch=MyRoutines.Heun(queue,(Number,1),None,clDataX,clDataV,Step)
493
    else:
494
        CLLaunch=MyRoutines.ImplicitEuler(queue,(Number,1),None,clDataX,clDataV,Step)
495
    CLLaunch.wait()
496
    Elapsed=time.time()-time_start
497
    return(Elapsed)
498
    
499
def display(*args):
500

    
501
    global MyDataX,MyDataV,clDataX,clDataV,Step,Method,Number,Iterations,Durations,Verbose,SpeedRendering
502
    
503
    glClearColor(0.0, 0.0, 0.0, 0.0)
504
    glClear(GL_COLOR_BUFFER_BIT)
505
    glColor3f(1.0,1.0,1.0)
506
    
507
    Elapsed=MainOpenCL(clDataX,clDataV,Step,Method)
508
    if SpeedRendering:
509
        cl.enqueue_copy(queue, MyDataV, clDataV)
510
        MyDataV.reshape(Number,4)[:,3]=1
511
        glVertexPointerf(MyDataV.reshape(Number,4))
512
    else:
513
        cl.enqueue_copy(queue, MyDataX, clDataX)
514
        MyDataX.reshape(Number,4)[:,3]=1
515
        glVertexPointerf(MyDataX.reshape(Number,4))
516

    
517
    if Verbose:
518
        print("Duration of #%s iteration: %s" % (Iterations,Elapsed))
519
    else:
520
        sys.stdout.write('.')
521
        sys.stdout.flush()
522
    Durations=np.append(Durations,MainOpenCL(clDataX,clDataV,Step,Method))    
523
    glEnableClientState(GL_VERTEX_ARRAY)
524
    glDrawArrays(GL_POINTS, 0, Number)
525
    glDisableClientState(GL_VERTEX_ARRAY)
526
    glFlush()
527
    Iterations+=1
528
    glutSwapBuffers()
529

    
530
def halt():
531
    pass
532

    
533
def keyboard(k,x,y):
534
    global ViewRZ,SizeOfBox,SpeedRendering
535
    LC_Z = as_8_bit( 'z' )
536
    UC_Z = as_8_bit( 'Z' )
537
    Plus = as_8_bit( '+' )
538
    Minus = as_8_bit( '-' )
539
    Switch = as_8_bit( 's' )
540

    
541
    Zoom=1
542
    if k == LC_Z:
543
        ViewRZ += 1.0
544
    elif k == UC_Z:
545
        ViewRZ -= 1.0
546
    elif k == Plus:
547
        Zoom *= 2.0
548
    elif k == Minus:
549
        Zoom /= 2.0
550
    elif k == Switch:
551
        if SpeedRendering:
552
            SpeedRendering=False
553
        else:
554
            SpeedRendering=True
555
    elif ord(k) == 27: # Escape
556
        glutLeaveMainLoop()
557
        return(False)
558
    else:
559
        return
560
    glRotatef(ViewRZ, 0.0, 0.0, 1.0)
561
    glScalef(Zoom,Zoom,Zoom)
562
    glutPostRedisplay()
563

    
564
def special(k,x,y):
565
    global ViewRX, ViewRY, ViewRZ
566

    
567
    if k == GLUT_KEY_UP:
568
        ViewRX += 1.0
569
    elif k == GLUT_KEY_DOWN:
570
        ViewRX -= 1.0
571
    elif k == GLUT_KEY_LEFT:
572
        ViewRY += 1.0
573
    elif k == GLUT_KEY_RIGHT:
574
        ViewRY -= 1.0
575
    else:
576
        return
577
    glRotatef(ViewRX, 1.0, 0.0, 0.0)
578
    glRotatef(ViewRY, 0.0, 1.0, 0.0)
579
    glutPostRedisplay()
580

    
581
def setup_viewport():
582
    global SizeOfBox
583
    glMatrixMode(GL_PROJECTION)
584
    glLoadIdentity()
585
    glOrtho(-SizeOfBox, SizeOfBox, -SizeOfBox, SizeOfBox, -SizeOfBox, SizeOfBox)
586
    glutPostRedisplay()
587
    
588
def reshape(w, h):
589
    glViewport(0, 0, w, h)
590
    setup_viewport()
591

    
592
if __name__=='__main__':
593

    
594
    global Number,Step,clDataX,clDataV,MyDataX,MyDataV,Method,SizeOfBox,Iterations,Verbose,Durations
595
    
596
    # ValueType
597
    ValueType='FP32'
598
    class MyFloat(np.float32):pass
599
    #    clType8=cl_array.vec.float8
600
    # Set defaults values
601
    np.set_printoptions(precision=2)  
602
    # Id of Device : 1 is for first find !
603
    Device=0
604
    # Number of bodies is integer
605
    Number=2
606
    # Number of iterations (for standalone execution)
607
    Iterations=100
608
    # Size of shape
609
    SizeOfShape=MyFloat(1.)
610
    # Initial velocity of particules
611
    Velocity=MyFloat(1.)
612
    # Step
613
    Step=MyFloat(1./32)
614
    # Method of integration
615
    Method='ImplicitEuler'
616
    # InitialRandom
617
    InitialRandom=False
618
    # RNG Marsaglia Method
619
    RNG='MWC'
620
    # Viriel Distribution of stress
621
    VirielStress=True
622
    # Verbose
623
    Verbose=False
624
    # OpenGL real time rendering
625
    OpenGL=False
626
    # Speed rendering
627
    SpeedRendering=False
628
    # Shape to distribute
629
    Shape='Box'
630
    
631
    HowToUse='%s -h [Help] -r [InitialRandom] -g [OpenGL] -e [VirielStress] -d <DeviceId> -p [SpeedRendering] -n <NumberOfParticules> -i <Iterations> -z <SizeOfBoxOrBall> -v <Velocity> -s <Step> -b <Ball|Box> -m <ImplicitEuler|RungeKutta|ExplicitEuler|Heun> -t <FP32|FP64>'
632

    
633
    try:
634
        opts, args = getopt.getopt(sys.argv[1:],"rpgehd:n:i:z:v:s:m:t:b:",["random","rendering","opengl","viriel","device=","number=","iterations=","size=","velocity=","step=","method=","valuetype=","shape="])
635
    except getopt.GetoptError:
636
        print(HowToUse % sys.argv[0])
637
        sys.exit(2)
638

    
639
    for opt, arg in opts:
640
        if opt == '-h':
641
            print(HowToUse % sys.argv[0])
642

    
643
            print("\nInformations about devices detected under OpenCL:")
644
            try:
645
                Id=0
646
                for platform in cl.get_platforms():
647
                    for device in platform.get_devices():
648
                        # Failed now because of POCL implementation
649
                        #deviceType=cl.device_type.to_string(device.type)
650
                        deviceType="xPU"
651
                        print("Device #%i from %s of type %s : %s" % (Id,platform.vendor.lstrip(),deviceType,device.name.lstrip()))
652
                        Id=Id+1
653
                sys.exit()
654
            except ImportError:
655
                print("Your platform does not seem to support OpenCL")
656
                sys.exit()
657

    
658
        elif opt in ("-t", "--valuetype"):
659
            if arg=='FP64':
660
                class MyFloat(np.float64): pass
661
            else:
662
                class MyFloat(np.float32):pass
663
            ValueType = arg
664
        elif opt in ("-d", "--device"):
665
            Device=int(arg)
666
        elif opt in ("-m", "--method"):
667
            Method=arg
668
        elif opt in ("-b", "--shape"):
669
            Shape=arg
670
        elif opt in ("-n", "--number"):
671
            Number=int(arg)
672
        elif opt in ("-i", "--iterations"):
673
            Iterations=int(arg)
674
        elif opt in ("-z", "--size"):
675
            SizeOfShape=MyFloat(arg)
676
        elif opt in ("-v", "--velocity"):
677
            Velocity=MyFloat(arg)
678
            VirielStress=False
679
        elif opt in ("-s", "--step"):
680
            Step=MyFloat(arg)
681
        elif opt in ("-r", "--random"):
682
            InitialRandom=True
683
        elif opt in ("-c", "--check"):
684
            CheckEnergies=True
685
        elif opt in ("-e", "--viriel"):
686
            VirielStress=True
687
        elif opt in ("-g", "--opengl"):
688
            OpenGL=True
689
        elif opt in ("-p", "--rendering"):
690
            SpeedRendering=True
691
                        
692
    SizeOfShape=MyFloat(SizeOfShape*Number)
693
    Velocity=MyFloat(Velocity)
694
    Step=MyFloat(Step)
695
                
696
    print("Device choosed : %s" % Device)
697
    print("Number of particules : %s" % Number)
698
    print("Size of Shape : %s" % SizeOfShape)
699
    print("Initial velocity : %s" % Velocity)
700
    print("Step of iteration : %s" % Step)
701
    print("Number of iterations : %s" % Iterations)
702
    print("Method of resolution : %s" % Method)
703
    print("Initial Random for RNG Seed : %s" % InitialRandom)
704
    print("ValueType is : %s" % ValueType)
705
    print("Viriel distribution of stress : %s" % VirielStress)
706
    print("OpenGL real time rendering : %s" % OpenGL)
707
    print("Speed rendering : %s" % SpeedRendering)
708

    
709
    # Create Numpy array of CL vector with 8 FP32    
710
    MyCoM = np.zeros(4,dtype=MyFloat)
711
    MyDataX = np.zeros(Number*4, dtype=MyFloat)
712
    MyDataV = np.zeros(Number*4, dtype=MyFloat)
713
    MyPotential = np.zeros(Number, dtype=MyFloat)
714
    MyKinetic = np.zeros(Number, dtype=MyFloat)
715

    
716
    Marsaglia,Computing=DictionariesAPI()
717

    
718
    # Scan the OpenCL arrays
719
    Id=0
720
    HasXPU=False
721
    for platform in cl.get_platforms():
722
        for device in platform.get_devices():
723
            if Id==Device:
724
                PlatForm=platform
725
                XPU=device
726
                print("CPU/GPU selected: ",device.name.lstrip())
727
                print("Platform selected: ",platform.name)
728
                HasXPU=True
729
            Id+=1
730

    
731
    if HasXPU==False:
732
        print("No XPU #%i found in all of %i devices, sorry..." % (Device,Id-1))
733
        sys.exit()      
734

    
735
    # Create Context
736
    try:
737
        ctx = cl.Context([XPU])
738
        queue = cl.CommandQueue(ctx,properties=cl.command_queue_properties.PROFILING_ENABLE)
739
    except:
740
        print("Crash during context creation")
741

    
742
    # Build all routines used for the computing
743

    
744
    #BuildOptions="-cl-mad-enable -cl-kernel-arg-info -cl-fast-relaxed-math -cl-std=CL1.2 -DTRNG=%i -DTYPE=%i" % (Marsaglia[RNG],Computing[ValueType])
745
    BuildOptions="-cl-mad-enable -cl-fast-relaxed-math -DTRNG=%i -DTYPE=%i" % (Marsaglia[RNG],Computing[ValueType])
746

    
747
    if 'Intel' in PlatForm.name or 'Experimental' in PlatForm.name or 'Clover' in PlatForm.name or 'Portable' in PlatForm.name :
748
        MyRoutines = cl.Program(ctx, BlobOpenCL).build(options = BuildOptions)
749
    else:
750
        MyRoutines = cl.Program(ctx, BlobOpenCL).build(options = BuildOptions+" -cl-strict-aliasing")
751
        
752
    mf = cl.mem_flags
753
    # Read/Write approach for buffering
754
    clDataX = cl.Buffer(ctx, mf.READ_WRITE, MyDataX.nbytes)
755
    clDataV = cl.Buffer(ctx, mf.READ_WRITE, MyDataV.nbytes)
756
    clPotential = cl.Buffer(ctx, mf.READ_WRITE, MyPotential.nbytes)
757
    clKinetic = cl.Buffer(ctx, mf.READ_WRITE, MyKinetic.nbytes)
758
    clCoM = cl.Buffer(ctx, mf.READ_WRITE, MyCoM.nbytes)
759
    
760
    # Write/HostPointer approach for buffering
761
    # clDataX = cl.Buffer(ctx, mf.WRITE_ONLY|mf.COPY_HOST_PTR,hostbuf=MyDataX)
762
    # clDataV = cl.Buffer(ctx, mf.WRITE_ONLY|mf.COPY_HOST_PTR,hostbuf=MyDataV)
763
    # clPotential = cl.Buffer(ctx, mf.WRITE_ONLY|mf.COPY_HOST_PTR,hostbuf=MyPotential)
764
    # clKinetic = cl.Buffer(ctx, mf.WRITE_ONLY|mf.COPY_HOST_PTR,hostbuf=MyKinetic)
765
    # clCoM = cl.Buffer(ctx, mf.WRITE_ONLY|mf.COPY_HOST_PTR,hostbuf=MyCoM)
766

    
767
    print('All particles superimposed.')
768

    
769
    # Set particles to RNG points
770
    if InitialRandom:
771
        seed_w=np.uint32(nprnd(2**32))
772
        seed_z=np.uint32(nprnd(2**32))
773
    else:
774
        seed_w=np.uint32(19710211)
775
        seed_z=np.uint32(20081010)
776
            
777
    if Shape=='Ball':
778
        MyRoutines.InBallSplutterPoints(queue,(Number,1),None,clDataX,SizeOfShape,seed_w,seed_z)
779
    else:
780
        MyRoutines.InBoxSplutterPoints(queue,(Number,1),None,clDataX,SizeOfShape,seed_w,seed_z)
781

    
782
    print('All particules distributed')
783

    
784
    CLLaunch=MyRoutines.CenterOfMass(queue,(1,1),None,clDataX,clCoM,np.int32(Number))
785
    CLLaunch.wait()
786
    cl.enqueue_copy(queue,MyCoM,clCoM)
787
    print('Center Of Mass estimated: (%s,%s,%s)' % (MyCoM[0],MyCoM[1],MyCoM[2]))
788
    
789
    if VirielStress:
790
        CLLaunch=MyRoutines.SplutterStress(queue,(Number,1),None,clDataX,clDataV,clCoM,MyFloat(0.),np.uint32(110271),np.uint32(250173))
791
    else:
792
        CLLaunch=MyRoutines.SplutterStress(queue,(Number,1),None,clDataX,clDataV,clCoM,Velocity,np.uint32(110271),np.uint32(250173))
793
    CLLaunch.wait()
794

    
795
    print('All particules stressed')
796

    
797
    CLLaunch=MyRoutines.Potential(queue,(Number,1),None,clDataX,clPotential)
798
    CLLaunch=MyRoutines.Kinetic(queue,(Number,1),None,clDataV,clKinetic)
799
    CLLaunch.wait()
800
    cl.enqueue_copy(queue,MyPotential,clPotential)
801
    cl.enqueue_copy(queue,MyKinetic,clKinetic)
802
    print('Energy estimated: Viriel=%s Potential=%s Kinetic=%s\n'% (np.sum(MyPotential)+2*np.sum(MyKinetic),np.sum(MyPotential),np.sum(MyKinetic)))
803

    
804
    if SpeedRendering:
805
        SizeOfBox=max(2*MyKinetic)
806
    else:
807
        SizeOfBox=SizeOfShape        
808
    
809
    wall_time_start=time.time()
810

    
811
    Durations=np.array([],dtype=MyFloat)
812
    print('Starting!')
813
    if OpenGL:
814
        global ViewRX,ViewRY,ViewRZ
815
        Iterations=0
816
        ViewRX,ViewRY,ViewRZ = 0.,0.,0.
817
        # Launch OpenGL Loop
818
        glutInit(sys.argv)
819
        glutInitDisplayMode(GLUT_DOUBLE | GLUT_RGB)
820
        glutSetOption(GLUT_ACTION_ON_WINDOW_CLOSE,GLUT_ACTION_CONTINUE_EXECUTION)
821
        glutInitWindowSize(512,512)
822
        glutCreateWindow(b'NBodyGL')
823
        setup_viewport()
824
        glutReshapeFunc(reshape)
825
        glutDisplayFunc(display)
826
        glutIdleFunc(display)
827
        #   glutMouseFunc(mouse)
828
        glutSpecialFunc(special)
829
        glutKeyboardFunc(keyboard)
830
        glutMainLoop()
831
    else:
832
        for iteration in range(Iterations):
833
            Elapsed=MainOpenCL(clDataX,clDataV,Step,Method)
834
            if Verbose:
835
                print("Duration of #%s iteration: %s" % (Iterations,Elapsed))
836
            else:
837
                sys.stdout.write('.')
838
                sys.stdout.flush()
839
            Durations=np.append(Durations,Elapsed)
840

    
841
    print('\nEnding!')
842
            
843
    MyRoutines.CenterOfMass(queue,(1,1),None,clDataX,clCoM,np.int32(Number))
844
    CLLaunch=MyRoutines.Potential(queue,(Number,1),None,clDataX,clPotential)
845
    CLLaunch=MyRoutines.Kinetic(queue,(Number,1),None,clDataV,clKinetic)
846
    CLLaunch.wait()
847
    cl.enqueue_copy(queue,MyCoM,clCoM)
848
    cl.enqueue_copy(queue,MyPotential,clPotential)
849
    cl.enqueue_copy(queue,MyKinetic,clKinetic)
850
    print('\nCenter Of Mass estimated: (%s,%s,%s)' % (MyCoM[0],MyCoM[1],MyCoM[2]))
851
    print('Energy estimated: Viriel=%s Potential=%s Kinetic=%s\n'% (np.sum(MyPotential)+2.*np.sum(MyKinetic),np.sum(MyPotential),np.sum(MyKinetic)))
852

    
853
    print("Duration stats on device %s with %s iterations :\n\tMean:\t%s\n\tMedian:\t%s\n\tStddev:\t%s\n\tMin:\t%s\n\tMax:\t%s\n\n\tVariability:\t%s\n" % (Device,Iterations,np.mean(Durations),np.median(Durations),np.std(Durations),np.min(Durations),np.max(Durations),np.std(Durations)/np.median(Durations)))
854

    
855
    # Contraction of Square*Size*Hertz: Size*Size/Elapsed
856
    Squertz=np.ones(len(Durations))
857
    Squertz*=Number*Number
858
    Squertz/=Durations
859

    
860
    print("Squertz in log10 stats on device %s with %s iterations :\n\tMean:\t%s\n\tMedian:\t%s\n\tStddev:\t%s\n\tMin:\t%s\n\tMax:\t%s\n" % (Device,Iterations,np.log10(np.mean(Squertz)),np.log10(np.median(Squertz)),np.log10(np.std(Squertz)),np.log10(np.min(Squertz)),np.log10(np.max(Squertz))))
861
        
862
    clDataX.release()
863
    clDataV.release()
864
    clKinetic.release()
865
    clPotential.release()
866