Statistiques
| Révision :

root / NBody / NBodyGL.py @ 162

Historique | Voir | Annoter | Télécharger (23,02 ko)

1
#!/usr/bin/env python3
2
# -*- coding: utf-8 -*-
3
"""
4
Demonstrateur OpenCL d'interaction NCorps
5

6
Emmanuel QUEMENER <emmanuel.quemener@ens-lyon.fr> CeCILLv2
7
"""
8
import getopt
9
import sys
10
import time
11
import numpy as np
12
import pyopencl as cl
13
import pyopencl.array as cl_array
14
from numpy.random import randint as nprnd
15
import string, sys
16
from OpenGL.GL import *
17
from OpenGL.GLUT import *
18

    
19
def DictionariesAPI():
20
    Marsaglia={'CONG':0,'SHR3':1,'MWC':2,'KISS':3}
21
    Computing={'FP32':0,'FP64':1}
22
    return(Marsaglia,Computing)
23

    
24
BlobOpenCL= """
25
#define znew  ((z=36969*(z&65535)+(z>>16))<<16)
26
#define wnew  ((w=18000*(w&65535)+(w>>16))&65535)
27
#define MWC   (znew+wnew)
28
#define SHR3  (jsr=(jsr=(jsr=jsr^(jsr<<17))^(jsr>>13))^(jsr<<5))
29
#define CONG  (jcong=69069*jcong+1234567)
30
#define KISS  ((MWC^CONG)+SHR3)
31

32
#define TFP32 0
33
#define TFP64 1
34

35
#define MWCfp MWC * 2.3283064365386963e-10f
36
#define KISSfp KISS * 2.3283064365386963e-10f
37
#define SHR3fp SHR3 * 2.3283064365386963e-10f
38
#define CONGfp CONG * 2.3283064365386963e-10f
39

40
#define PI 3.141592653589793238462643197169399375105820974944592307816406286e0f
41

42
#define SMALL_NUM 1.e-9f
43

44
#define LENGTH 1.e0f
45

46
#if TYPE == TFP32
47
#define MYFLOAT4 float4
48
#define MYFLOAT8 float8
49
#define MYFLOAT float
50
#define DISTANCE fast_distance
51
#else
52
#if defined(cl_khr_fp64)  // Khronos extension available?
53
#pragma OPENCL EXTENSION cl_khr_fp64 : enable
54
#define DOUBLE_SUPPORT_AVAILABLE
55
#elif defined(cl_amd_fp64)  // AMD extension available?
56
#pragma OPENCL EXTENSION cl_amd_fp64 : enable
57
#define DOUBLE_SUPPORT_AVAILABLE
58
#endif
59
#define MYFLOAT4 double4
60
#define MYFLOAT8 double8
61
#define MYFLOAT double
62
#define DISTANCE distance
63
#endif
64

65

66
MYFLOAT4 Interaction(MYFLOAT4 m,MYFLOAT4 n)
67
{
68
    private MYFLOAT r=DISTANCE(n,m);
69

70
    return((n-m)/(MYFLOAT)(r*r*r));
71
}
72

73
MYFLOAT4 InteractionCore(MYFLOAT4 m,MYFLOAT4 n)
74
{
75
    private MYFLOAT core=(MYFLOAT)1.e5f;
76
    private MYFLOAT r=DISTANCE(n,m);
77
    private MYFLOAT d=r*r+core*core;
78

79
    return(core*(n-m)/(MYFLOAT)(d*d));
80
}
81

82
MYFLOAT PairPotential(MYFLOAT4 m,MYFLOAT4 n)
83
{
84
    return((MYFLOAT)(-1.e0f)/(DISTANCE(n,m)));
85
}
86

87
MYFLOAT AtomicPotential(__global MYFLOAT4* clDataX,int gid)
88
{
89
    private MYFLOAT potential=(MYFLOAT)0.e0f;
90
    private MYFLOAT4 x=clDataX[gid]; 
91
    
92
    for (int i=0;i<get_global_size(0);i++)
93
    {
94
        if (gid != i)
95
        potential+=PairPotential(x,clDataX[i]);
96
    }
97

98
    barrier(CLK_GLOBAL_MEM_FENCE);
99
    return(potential);
100
}
101

102
MYFLOAT AtomicPotentialCoM(__global MYFLOAT4* clDataX,__global MYFLOAT4* clCoM,int gid)
103
{
104
    return(PairPotential(clDataX[gid],clCoM[0]));
105
}
106

107
MYFLOAT8 AtomicRungeKutta(__global MYFLOAT4* clDataInX,__global MYFLOAT4* clDataInV,int gid,MYFLOAT dt)
108
{
109
    private MYFLOAT4 a0,v0,x0,a1,v1,x1,a2,v2,x2,a3,v3,x3,a4,v4,x4,xf,vf;
110

111
    a0=(MYFLOAT4)(0.e0f,0.e0f,0.e0f,0.e0f);
112
    v0=(MYFLOAT4)clDataInV[gid];
113
    x0=(MYFLOAT4)clDataInX[gid];
114
    int N = get_global_size(0);    
115
    
116
    for (int i=0;i<N;i++)
117
    {
118
        if (gid != i)
119
        a0+=Interaction(x0,clDataInX[i]);
120
    }
121
        
122
    a1=(MYFLOAT4)(0.e0f,0.e0f,0.e0f,0.e0f);
123
    v1=v0+a0*dt;
124
    x1=x0+v0*dt;
125
    for (int i=0;i<N;i++)
126
    {
127
        if (gid != i)
128
        a1+=Interaction(x1,clDataInX[i]);
129
    }
130

131
    a2=(MYFLOAT4)(0.e0f,0.e0f,0.e0f,0.e0f);
132
    v2=v0+a1*dt*(MYFLOAT)5.e-1f;
133
    x2=x0+v1*dt*(MYFLOAT)5.e-1f;
134
    for (int i=0;i<N;i++)
135
    {
136
        if (gid != i)
137
        a2+=Interaction(x2,clDataInX[i]);
138
    }
139
    
140
    a3=(MYFLOAT4)(0.e0f,0.e0f,0.e0f,0.e0f);
141
    v3=v0+a2*dt*(MYFLOAT)5.e-1f;
142
    x3=x0+v2*dt*(MYFLOAT)5.e-1f;
143
    for (int i=0;i<N;i++)
144
    {
145
        if (gid != i)
146
        a3+=Interaction(x3,clDataInX[i]);
147
    }
148
    
149
    a4=(MYFLOAT4)(0.e0f,0.e0f,0.e0f,0.e0f);
150
    v4=v0+a3*dt;
151
    x4=x0+v3*dt;
152
    for (int i=0;i<N;i++)
153
    {
154
        if (gid != i)
155
        a4+=Interaction(x4,clDataInX[i]);
156
    }
157
    
158
    xf=x0+dt*(v1+(MYFLOAT)2.e0f*(v2+v3)+v4)/(MYFLOAT)6.e0f;
159
    vf=v0+dt*(a1+(MYFLOAT)2.e0f*(a2+a3)+a4)/(MYFLOAT)6.e0f;
160
     
161
    return((MYFLOAT8)(xf.s0,xf.s1,xf.s2,1.e0f,vf.s0,vf.s1,vf.s2,0.e0f));
162
}
163

164
// Elements from : http://doswa.com/2009/01/02/fourth-order-runge-kutta-numerical-integration.html
165

166
MYFLOAT8 AtomicHeun(__global MYFLOAT4* clDataInX,__global MYFLOAT4* clDataInV,int gid,MYFLOAT dt)
167
{
168
    private MYFLOAT4 x,v,a,xi,vi,ai,xf,vf;
169

170
    x=(MYFLOAT4)clDataInX[gid];
171
    v=(MYFLOAT4)clDataInV[gid];
172
    a=(MYFLOAT4)(0.e0f,0.e0f,0.e0f,0.e0f);
173

174
    for (int i=0;i<get_global_size(0);i++)
175
    {
176
        if (gid != i)
177
        a+=Interaction(x,clDataInX[i]);
178
    }
179

180
    vi=v+dt*a;
181
    xi=x+dt*vi;
182
    ai=(MYFLOAT4)(0.e0f,0.e0f,0.e0f,0.e0f);
183

184
    for (int i=0;i<get_global_size(0);i++)
185
    {
186
        if (gid != i)
187
        ai+=Interaction(xi,clDataInX[i]);
188
    }
189

190
    vf=v+dt*(a+ai)/(MYFLOAT)2.e0f;
191
    xf=x+dt*(v+vi)/(MYFLOAT)2.e0f;
192

193
    return((MYFLOAT8)(xf.s0,xf.s1,xf.s2,1.e0f,vf.s0,vf.s1,vf.s2,0.e0f));
194
}
195

196
MYFLOAT8 AtomicImplicitEuler(__global MYFLOAT4* clDataInX,__global MYFLOAT4* clDataInV,int gid,MYFLOAT dt)
197
{
198
    private MYFLOAT4 x,v,a,xf,vf;
199

200
    x=(MYFLOAT4)clDataInX[gid];
201
    v=(MYFLOAT4)clDataInV[gid];
202
    a=(MYFLOAT4)(0.e0f,0.e0f,0.e0f,0.e0f);
203

204
    for (int i=0;i<get_global_size(0);i++)
205
    {
206
        if (gid != i)
207
        a+=Interaction(x,clDataInX[i]);
208
    }
209
       
210
    vf=v+dt*a;
211
    xf=x+dt*vf;
212
 
213
    return((MYFLOAT8)(xf.s0,xf.s1,xf.s2,1.e0f,vf.s0,vf.s1,vf.s2,0.e0f));
214
}
215

216
MYFLOAT8 AtomicExplicitEuler(__global MYFLOAT4* clDataInX,__global MYFLOAT4* clDataInV,int gid,MYFLOAT dt)
217
{
218
    MYFLOAT4 x,v,a,xf,vf;
219

220
    x=(MYFLOAT4)clDataInX[gid];
221
    v=(MYFLOAT4)clDataInV[gid];
222
    a=(MYFLOAT4)(0.e0f,0.e0f,0.e0f,0.e0f);
223

224
    for (int i=0;i<get_global_size(0);i++)
225
    {
226
        if (gid != i)
227
        a+=Interaction(x,clDataInX[i]);
228
    }
229
       
230
    vf=v+dt*a;
231
    xf=x+dt*v;
232
 
233
    return((MYFLOAT8)(xf.s0,xf.s1,xf.s2,1.e0f,vf.s0,vf.s1,vf.s2,0.e0f));
234
}
235

236
__kernel void SplutterPoints(__global MYFLOAT4* clDataX, MYFLOAT box, 
237
                             uint seed_z,uint seed_w)
238
{
239
    int gid=get_global_id(0);
240
    uint z=seed_z+gid;
241
    uint w=seed_w+gid;
242
     
243
    for (int i=0;i<gid;i++)
244
    {
245
        private MYFLOAT heat=MWCfp;
246
    }
247
 
248
    // Distribute in sphere
249
    MYFLOAT radius=MWCfp*box;
250
    MYFLOAT theta=MWCfp*PI;
251
    MYFLOAT phi=MWCfp*PI*(MYFLOAT)2.e0f;
252
    // cast to float for sin,cos are NEEDED by Mesa FP64 implementation!
253
    MYFLOAT sinTheta=sin((float)theta);    
254
    clDataX[gid].s0=radius*sinTheta*cos((float)phi)/2.e0f;
255
    clDataX[gid].s1=radius*sinTheta*sin((float)phi)/2.e0f;
256
    clDataX[gid].s2=radius*cos((float)theta)/2.e0f;
257
    clDataX[gid].s3=(MYFLOAT)0.e0f;
258
    barrier(CLK_GLOBAL_MEM_FENCE);
259

260
    // Distribute in box
261
    // MYFLOAT x0=box*(MYFLOAT)(MWCfp-0.5);
262
    // MYFLOAT y0=box*(MYFLOAT)(MWCfp-0.5);
263
    // MYFLOAT z0=box*(MYFLOAT)(MWCfp-0.5);
264
    // clDataX[gid].s0123 = (MYFLOAT4) (x0,y0,z0,0.e0f);
265
}
266

267
__kernel void SplutterStress(__global MYFLOAT4* clDataX,__global MYFLOAT4* clDataV,__global MYFLOAT4* clCoM, MYFLOAT velocity,uint seed_z,uint seed_w)
268
{
269
    int gid = get_global_id(0);
270
    MYFLOAT N = (MYFLOAT)get_global_size(0);
271
    uint z=seed_z+(uint)gid;
272
    uint w=seed_w-(uint)gid;
273

274
    if (velocity<SMALL_NUM) {
275
       MYFLOAT4 SpeedVector=(MYFLOAT4)normalize(cross(clDataX[gid],clCoM[0]))*sqrt(-AtomicPotential(clDataX,gid)/(MYFLOAT)2.e0f);
276
       clDataV[gid]=SpeedVector;
277
    }
278
    else
279
    {    
280
       // cast to float for sin,cos are NEEDED by Mesa FP64 implementation!
281
       MYFLOAT theta=MWCfp*PI;
282
       MYFLOAT phi=MWCfp*PI*(MYFLOAT)2.e0f;
283
       MYFLOAT sinTheta=sin((float)theta);
284

285
       clDataV[gid].s0=velocity*sinTheta*cos((float)phi);
286
       clDataV[gid].s1=velocity*sinTheta*sin((float)phi);
287
       clDataV[gid].s2=velocity*cos((float)theta);
288
       clDataV[gid].s3=(MYFLOAT)0.e0f;
289
    }
290
    barrier(CLK_GLOBAL_MEM_FENCE);
291
}
292

293
__kernel void RungeKutta(__global MYFLOAT4* clDataX,__global MYFLOAT4* clDataV,MYFLOAT h)
294
{
295
    int gid = get_global_id(0);
296
    
297
    MYFLOAT8 clDataGid=AtomicRungeKutta(clDataX,clDataV,gid,h);
298
    barrier(CLK_GLOBAL_MEM_FENCE);
299
    clDataX[gid]=clDataGid.lo;
300
    clDataV[gid]=clDataGid.hi;
301
}
302

303
__kernel void ImplicitEuler(__global MYFLOAT4* clDataX,__global MYFLOAT4* clDataV,MYFLOAT h)
304
{
305
    int gid = get_global_id(0);
306
    
307
    MYFLOAT8 clDataGid=AtomicImplicitEuler(clDataX,clDataV,gid,h);
308
    barrier(CLK_GLOBAL_MEM_FENCE);
309
    clDataX[gid]=clDataGid.lo;
310
    clDataV[gid]=clDataGid.hi;
311
}
312

313
__kernel void Heun(__global MYFLOAT4* clDataX,__global MYFLOAT4* clDataV,MYFLOAT h)
314
{
315
    int gid = get_global_id(0);
316
    
317
    MYFLOAT8 clDataGid=AtomicHeun(clDataX,clDataV,gid,h);
318
    barrier(CLK_GLOBAL_MEM_FENCE);
319
    clDataX[gid]=clDataGid.lo;
320
    clDataV[gid]=clDataGid.hi;
321
}
322

323
__kernel void ExplicitEuler(__global MYFLOAT4* clDataX,__global MYFLOAT4* clDataV,MYFLOAT h)
324
{
325
    int gid = get_global_id(0);
326
    
327
    MYFLOAT8 clDataGid=AtomicExplicitEuler(clDataX,clDataV,gid,h);
328
    barrier(CLK_GLOBAL_MEM_FENCE);
329
    clDataX[gid]=clDataGid.lo;
330
    clDataV[gid]=clDataGid.hi;
331
}
332

333
__kernel void CoMPotential(__global MYFLOAT4* clDataX,__global MYFLOAT4* clCoM,__global MYFLOAT* clPotential)
334
{
335
    int gid = get_global_id(0);
336

337
    clPotential[gid]=PairPotential(clDataX[gid],clCoM[0]);
338
}
339

340
__kernel void Potential(__global MYFLOAT4* clDataX,__global MYFLOAT* clPotential)
341
{
342
    int gid = get_global_id(0);
343

344
    MYFLOAT potential=(MYFLOAT)0.e0f;
345
    MYFLOAT4 x=clDataX[gid]; 
346
    
347
    for (int i=0;i<get_global_size(0);i++)
348
    {
349
        if (gid != i)
350
        potential+=PairPotential(x,clDataX[i]);
351
    }
352
                 
353
    barrier(CLK_GLOBAL_MEM_FENCE);
354
    clPotential[gid]=potential*(MYFLOAT)5.e-1f;
355
}
356

357
__kernel void CenterOfMass(__global MYFLOAT4* clDataX,__global MYFLOAT4* clCoM,int Size)
358
{
359
    MYFLOAT4 CoM=clDataX[0]; 
360

361
    for (int i=1;i<Size;i++)
362
    {
363
        CoM+=clDataX[i];
364
    }
365

366
    barrier(CLK_GLOBAL_MEM_FENCE);
367
    clCoM[0]=(MYFLOAT4)(CoM.s0,CoM.s1,CoM.s2,1.e0f)/(MYFLOAT)Size;
368
}
369

370
__kernel void Kinetic(__global MYFLOAT4* clDataV,__global MYFLOAT* clKinetic)
371
{
372
    int gid = get_global_id(0);
373
    
374
    barrier(CLK_GLOBAL_MEM_FENCE);
375
    MYFLOAT d=(MYFLOAT)length(clDataV[gid]);
376
    clKinetic[gid]=(MYFLOAT)5.e-1f*(MYFLOAT)(d*d);
377
}
378
"""
379

    
380
def display(*args):
381

    
382
    global MyDataX,MyDataV,clDataX,clDataV,Step,Method,Number,Iterations
383
    
384
    glClearColor(0.0, 0.0, 0.0, 0.0)
385
    glClear(GL_COLOR_BUFFER_BIT)
386
    glColor3f(1.0,1.0,1.0)
387
    
388
    time_start=time.time()
389
    if Method=="RungeKutta":            
390
        CLLaunch=MyRoutines.RungeKutta(queue,(Number,1),None,clDataX,clDataV,Step)
391
    elif Method=="ExplicitEuler":
392
        CLLaunch=MyRoutines.ExplicitEuler(queue,(Number,1),None,clDataX,clDataV,Step)
393
    elif Method=="Heun":
394
        CLLaunch=MyRoutines.Heun(queue,(Number,1),None,clDataX,clDataV,Step)
395
    else:
396
        CLLaunch=MyRoutines.ImplicitEuler(queue,(Number,1),None,clDataX,clDataV,Step)
397
    CLLaunch.wait()
398
    print("Duration of #%s iteration: %s" % (Iterations,(time.time()-time_start)))
399

    
400
    cl.enqueue_copy(queue, MyDataX, clDataX)
401
    # print(MyDataX.reshape(Number,4))
402
    MyDataX.reshape(Number,4)[:,3]=1
403
    glVertexPointerf(MyDataX.reshape(Number,4))
404
    # cl.enqueue_copy(queue, MyDataV, clDataV)
405
    # print(MyDataV.reshape(Number,4))
406
    # MyDataV.reshape(Number,4)[:,3]=1
407
    # glVertexPointerf(MyDataV.reshape(Number,4))
408
    glEnableClientState(GL_VERTEX_ARRAY)
409
    glDrawArrays(GL_POINTS, 0, Number)
410
    glDisableClientState(GL_VERTEX_ARRAY)
411
    glFlush()
412
    Iterations+=1
413
    glutSwapBuffers()
414

    
415
def halt():
416
    pass
417

    
418
def keyboard(k, x, y):
419
    global view_rotz
420
    LC_Z = as_8_bit( 'z' )
421
    UC_Z = as_8_bit( 'Z' )
422

    
423
    if k == LC_Z:
424
        view_rotz += 1.0
425
    elif k == UC_Z:
426
        view_rotz -= 1.0
427
    elif ord(k) == 27: # Escape
428
        glutSetOption(GLUT_ACTION_ON_WINDOW_CLOSE,GLUT_ACTION_CONTINUE_EXECUTION)
429
        glutSetOption(GLUT_ACTION_GLUTMAINLOOP_RETURNS,GLUT_ACTION_CONTINUE_EXECUTION) 
430
        glutLeaveMainLoop()
431
        return(False)
432
    else:
433
        return
434
    glRotatef(view_rotz, 0.0, 0.0, 1.0)
435
    glutPostRedisplay()
436

    
437
def special(k, x, y):
438
    global view_rotx, view_roty, view_rotz
439

    
440
    if k == GLUT_KEY_UP:
441
        view_rotx += 1.0
442
    elif k == GLUT_KEY_DOWN:
443
        view_rotx -= 1.0
444
    elif k == GLUT_KEY_LEFT:
445
        view_roty += 1.0
446
    elif k == GLUT_KEY_RIGHT:
447
        view_roty -= 1.0
448
    else:
449
        return
450
    glRotatef(view_rotx, 1.0, 0.0, 0.0)
451
    glRotatef(view_roty, 0.0, 1.0, 0.0)
452
    glutPostRedisplay()
453

    
454
def mouse(button, state, x, y):
455
    global angle, delta_angle, halted
456
    if button == GLUT_LEFT_BUTTON:
457
        angle = angle + delta_angle
458
    elif button == GLUT_RIGHT_BUTTON:
459
        angle = angle - delta_angle
460
    elif button == GLUT_MIDDLE_BUTTON and state == GLUT_DOWN:
461
        if halted:
462
            glutIdleFunc(display)
463
            halted = 0
464
        else:
465
            glutIdleFunc(halt)
466
            halted = 1
467

    
468
def setup_viewport():
469
    global SizeOfBox
470
    glMatrixMode(GL_PROJECTION)
471
    glLoadIdentity()
472
    glOrtho(-SizeOfBox, SizeOfBox, -SizeOfBox, SizeOfBox, -SizeOfBox, SizeOfBox)
473
    
474
def reshape(w, h):
475
    glViewport(0, 0, w, h)
476
    setup_viewport()
477

    
478
if __name__=='__main__':
479

    
480
    global Number,Step,clDataX,clDataV,MyDataX,MyDataV,Method,SizeOfBox
481
    
482
    # ValueType
483
    ValueType='FP32'
484
    class MyFloat(np.float32):pass
485
    #    clType8=cl_array.vec.float8
486
    clType4=cl_array.vec.float4
487
    # Set defaults values
488
    np.set_printoptions(precision=2)  
489
    # Id of Device : 1 is for first find !
490
    Device=0
491
    # Iterations is integer
492
    Number=2
493
    # Size of box
494
    SizeOfBox=MyFloat(1.)
495
    # Initial velocity of particules
496
    Velocity=MyFloat(1.)
497
    # Redo the last process
498
    Iterations=int(np.pi*1024)
499
    # Step
500
    Step=MyFloat(1./256)
501
    # Method of integration
502
    Method='ImplicitEuler'
503
    # InitialRandom
504
    InitialRandom=False
505
    # RNG Marsaglia Method
506
    RNG='MWC'
507
    # CheckEnergies
508
    CheckEnergies=False
509
    # Display samples in 3D
510
    GraphSamples=False
511
    # Viriel Distribution of stress
512
    VirielStress=True
513
    
514
    HowToUse='%s -h [Help] -r [InitialRandom] -e [VirielStress] -g [GraphSamples] -c [CheckEnergies] -d <DeviceId> -n <NumberOfParticules> -z <SizeOfBox> -v <Velocity> -s <Step> -i <Iterations> -m <ImplicitEuler|RungeKutta|ExplicitEuler|Heun> -t <FP32|FP64>'
515

    
516
    try:
517
        opts, args = getopt.getopt(sys.argv[1:],"rehgcd:n:z:v:i:s:m:t:",["random","viriel","graph","check","device=","number=","size=","velocity=","iterations=","step=","method=","valuetype="])
518
    except getopt.GetoptError:
519
        print(HowToUse % sys.argv[0])
520
        sys.exit(2)
521

    
522
    for opt, arg in opts:
523
        if opt == '-h':
524
            print(HowToUse % sys.argv[0])
525

    
526
            print("\nInformations about devices detected under OpenCL:")
527
            try:
528
                Id=0
529
                for platform in cl.get_platforms():
530
                    for device in platform.get_devices():
531
                        #deviceType=cl.device_type.to_string(device.type)
532
                        deviceType="xPU"
533
                        print("Device #%i from %s of type %s : %s" % (Id,platform.vendor.lstrip(),deviceType,device.name.lstrip()))
534
                        Id=Id+1
535
                sys.exit()
536
            except ImportError:
537
                print("Your platform does not seem to support OpenCL")
538
                sys.exit()
539

    
540
        elif opt in ("-t", "--valuetype"):
541
            if arg=='FP64':
542
                class MyFloat(np.float64): pass
543
                clType4=cl_array.vec.double4
544
            else:
545
                class MyFloat(np.float32):pass
546
                clType4=cl_array.vec.float4
547
            ValueType = arg
548
        elif opt in ("-d", "--device"):
549
            Device=int(arg)
550
        elif opt in ("-m", "--method"):
551
            Method=arg
552
        elif opt in ("-n", "--number"):
553
            Number=int(arg)
554
        elif opt in ("-z", "--size"):
555
            SizeOfBox=MyFloat(arg)
556
        elif opt in ("-v", "--velocity"):
557
            Velocity=MyFloat(arg)
558
            VirielStress=False
559
        elif opt in ("-s", "--step"):
560
            Step=MyFloat(arg)
561
        elif opt in ("-i", "--iterations"):
562
            Iterations=int(arg)
563
        elif opt in ("-r", "--random"):
564
            InitialRandom=True
565
        elif opt in ("-c", "--check"):
566
            CheckEnergies=True
567
        elif opt in ("-g", "--graph"):
568
            GraphSamples=True
569
        elif opt in ("-e", "--viriel"):
570
            VirielStress=True
571
                        
572
    SizeOfBox=MyFloat(SizeOfBox*Number)
573
    Velocity=MyFloat(Velocity)
574
    Step=MyFloat(Step)
575
                
576
    print("Device choosed : %s" % Device)
577
    print("Number of particules : %s" % Number)
578
    print("Size of Box : %s" % SizeOfBox)
579
    print("Initial velocity : %s" % Velocity)
580
    print("Number of iterations : %s" % Iterations)
581
    print("Step of iteration : %s" % Step)
582
    print("Method of resolution : %s" % Method)
583
    print("Initial Random for RNG Seed : %s" % InitialRandom)
584
    print("Check for Energies : %s" % CheckEnergies)
585
    print("Graph for Samples : %s" % GraphSamples)
586
    print("ValueType is : %s" % ValueType)
587
    print("Viriel distribution of stress %s" % VirielStress)
588

    
589
    # Create Numpy array of CL vector with 8 FP32    
590
    MyCoM = np.zeros(4,dtype=MyFloat)
591
    MyDataX = np.zeros(Number*4, dtype=MyFloat)
592
    MyDataV = np.zeros(Number*4, dtype=MyFloat)
593
    # MyCoM = np.zeros(1,dtype=clType4)
594
    # MyDataX = np.zeros(Number, dtype=clType4)
595
    # MyDataV = np.zeros(Number, dtype=clType4)
596
    MyPotential = np.zeros(Number, dtype=MyFloat)
597
    MyKinetic = np.zeros(Number, dtype=MyFloat)
598

    
599
    Marsaglia,Computing=DictionariesAPI()
600

    
601
    # Scan the OpenCL arrays
602
    Id=0
603
    HasXPU=False
604
    for platform in cl.get_platforms():
605
        for device in platform.get_devices():
606
            if Id==Device:
607
                PlatForm=platform
608
                XPU=device
609
                print("CPU/GPU selected: ",device.name.lstrip())
610
                print("Platform selected: ",platform.name)
611
                HasXPU=True
612
            Id+=1
613

    
614
    if HasXPU==False:
615
        print("No XPU #%i found in all of %i devices, sorry..." % (Device,Id-1))
616
        sys.exit()      
617

    
618
    # Create Context
619
    try:
620
        ctx = cl.Context([XPU])
621
        queue = cl.CommandQueue(ctx,properties=cl.command_queue_properties.PROFILING_ENABLE)
622
    except:
623
        print("Crash during context creation")
624

    
625
    print(Marsaglia[RNG],Computing[ValueType])
626
    # Build all routines used for the computing
627
    BuildOptions="-cl-mad-enable -cl-kernel-arg-info -cl-fast-relaxed-math -cl-std=CL1.2 -DTRNG=%i -DTYPE=%i" % (Marsaglia[RNG],Computing[ValueType])
628
    
629
    if 'Intel' in PlatForm.name or 'Experimental' in PlatForm.name or 'Clover' in PlatForm.name or 'Portable' in PlatForm.name :
630
        MyRoutines = cl.Program(ctx, BlobOpenCL).build(options = BuildOptions)
631
    else:
632
        MyRoutines = cl.Program(ctx, BlobOpenCL).build(options = BuildOptions+" -cl-strict-aliasing")
633
    
634
    mf = cl.mem_flags
635
    # Read/Write approach for buffering
636
    # clDataX = cl.Buffer(ctx, mf.READ_WRITE, MyDataX.nbytes)
637
    # clDataV = cl.Buffer(ctx, mf.READ_WRITE, MyDataV.nbytes)
638
    # clPotential = cl.Buffer(ctx, mf.READ_WRITE, MyPotential.nbytes)
639
    # clKinetic = cl.Buffer(ctx, mf.READ_WRITE, MyKinetic.nbytes)
640
    # clCoM = cl.Buffer(ctx, mf.READ_WRITE, MyCoM.nbytes)
641

    
642
    # Write/HostPoniter approach for buffering
643
    clDataX = cl.Buffer(ctx, mf.WRITE_ONLY|mf.COPY_HOST_PTR,hostbuf=MyDataX)
644
    clDataV = cl.Buffer(ctx, mf.WRITE_ONLY|mf.COPY_HOST_PTR,hostbuf=MyDataV)
645
    clPotential = cl.Buffer(ctx, mf.WRITE_ONLY|mf.COPY_HOST_PTR,hostbuf=MyPotential)
646
    clKinetic = cl.Buffer(ctx, mf.WRITE_ONLY|mf.COPY_HOST_PTR,hostbuf=MyKinetic)
647
    clCoM = cl.Buffer(ctx, mf.WRITE_ONLY|mf.COPY_HOST_PTR,hostbuf=MyCoM)
648

    
649
    print('All particles superimposed.')
650

    
651
    # Set particles to RNG points
652
    if InitialRandom:
653
        MyRoutines.SplutterPoints(queue,(Number,1),None,clDataX,SizeOfBox,np.uint32(nprnd(2**32)),np.uint32(nprnd(2**32)))
654
    else:
655
        MyRoutines.SplutterPoints(queue,(Number,1),None,clDataX,SizeOfBox,np.uint32(110271),np.uint32(250173))
656

    
657
    print('All particules distributed')
658

    
659
    CLLaunch=MyRoutines.CenterOfMass(queue,(1,1),None,clDataX,clCoM,np.int32(Number))
660
    CLLaunch.wait()
661
    cl.enqueue_copy(queue,MyCoM,clCoM)
662
    print('Center Of Mass: (%s,%s,%s)' % (MyCoM[0],MyCoM[1],MyCoM[2]))
663
    
664
    if VirielStress:
665
        CLLaunch=MyRoutines.SplutterStress(queue,(Number,1),None,clDataX,clDataV,clCoM,MyFloat(0.),np.uint32(110271),np.uint32(250173))
666
    else:
667
        CLLaunch=MyRoutines.SplutterStress(queue,(Number,1),None,clDataX,clDataV,clCoM,Velocity,np.uint32(110271),np.uint32(250173))
668
    CLLaunch.wait()
669

    
670
    if GraphSamples:
671
        cl.enqueue_copy(queue, MyDataX, clDataX)
672
        # t0=np.array([[MyDataX[0][0],MyDataX[0][1],MyDataX[0][2]]])
673
        # t1=np.array([[MyDataX[1][0],MyDataX[1][1],MyDataX[1][2]]])
674
        # tL=np.array([[MyDataX[-1][0],MyDataX[-1][1],MyDataX[-1][2]]])
675

    
676
    CLLaunch=MyRoutines.Potential(queue,(Number,1),None,clDataX,clPotential)
677
    CLLaunch=MyRoutines.Kinetic(queue,(Number,1),None,clDataV,clKinetic)
678
    CLLaunch.wait()
679
    cl.enqueue_copy(queue,MyPotential,clPotential)
680
    cl.enqueue_copy(queue,MyKinetic,clKinetic)
681
    print('Viriel=%s Potential=%s Kinetic=%s'% (np.sum(MyPotential)+2*np.sum(MyKinetic),np.sum(MyPotential),np.sum(MyKinetic)))
682
 
683
    if GraphSamples:
684
        cl.enqueue_copy(queue, MyDataX, clDataX)
685
        # t0=np.array([[MyDataX[0][0],MyDataX[0][1],MyDataX[0][2]]])
686
        # t1=np.array([[MyDataX[1][0],MyDataX[1][1],MyDataX[1][2]]])
687
        # tL=np.array([[MyDataX[-1][0],MyDataX[-1][1],MyDataX[-1][2]]])
688

    
689
    wall_time_start=time.time()
690

    
691
    print("Use the mouse buttons to control some of the dots.")
692
    print("Hit any key to quit.")
693
    global view_rotx,view_roty,view_rotz,Iterations
694
    (view_rotx,view_roty,view_rotz)=(0.0, 0.0, 0.0)
695
    Iterations=0
696
    glutInit(sys.argv)
697
    glutInitDisplayMode(GLUT_DOUBLE | GLUT_RGB)
698
    glutInitWindowSize(512,512)
699
    glutCreateWindow(b'NBodyGL')
700
    setup_viewport()
701
    glutReshapeFunc(reshape)
702
    glutDisplayFunc(display)
703
    glutIdleFunc(display)
704
#   glutMouseFunc(mouse)
705
    glutSpecialFunc(special)
706
    Loop=glutKeyboardFunc(keyboard)
707
    glutMainLoop()
708
    
709
    print("\nWall Duration on %s for each %s\n" % (Device,(time.time()-wall_time_start)/Iterations))
710
    
711
    MyRoutines.CenterOfMass(queue,(1,1),None,clDataX,clCoM,np.int32(Number))
712
    CLLaunch=MyRoutines.Potential(queue,(Number,1),None,clDataX,clPotential)
713
    CLLaunch=MyRoutines.Kinetic(queue,(Number,1),None,clDataV,clKinetic)
714
    CLLaunch.wait()
715
    cl.enqueue_copy(queue,MyCoM,clCoM)
716
    cl.enqueue_copy(queue,MyPotential,clPotential)
717
    cl.enqueue_copy(queue,MyKinetic,clKinetic)
718
    print('Center Of Mass: (%s,%s,%s)' % (MyCoM[0],MyCoM[1],MyCoM[2]))
719
    print('Viriel=%s Potential=%s Kinetic=%s'% (np.sum(MyPotential)+2.*np.sum(MyKinetic),np.sum(MyPotential),np.sum(MyKinetic)))
720
    
721
    if GraphSamples:    
722
        t0=np.transpose(np.reshape(t0,(Iterations+1,3)))
723
        t1=np.transpose(np.reshape(t1,(Iterations+1,3)))
724
        tL=np.transpose(np.reshape(tL,(Iterations+1,3)))
725
    
726
        import matplotlib.pyplot as plt
727
        from mpl_toolkits.mplot3d import Axes3D
728
    
729
        fig = plt.figure()
730
        ax = fig.gca(projection='3d')
731
        ax.scatter(t0[0],t0[1],t0[2], marker='^',color='blue')
732
        ax.scatter(t1[0],t1[1],t1[2], marker='o',color='red')
733
        ax.scatter(tL[0],tL[1],tL[2], marker='D',color='green')
734
   
735
        ax.set_xlabel('X Label')
736
        ax.set_ylabel('Y Label')
737
        ax.set_zlabel('Z Label')
738

    
739
        plt.show()
740
    
741
    clDataX.release()
742
    clDataV.release()
743
    clKinetic.release()
744
    clPotential.release()
745