Statistiques
| Révision :

root / NBody / NBody.py @ 162

Historique | Voir | Annoter | Télécharger (20,29 ko)

1
#!/usr/bin/env python3
2
# -*- coding: utf-8 -*-
3
"""
4
Demonstrateur OpenCL d'interaction NCorps
5

6
Emmanuel QUEMENER <emmanuel.quemener@ens-lyon.fr> CeCILLv2
7
"""
8
import getopt
9
import sys
10
import time
11
import numpy as np
12
import pyopencl as cl
13
import pyopencl.array as cl_array
14
from numpy.random import randint as nprnd
15

    
16
def DictionariesAPI():
17
    Marsaglia={'CONG':0,'SHR3':1,'MWC':2,'KISS':3}
18
    Computing={'FP32':0,'FP64':1}
19
    return(Marsaglia,Computing)
20

    
21
BlobOpenCL= """
22
#define znew  ((z=36969*(z&65535)+(z>>16))<<16)
23
#define wnew  ((w=18000*(w&65535)+(w>>16))&65535)
24
#define MWC   (znew+wnew)
25
#define SHR3  (jsr=(jsr=(jsr=jsr^(jsr<<17))^(jsr>>13))^(jsr<<5))
26
#define CONG  (jcong=69069*jcong+1234567)
27
#define KISS  ((MWC^CONG)+SHR3)
28

29
#define TFP32 0
30
#define TFP64 1
31

32
#define LENGTH 1.e0f
33

34
#if TYPE == TFP32
35
#define MYFLOAT4 float4
36
#define MYFLOAT8 float8
37
#define MYFLOAT float
38
#define DISTANCE fast_distance
39
#else
40
#if defined(cl_khr_fp64)  // Khronos extension available?
41
#pragma OPENCL EXTENSION cl_khr_fp64 : enable
42
#define DOUBLE_SUPPORT_AVAILABLE
43
#elif defined(cl_amd_fp64)  // AMD extension available?
44
#pragma OPENCL EXTENSION cl_amd_fp64 : enable
45
#define DOUBLE_SUPPORT_AVAILABLE
46
#endif
47
#define MYFLOAT4 double4
48
#define MYFLOAT8 double8
49
#define MYFLOAT double
50
#define DISTANCE distance
51
#endif
52

53
#define MWCfp (MYFLOAT)(MWC * 2.3283064365386963e-10f)
54
#define KISSfp (MYFLOAT)(KISS * 2.3283064365386963e-10f)
55
#define SHR3fp (MYFLOAT)(SHR3 * 2.3283064365386963e-10f)
56
#define CONGfp (MYFLOAT)(CONG * 2.3283064365386963e-10f)
57

58
#define PI (MYFLOAT)3.141592653589793238462643197169399375105820974944592307816406286e0f
59

60
#define SMALL_NUM 1.e-9f
61

62
MYFLOAT4 Interaction(MYFLOAT4 m,MYFLOAT4 n)
63
{
64
    private MYFLOAT r=DISTANCE(n,m);
65

66
    return((n-m)/(MYFLOAT)(r*r*r));
67
}
68

69
MYFLOAT4 InteractionCore(MYFLOAT4 m,MYFLOAT4 n)
70
{
71
    private MYFLOAT core=(MYFLOAT)1.e5f;
72
    private MYFLOAT r=DISTANCE(n,m);
73
    private MYFLOAT d=r*r+core*core;
74

75
    return(core*(n-m)/(MYFLOAT)(d*d));
76
}
77

78
MYFLOAT PairPotential(MYFLOAT4 m,MYFLOAT4 n)
79
{
80
    return((MYFLOAT)(-1.e0f)/(DISTANCE(n,m)));
81
}
82

83
MYFLOAT AtomicPotential(__global MYFLOAT4* clDataX,int gid)
84
{
85
    private MYFLOAT potential=(MYFLOAT)0.e0f;
86
    private MYFLOAT4 x=clDataX[gid]; 
87
    
88
    for (int i=0;i<get_global_size(0);i++)
89
    {
90
        if (gid != i)
91
        potential+=PairPotential(x,clDataX[i]);
92
    }
93

94
    barrier(CLK_GLOBAL_MEM_FENCE);
95
    return(potential);
96
}
97

98
MYFLOAT AtomicPotentialCoM(__global MYFLOAT4* clDataX,__global MYFLOAT4* clCoM,int gid)
99
{
100
    return(PairPotential(clDataX[gid],clCoM[0]));
101
}
102

103
MYFLOAT8 AtomicRungeKutta(__global MYFLOAT4* clDataInX,__global MYFLOAT4* clDataInV,int gid,MYFLOAT dt)
104
{
105
    private MYFLOAT4 a0,v0,x0,a1,v1,x1,a2,v2,x2,a3,v3,x3,a4,v4,x4,xf,vf;
106

107
    a0=(MYFLOAT4)(0.e0f,0.e0f,0.e0f,0.e0f);
108
    v0=(MYFLOAT4)clDataInV[gid];
109
    x0=(MYFLOAT4)clDataInX[gid];
110
    int N = get_global_size(0);    
111
    
112
    for (int i=0;i<N;i++)
113
    {
114
        if (gid != i)
115
        a0+=Interaction(x0,clDataInX[i]);
116
    }
117
        
118
    a1=(MYFLOAT4)(0.e0f,0.e0f,0.e0f,0.e0f);
119
    v1=v0+a0*dt;
120
    x1=x0+v0*dt;
121
    for (int i=0;i<N;i++)
122
    {
123
        if (gid != i)
124
        a1+=Interaction(x1,clDataInX[i]);
125
    }
126

127
    a2=(MYFLOAT4)(0.e0f,0.e0f,0.e0f,0.e0f);
128
    v2=v0+a1*dt*(MYFLOAT)5.e-1f;
129
    x2=x0+v1*dt*(MYFLOAT)5.e-1f;
130
    for (int i=0;i<N;i++)
131
    {
132
        if (gid != i)
133
        a2+=Interaction(x2,clDataInX[i]);
134
    }
135
    
136
    a3=(MYFLOAT4)(0.e0f,0.e0f,0.e0f,0.e0f);
137
    v3=v0+a2*dt*(MYFLOAT)5.e-1f;
138
    x3=x0+v2*dt*(MYFLOAT)5.e-1f;
139
    for (int i=0;i<N;i++)
140
    {
141
        if (gid != i)
142
        a3+=Interaction(x3,clDataInX[i]);
143
    }
144
    
145
    a4=(MYFLOAT4)(0.e0f,0.e0f,0.e0f,0.e0f);
146
    v4=v0+a3*dt;
147
    x4=x0+v3*dt;
148
    for (int i=0;i<N;i++)
149
    {
150
        if (gid != i)
151
        a4+=Interaction(x4,clDataInX[i]);
152
    }
153
    
154
    xf=x0+dt*(v1+(MYFLOAT)2.e0f*(v2+v3)+v4)/(MYFLOAT)6.e0f;
155
    vf=v0+dt*(a1+(MYFLOAT)2.e0f*(a2+a3)+a4)/(MYFLOAT)6.e0f;
156
     
157
    return((MYFLOAT8)(xf.s0,xf.s1,xf.s2,1.e0f,vf.s0,vf.s1,vf.s2,1.e0f));
158
}
159

160
// Elements from : http://doswa.com/2009/01/02/fourth-order-runge-kutta-numerical-integration.html
161

162
MYFLOAT8 AtomicHeun(__global MYFLOAT4* clDataInX,__global MYFLOAT4* clDataInV,int gid,MYFLOAT dt)
163
{
164
    private MYFLOAT4 x,v,a,xi,vi,ai,xf,vf;
165

166
    x=(MYFLOAT4)clDataInX[gid];
167
    v=(MYFLOAT4)clDataInV[gid];
168
    a=(MYFLOAT4)(0.e0f,0.e0f,0.e0f,0.e0f);
169

170
    for (int i=0;i<get_global_size(0);i++)
171
    {
172
        if (gid != i)
173
        a+=Interaction(x,clDataInX[i]);
174
    }
175

176
    vi=v+dt*a;
177
    xi=x+dt*vi;
178
    ai=(MYFLOAT4)(0.e0f,0.e0f,0.e0f,0.e0f);
179

180
    for (int i=0;i<get_global_size(0);i++)
181
    {
182
        if (gid != i)
183
        ai+=Interaction(xi,clDataInX[i]);
184
    }
185
       
186
    vf=v+dt*(a+ai)/(MYFLOAT)2.e0f;
187
    xf=x+dt*(v+vi)/(MYFLOAT)2.e0f;
188

189
    return((MYFLOAT8)(xf.s0,xf.s1,xf.s2,1.e0f,vf.s0,vf.s1,vf.s2,0.e0f));
190
}
191

192
MYFLOAT8 AtomicImplicitEuler(__global MYFLOAT4* clDataInX,__global MYFLOAT4* clDataInV,int gid,MYFLOAT dt)
193
{
194
    private MYFLOAT4 x,v,a,xf,vf;
195

196
    x=(MYFLOAT4)clDataInX[gid];
197
    v=(MYFLOAT4)clDataInV[gid];
198
    a=(MYFLOAT4)(0.e0f,0.e0f,0.e0f,0.e0f);
199

200
    for (int i=0;i<get_global_size(0);i++)
201
    {
202
        if (gid != i)
203
        a+=Interaction(x,clDataInX[i]);
204
    }
205
       
206
    vf=v+dt*a;
207
    xf=x+dt*vf;
208
 
209
    return((MYFLOAT8)(xf.s0,xf.s1,xf.s2,1.e0f,vf.s0,vf.s1,vf.s2,0.e0f));
210
}
211

212
MYFLOAT8 AtomicExplicitEuler(__global MYFLOAT4* clDataInX,__global MYFLOAT4* clDataInV,int gid,MYFLOAT dt)
213
{
214
    MYFLOAT4 x,v,a,xf,vf;
215

216
    x=(MYFLOAT4)clDataInX[gid];
217
    v=(MYFLOAT4)clDataInV[gid];
218
    a=(MYFLOAT4)(0.e0f,0.e0f,0.e0f,0.e0f);
219

220
    for (int i=0;i<get_global_size(0);i++)
221
    {
222
        if (gid != i)
223
        a+=Interaction(x,clDataInX[i]);
224
    }
225
       
226
    vf=v+dt*a;
227
    xf=x+dt*v;
228
 
229
    return((MYFLOAT8)(xf.s0,xf.s1,xf.s2,1.e0f,vf.s0,vf.s1,vf.s2,0.e0f));
230
}
231

232
__kernel void SplutterPoints(__global MYFLOAT4* clDataX, MYFLOAT box, 
233
                             uint seed_z,uint seed_w)
234
{
235
    int gid = get_global_id(0);
236
    uint z=seed_z+(uint)gid;
237
    uint w=seed_w-(uint)gid;
238
    
239
    MYFLOAT x0=box*(MYFLOAT)(MWCfp-(MYFLOAT)5.e-1f);
240
    MYFLOAT y0=box*(MYFLOAT)(MWCfp-(MYFLOAT)5.e-1f);
241
    MYFLOAT z0=box*(MYFLOAT)(MWCfp-(MYFLOAT)5.e-1f);
242

243
    clDataX[gid].s0123 = (MYFLOAT4) (x0,y0,z0,1.e0f);
244
}
245

246
__kernel void SplutterStress(__global MYFLOAT4* clDataX,__global MYFLOAT4* clDataV,__global MYFLOAT4* clCoM, MYFLOAT velocity,uint seed_z,uint seed_w)
247
{
248
    int gid = get_global_id(0);
249
    MYFLOAT N = (MYFLOAT)get_global_size(0);
250
    uint z=seed_z+(uint)gid;
251
    uint w=seed_w-(uint)gid;
252

253
    if (velocity<SMALL_NUM) {
254
       MYFLOAT4 SpeedVector=(MYFLOAT4)normalize(cross(clDataX[gid],clCoM[0]))*sqrt(-AtomicPotential(clDataX,gid)/(MYFLOAT)2.e0f);
255
       clDataV[gid]=SpeedVector;
256
    }
257
    else
258
    {    
259
       // cast to float for sin,cos are NEEDED by Mesa FP64 implementation!
260
       MYFLOAT theta=MWCfp*PI;
261
       MYFLOAT phi=MWCfp*PI*(MYFLOAT)2.e0f;
262
       MYFLOAT sinTheta=sin((float)theta);
263

264
       clDataV[gid].s0=velocity*sinTheta*cos((float)phi);
265
       clDataV[gid].s1=velocity*sinTheta*sin((float)phi);
266
       clDataV[gid].s2=velocity*cos((float)theta);
267
       clDataV[gid].s3=(MYFLOAT)1.e0f;
268
    }
269
}
270

271
__kernel void RungeKutta(__global MYFLOAT4* clDataX,__global MYFLOAT4* clDataV,MYFLOAT h)
272
{
273
    int gid = get_global_id(0);
274
    
275
    MYFLOAT8 clDataGid=AtomicRungeKutta(clDataX,clDataV,gid,h);
276
    barrier(CLK_GLOBAL_MEM_FENCE);
277
    clDataX[gid]=clDataGid.lo;
278
    clDataV[gid]=clDataGid.hi;
279
}
280

281
__kernel void ImplicitEuler(__global MYFLOAT4* clDataX,__global MYFLOAT4* clDataV,MYFLOAT h)
282
{
283
    int gid = get_global_id(0);
284
    
285
    MYFLOAT8 clDataGid=AtomicImplicitEuler(clDataX,clDataV,gid,h);
286
    barrier(CLK_GLOBAL_MEM_FENCE);
287
    clDataX[gid]=clDataGid.lo;
288
    clDataV[gid]=clDataGid.hi;
289
}
290

291
__kernel void Heun(__global MYFLOAT4* clDataX,__global MYFLOAT4* clDataV,MYFLOAT h)
292
{
293
    int gid = get_global_id(0);
294
    
295
    MYFLOAT8 clDataGid=AtomicHeun(clDataX,clDataV,gid,h);
296
    barrier(CLK_GLOBAL_MEM_FENCE);
297
    clDataX[gid]=clDataGid.lo;
298
    clDataV[gid]=clDataGid.hi;
299
}
300

301
__kernel void ExplicitEuler(__global MYFLOAT4* clDataX,__global MYFLOAT4* clDataV,MYFLOAT h)
302
{
303
    int gid = get_global_id(0);
304
    
305
    MYFLOAT8 clDataGid=AtomicExplicitEuler(clDataX,clDataV,gid,h);
306
    barrier(CLK_GLOBAL_MEM_FENCE);
307
    clDataX[gid]=clDataGid.lo;
308
    clDataV[gid]=clDataGid.hi;
309
}
310

311
__kernel void CoMPotential(__global MYFLOAT4* clDataX,__global MYFLOAT4* clCoM,__global MYFLOAT* clPotential)
312
{
313
    int gid = get_global_id(0);
314

315
    clPotential[gid]=PairPotential(clDataX[gid],clCoM[0]);
316
}
317

318
__kernel void Potential(__global MYFLOAT4* clDataX,__global MYFLOAT* clPotential)
319
{
320
    int gid = get_global_id(0);
321

322
    MYFLOAT potential=(MYFLOAT)0.e0f;
323
    MYFLOAT4 x=clDataX[gid]; 
324
    
325
    for (int i=0;i<get_global_size(0);i++)
326
    {
327
        if (gid != i)
328
        potential+=PairPotential(x,clDataX[i]);
329
    }
330
                 
331
    barrier(CLK_GLOBAL_MEM_FENCE);
332
    clPotential[gid]=potential*(MYFLOAT)5.e-1f;
333
}
334

335
__kernel void CenterOfMass(__global MYFLOAT4* clDataX,__global MYFLOAT4* clCoM,int Size)
336
{
337
    MYFLOAT4 CoM=clDataX[0]; 
338

339
    for (int i=1;i<Size;i++)
340
    {
341
        CoM+=clDataX[i];
342
    }
343

344
    barrier(CLK_GLOBAL_MEM_FENCE);
345
    clCoM[0]=(MYFLOAT4)(CoM.s0,CoM.s1,CoM.s2,1.e0f)/(MYFLOAT)Size;
346
}
347

348
__kernel void Kinetic(__global MYFLOAT4* clDataV,__global MYFLOAT* clKinetic)
349
{
350
    int gid = get_global_id(0);
351
    
352
    barrier(CLK_GLOBAL_MEM_FENCE);
353
    MYFLOAT d=(MYFLOAT)length(clDataV[gid]);
354
    clKinetic[gid]=(MYFLOAT)5.e-1f*(MYFLOAT)(d*d);
355
}
356
"""
357

    
358
def Energy(MyData):
359
    return(sum(MyData*MyData))
360

    
361
if __name__=='__main__':
362
    
363
    # ValueType
364
    ValueType='FP32'
365
    class MyFloat(np.float32):pass
366
    #    clType8=cl_array.vec.float8
367
    clType4=cl_array.vec.float4
368
    # Set defaults values
369
    np.set_printoptions(precision=2)  
370
    # Id of Device : 1 is for first find !
371
    Device=0
372
    # Iterations is integer
373
    Number=2
374
    # Size of box
375
    SizeOfBox=MyFloat(1.)
376
    # Initial velocity of particules
377
    Velocity=MyFloat(1.)
378
    # Redo the last process
379
    Iterations=int(np.pi*1024)
380
    # Step
381
    Step=MyFloat(1./1024)
382
    # Method of integration
383
    Method='ImplicitEuler'
384
    # InitialRandom
385
    InitialRandom=False
386
    # RNG Marsaglia Method
387
    RNG='MWC'
388
    # CheckEnergies
389
    CheckEnergies=False
390
    # Display samples in 3D
391
    GraphSamples=False
392
    # Viriel Distribution of stress
393
    VirielStress=True
394
    
395
    HowToUse='%s -h [Help] -r [InitialRandom] -e [VirielStress] -g [GraphSamples] -c [CheckEnergies] -d <DeviceId> -n <NumberOfParticules> -z <SizeOfBox> -v <Velocity> -s <Step> -i <Iterations> -m <ImplicitEuler|RungeKutta|ExplicitEuler|Heun> -t <FP32|FP64>'
396

    
397
    try:
398
        opts, args = getopt.getopt(sys.argv[1:],"rehgcd:n:z:v:i:s:m:t:",["random","viriel","graph","check","device=","number=","size=","velocity=","iterations=","step=","method=","valuetype="])
399
    except getopt.GetoptError:
400
        print(HowToUse % sys.argv[0])
401
        sys.exit(2)
402

    
403
    for opt, arg in opts:
404
        if opt == '-h':
405
            print(HowToUse % sys.argv[0])
406

    
407
            print("\nInformations about devices detected under OpenCL:")
408
            try:
409
                Id=0
410
                for platform in cl.get_platforms():
411
                    for device in platform.get_devices():
412
                        #deviceType=cl.device_type.to_string(device.type)
413
                        deviceType="xPU"
414
                        print("Device #%i from %s of type %s : %s" % (Id,platform.vendor.lstrip(),deviceType,device.name.lstrip()))
415
                        Id=Id+1
416
                sys.exit()
417
            except ImportError:
418
                print("Your platform does not seem to support OpenCL")
419
                sys.exit()
420

    
421
        elif opt in ("-t", "--valuetype"):
422
            if arg=='FP64':
423
                class MyFloat(np.float64): pass
424
                clType4=cl_array.vec.double4
425
            else:
426
                class MyFloat(np.float32):pass
427
                clType4=cl_array.vec.float4
428
            ValueType = arg
429
        elif opt in ("-d", "--device"):
430
            Device=int(arg)
431
        elif opt in ("-m", "--method"):
432
            Method=arg
433
        elif opt in ("-n", "--number"):
434
            Number=int(arg)
435
        elif opt in ("-z", "--size"):
436
            SizeOfBox=MyFloat(arg)
437
        elif opt in ("-v", "--velocity"):
438
            Velocity=MyFloat(arg)
439
            VirielStress=False
440
        elif opt in ("-s", "--step"):
441
            Step=MyFloat(arg)
442
        elif opt in ("-i", "--iterations"):
443
            Iterations=int(arg)
444
        elif opt in ("-r", "--random"):
445
            InitialRandom=True
446
        elif opt in ("-c", "--check"):
447
            CheckEnergies=True
448
        elif opt in ("-g", "--graph"):
449
            GraphSamples=True
450
        elif opt in ("-e", "--viriel"):
451
            VirielStress=True
452
                        
453
    SizeOfBox=MyFloat(Number*SizeOfBox)
454
    Velocity=MyFloat(Velocity)
455
    Step=MyFloat(Step)
456
                
457
    print("Device choosed : %s" % Device)
458
    print("Number of particules : %s" % Number)
459
    print("Size of Box : %s" % SizeOfBox)
460
    print("Initial velocity : %s" % Velocity)
461
    print("Number of iterations : %s" % Iterations)
462
    print("Step of iteration : %s" % Step)
463
    print("Method of resolution : %s" % Method)
464
    print("Initial Random for RNG Seed : %s" % InitialRandom)
465
    print("Check for Energies : %s" % CheckEnergies)
466
    print("Graph for Samples : %s" % GraphSamples)
467
    print("ValueType is : %s" % ValueType)
468
    print("Viriel distribution of stress %s" % VirielStress)
469

    
470
    # Create Numpy array of CL vector with 8 FP32    
471
    MyCoM = np.zeros(1,dtype=clType4)
472
    MyDataX = np.zeros(Number, dtype=clType4)
473
    MyDataV = np.zeros(Number, dtype=clType4)
474
    MyPotential = np.zeros(Number, dtype=MyFloat)
475
    MyKinetic = np.zeros(Number, dtype=MyFloat)
476

    
477
    Marsaglia,Computing=DictionariesAPI()
478

    
479
    # Scan the OpenCL arrays
480
    Id=0
481
    HasXPU=False
482
    for platform in cl.get_platforms():
483
        for device in platform.get_devices():
484
            if Id==Device:
485
                PlatForm=platform
486
                XPU=device
487
                print("CPU/GPU selected: ",device.name.lstrip())
488
                print("Platform selected: ",platform.name)
489
                HasXPU=True
490
            Id+=1
491

    
492
    if HasXPU==False:
493
        print("No XPU #%i found in all of %i devices, sorry..." % (Device,Id-1))
494
        sys.exit()      
495

    
496
    # Create Context
497
    try:
498
        ctx = cl.Context([XPU])
499
        queue = cl.CommandQueue(ctx,properties=cl.command_queue_properties.PROFILING_ENABLE)
500
    except:
501
        print("Crash during context creation")
502

    
503
    print(Marsaglia[RNG],Computing[ValueType])
504
    # Build all routines used for the computing
505
    #BuildOptions="-DTRNG=%i -DTYPE=%i" % (Marsaglia[RNG],Computing[ValueType])
506
    #BuildOptions="-cl-mad-enable -cl-fast-relaxed-math -DTRNG=%i -DTYPE=%i" % (Marsaglia[RNG],Computing[ValueType])
507
    BuildOptions="-cl-mad-enable -cl-kernel-arg-info -cl-fast-relaxed-math -cl-std=CL1.2 -DTRNG=%i -DTYPE=%i" % (Marsaglia[RNG],Computing[ValueType])
508
    
509
    if 'Intel' in PlatForm.name or 'Clover' in PlatForm.name or 'Portable' in PlatForm.name :
510
        MyRoutines = cl.Program(ctx, BlobOpenCL).build(options = BuildOptions)
511
    else:
512
        MyRoutines = cl.Program(ctx, BlobOpenCL).build(options = BuildOptions+" -cl-strict-aliasing")
513
    
514
    mf = cl.mem_flags
515
    # clDataX = cl.Buffer(ctx, mf.READ_WRITE, MyDataX.nbytes)
516
    # clDataV = cl.Buffer(ctx, mf.READ_WRITE, MyDataV.nbytes)
517
    # clPotential = cl.Buffer(ctx, mf.READ_WRITE, MyPotential.nbytes)
518
    # clKinetic = cl.Buffer(ctx, mf.READ_WRITE, MyKinetic.nbytes)
519
    # clCoM = cl.Buffer(ctx, mf.READ_WRITE, MyCoM.nbytes)
520

    
521
    clDataX = cl.Buffer(ctx, mf.WRITE_ONLY|mf.COPY_HOST_PTR,hostbuf=MyDataX)
522
    clDataV = cl.Buffer(ctx, mf.WRITE_ONLY|mf.COPY_HOST_PTR,hostbuf=MyDataV)
523
    clPotential = cl.Buffer(ctx, mf.WRITE_ONLY|mf.COPY_HOST_PTR,hostbuf=MyPotential)
524
    clKinetic = cl.Buffer(ctx, mf.WRITE_ONLY|mf.COPY_HOST_PTR,hostbuf=MyKinetic)
525
    clCoM = cl.Buffer(ctx, mf.WRITE_ONLY|mf.COPY_HOST_PTR,hostbuf=MyCoM)
526

    
527
    print('All particles superimposed.')
528

    
529
    print(SizeOfBox.dtype)
530
    
531
    # Set particles to RNG points
532
    if InitialRandom:
533
        MyRoutines.SplutterPoints(queue,(Number,1),None,clDataX,SizeOfBox,np.uint32(nprnd(2**32)),np.uint32(nprnd(2**32)))
534
    else:
535
        MyRoutines.SplutterPoints(queue,(Number,1),None,clDataX,SizeOfBox,np.uint32(110271),np.uint32(250173))
536

    
537
    print('All particules distributed')
538

    
539
    CLLaunch=MyRoutines.CenterOfMass(queue,(1,1),None,clDataX,clCoM,np.int32(Number))
540
    CLLaunch.wait()
541
    cl.enqueue_copy(queue,MyCoM,clCoM)
542
    print('Center Of Mass: (%s,%s,%s)' % (MyCoM[0][0],MyCoM[0][1],MyCoM[0][2]))
543
    
544
    if VirielStress:
545
        CLLaunch=MyRoutines.SplutterStress(queue,(Number,1),None,clDataX,clDataV,clCoM,MyFloat(0.),np.uint32(110271),np.uint32(250173))
546
    else:
547
        CLLaunch=MyRoutines.SplutterStress(queue,(Number,1),None,clDataX,clDataV,clCoM,Velocity,np.uint32(110271),np.uint32(250173))
548
    CLLaunch.wait()
549
        
550
    if GraphSamples:
551
        cl.enqueue_copy(queue, MyDataX, clDataX)
552
        t0=np.array([[MyDataX[0][0],MyDataX[0][1],MyDataX[0][2]]])
553
        t1=np.array([[MyDataX[1][0],MyDataX[1][1],MyDataX[1][2]]])
554
        tL=np.array([[MyDataX[-1][0],MyDataX[-1][1],MyDataX[-1][2]]])
555

    
556
    CLLaunch=MyRoutines.Potential(queue,(Number,1),None,clDataX,clPotential)
557
    CLLaunch=MyRoutines.Kinetic(queue,(Number,1),None,clDataV,clKinetic)
558
    CLLaunch.wait()
559
    cl.enqueue_copy(queue,MyPotential,clPotential)
560
    cl.enqueue_copy(queue,MyKinetic,clKinetic)
561
    print('Viriel=%s Potential=%s Kinetic=%s'% (np.sum(MyPotential)+2*np.sum(MyKinetic),np.sum(MyPotential),np.sum(MyKinetic)))
562
 
563
    if GraphSamples:
564
        cl.enqueue_copy(queue, MyDataX, clDataX)
565
        t0=np.array([[MyDataX[0][0],MyDataX[0][1],MyDataX[0][2]]])
566
        t1=np.array([[MyDataX[1][0],MyDataX[1][1],MyDataX[1][2]]])
567
        tL=np.array([[MyDataX[-1][0],MyDataX[-1][1],MyDataX[-1][2]]])
568

    
569
    time_start=time.time()
570
    for i in range(Iterations):
571
        if Method=="RungeKutta":            
572
            CLLaunch=MyRoutines.RungeKutta(queue,(Number,1),None,clDataX,clDataV,Step)
573
        elif Method=="ExplicitEuler":
574
            CLLaunch=MyRoutines.ExplicitEuler(queue,(Number,1),None,clDataX,clDataV,Step)
575
        elif Method=="Heun":
576
            CLLaunch=MyRoutines.Heun(queue,(Number,1),None,clDataX,clDataV,Step)
577
        else:
578
            CLLaunch=MyRoutines.ImplicitEuler(queue,(Number,1),None,clDataX,clDataV,Step)
579
        CLLaunch.wait()
580

    
581
        if CheckEnergies:
582
            CLLaunch=MyRoutines.Potential(queue,(Number,1),None,clDataX,clPotential)
583
            CLLaunch=MyRoutines.Kinetic(queue,(Number,1),None,clDataV,clKinetic)
584
            CLLaunch.wait()
585
            cl.enqueue_copy(queue,MyPotential,clPotential)
586
            cl.enqueue_copy(queue,MyKinetic,clKinetic)
587
            print(np.sum(MyPotential)+2.*np.sum(MyKinetic),np.sum(MyPotential),np.sum(MyKinetic))
588

    
589
            print(MyPotential,MyKinetic)
590
            
591
        if GraphSamples:
592
            cl.enqueue_copy(queue, MyDataX, clDataX)
593
            t0=np.append(t0,[MyDataX[0][0],MyDataX[0][1],MyDataX[0][2]])
594
            t1=np.append(t1,[MyDataX[1][0],MyDataX[1][1],MyDataX[1][2]])
595
            tL=np.append(tL,[MyDataX[-1][0],MyDataX[-1][1],MyDataX[-1][2]])
596
    print("\nDuration on %s for each %s\n" % (Device,(time.time()-time_start)/Iterations))
597

    
598
    MyRoutines.CenterOfMass(queue,(1,1),None,clDataX,clCoM,np.int32(Number))
599
    CLLaunch=MyRoutines.Potential(queue,(Number,1),None,clDataX,clPotential)
600
    CLLaunch=MyRoutines.Kinetic(queue,(Number,1),None,clDataV,clKinetic)
601
    CLLaunch.wait()
602
    cl.enqueue_copy(queue,MyCoM,clCoM)
603
    cl.enqueue_copy(queue,MyPotential,clPotential)
604
    cl.enqueue_copy(queue,MyKinetic,clKinetic)
605
    print('Center Of Mass: (%s,%s,%s)' % (MyCoM[0][0],MyCoM[0][1],MyCoM[0][2]))
606
    print('Viriel=%s Potential=%s Kinetic=%s'% (np.sum(MyPotential)+2.*np.sum(MyKinetic),np.sum(MyPotential),np.sum(MyKinetic)))
607
    
608
    if GraphSamples:    
609
        t0=np.transpose(np.reshape(t0,(Iterations+1,3)))
610
        t1=np.transpose(np.reshape(t1,(Iterations+1,3)))
611
        tL=np.transpose(np.reshape(tL,(Iterations+1,3)))
612
    
613
        import matplotlib.pyplot as plt
614
        from mpl_toolkits.mplot3d import Axes3D
615
    
616
        fig = plt.figure()
617
        ax = fig.gca(projection='3d')
618
        ax.scatter(t0[0],t0[1],t0[2], marker='^',color='blue')
619
        ax.scatter(t1[0],t1[1],t1[2], marker='o',color='red')
620
        ax.scatter(tL[0],tL[1],tL[2], marker='D',color='green')
621
   
622
        ax.set_xlabel('X Label')
623
        ax.set_ylabel('Y Label')
624
        ax.set_zlabel('Z Label')
625

    
626
        plt.show()
627
    
628
    clDataX.release()
629
    clDataV.release()
630
    clKinetic.release()
631
    clPotential.release()