Statistiques
| Révision :

root / NBody / NBody.py @ 136

Historique | Voir | Annoter | Télécharger (14,47 ko)

1
#!/usr/bin/env python3
2
# -*- coding: utf-8 -*-
3
"""
4
Demonstrateur OpenCL d'interaction NCorps
5

6
Emmanuel QUEMENER <emmanuel.quemener@ens-lyon.fr> CeCILLv2
7
"""
8
import getopt
9
import sys
10
import time
11
import numpy as np
12
import pyopencl as cl
13
import pyopencl.array as cl_array
14
from numpy.random import randint as nprnd
15

    
16
def DictionariesAPI():
17
    Marsaglia={'CONG':0,'SHR3':1,'MWC':2,'KISS':3}
18
    Computing={'FP32':0,'FP64':1}
19
    return(Marsaglia,Computing)
20

    
21
BlobOpenCL= """
22
#define znew  ((z=36969*(z&65535)+(z>>16))<<16)
23
#define wnew  ((w=18000*(w&65535)+(w>>16))&65535)
24
#define MWC   (znew+wnew)
25
#define SHR3  (jsr=(jsr=(jsr=jsr^(jsr<<17))^(jsr>>13))^(jsr<<5))
26
#define CONG  (jcong=69069*jcong+1234567)
27
#define KISS  ((MWC^CONG)+SHR3)
28

29
#define MWCfp MWC * 2.328306435454494e-10f
30
#define KISSfp KISS * 2.328306435454494e-10f
31
#define SHR3fp SHR3 * 2.328306435454494e-10f
32
#define CONGfp CONG * 2.328306435454494e-10f
33

34
#define TFP32 0
35
#define TFP64 1
36

37
#define LENGTH 1.
38

39
#define PI 3.14159265359
40

41
#define SMALL_NUM 0.000000001
42

43
#if TYPE == TFP32
44
#define MYFLOAT4 float4
45
#define MYFLOAT8 float8
46
#define MYFLOAT float
47
#else
48
#pragma OPENCL EXTENSION cl_khr_fp64: enable
49
#define MYFLOAT4 double4
50
#define MYFLOAT8 double8
51
#define MYFLOAT double
52
#endif
53

54
MYFLOAT4 Interaction(MYFLOAT4 m,MYFLOAT4 n)
55
{
56
    // return((n-m)/(MYFLOAT)pow(distance(n,m),2));
57
    return((n-m)/(MYFLOAT)pow(distance(n,m),2));
58
}
59

60
MYFLOAT PairPotential(MYFLOAT4 m,MYFLOAT4 n)
61
{
62
    return((MYFLOAT)-1./distance(n,m));
63
}
64

65
// Elements from : http://doswa.com/2009/01/02/fourth-order-runge-kutta-numerical-integration.html
66

67

68
MYFLOAT8 AtomicRungeKutta(__global MYFLOAT8* clDataIn,int gid,MYFLOAT dt)
69
{
70
    MYFLOAT4 x0=(MYFLOAT4)clDataIn[gid].lo;
71
    MYFLOAT4 v0=(MYFLOAT4)clDataIn[gid].hi;
72
    MYFLOAT4 a0=(MYFLOAT4)(0.,0.,0.,0.);
73
    int N = get_global_size(0);    
74
    
75
    for (int i=0;i<N;i++)
76
    {
77
        if (gid != i)
78
        a0+=Interaction(x0,clDataIn[i].lo);
79
    }
80
        
81
    MYFLOAT4 x1=x0+v0*(MYFLOAT)0.5*dt;
82
    MYFLOAT4 v1=v0+a0*(MYFLOAT)0.5*dt;
83
    MYFLOAT4 a1=(MYFLOAT4)(0.,0.,0.,0.);
84
    for (int i=0;i<N;i++)
85
    {
86
        if (gid != i)
87
        a1+=Interaction(x1,clDataIn[i].lo);
88
    }
89

90
    MYFLOAT4 x2=x0+v1*(MYFLOAT)0.5*dt;
91
    MYFLOAT4 v2=v0+a1*(MYFLOAT)0.5*dt;
92
    MYFLOAT4 a2=(MYFLOAT4)(0.,0.,0.,0.);
93
    for (int i=0;i<N;i++)
94
    {
95
        if (gid != i)
96
        a2+=Interaction(x2,clDataIn[i].lo);
97
    }
98
    
99
    MYFLOAT4 x3=x0+v2*dt;
100
    MYFLOAT4 v3=v0+a2*dt;
101
    MYFLOAT4 a3=(MYFLOAT)(0.,0.,0.,0.);
102
    for (int i=0;i<N;i++)
103
    {
104
        if (gid != i)
105
        a3+=Interaction(x3,clDataIn[i].lo);
106
    }
107
    
108
    MYFLOAT4 xf=x0+dt*(v0+(MYFLOAT)2.*(v1+v2)+v3)/(MYFLOAT)6.;
109
    MYFLOAT4 vf=v0+dt*(a0+(MYFLOAT)2.*(a1+a2)+a3)/(MYFLOAT)6.;
110
     
111
    return((MYFLOAT8)(xf.s0,xf.s1,xf.s2,xf.s3,vf.s0,vf.s1,vf.s2,vf.s3));
112
}
113

114
// Elements from : http://doswa.com/2009/01/02/fourth-order-runge-kutta-numerical-integration.html
115

116
MYFLOAT8 AtomicRungeKutta2(__global MYFLOAT8* clDataIn,int gid,MYFLOAT dt)
117
{
118
    MYFLOAT4 x[4],v[4],a[4],xf,vf;
119
    int N=get_global_size(0);
120

121
    x[0]=clDataIn[gid].lo;
122
    v[0]=clDataIn[gid].hi;
123
    a[0]=(0.,0.,0.,0.);
124
    
125
    for (int i=0;i<N;i++)
126
    {
127
        if (gid != i)
128
        a[0]+=Interaction(x[0],clDataIn[i].lo);
129
    }
130
        
131
    x[1]=x[0]+v[0]*(MYFLOAT)0.5*dt;
132
    v[1]=v[0]+a[0]*(MYFLOAT)0.5*dt;
133
    a[1]=(0.,0.,0.,0.);
134
    for (int i=0;i<N;i++)
135
    {
136
        if (gid != i)
137
        a[1]+=Interaction(x[1],clDataIn[i].lo);
138
    }
139

140
    x[2]=x[0]+v[1]*(MYFLOAT)0.5*dt;
141
    v[2]=v[0]+a[1]*(MYFLOAT)0.5*dt;
142
    a[2]=(0.,0.,0.,0.);
143
    for (int i=0;i<N;i++)
144
    {
145
        if (gid != i)
146
        a[2]+=Interaction(x[2],clDataIn[i].lo);
147
    }
148
    
149
    x[3]=x[0]+v[2]*dt;
150
    v[3]=v[0]+a[2]*dt;
151
    a[3]=(0.,0.,0.,0.);
152
    for (int i=0;i<N;i++)
153
    {
154
        if (gid != i)
155
        a[3]+=Interaction(x[3],clDataIn[i].lo);
156
    }
157
    
158
    xf=x[0]+dt*(v[0]+(MYFLOAT)2.*(v[1]+v[2])+v[3])/(MYFLOAT)6.;
159
    vf=v[0]+dt*(a[0]+(MYFLOAT)2.*(a[1]+a[2])+a[3])/(MYFLOAT)6.;
160
     
161
    return((MYFLOAT8)(xf.s0,xf.s1,xf.s2,xf.s3,vf.s0,vf.s1,vf.s2,vf.s3));
162
}
163

164
MYFLOAT8 AtomicEuler(__global MYFLOAT8* clDataIn,int gid,MYFLOAT dt)
165
{
166
    MYFLOAT4 x,v,a,xf,vf;
167

168
    x=clDataIn[gid].lo;
169
    v=clDataIn[gid].hi;
170
    a=(0.,0.,0.,0.);
171
    for (int i=0;i<get_global_size(0);i++)
172
    {
173
        if (gid != i)
174
        a+=Interaction(x,clDataIn[i].lo);
175
    }
176
       
177
    vf=v+dt*a;
178
    xf=x+dt*vf;
179
 
180
    return((MYFLOAT8)(xf.s0,xf.s1,xf.s2,xf.s3,vf.s0,vf.s1,vf.s2,vf.s3));
181
}
182

183
__kernel void SplutterPoints(__global MYFLOAT8* clData, MYFLOAT box, MYFLOAT velocity,
184
                               uint seed_z,uint seed_w)
185
{
186
    int gid = get_global_id(0);
187
    MYFLOAT N = (MYFLOAT) get_global_size(0);
188
    uint z=seed_z+(uint)gid;
189
    uint w=seed_w-(uint)gid;
190
    
191
    MYFLOAT theta=MWCfp*PI;
192
    MYFLOAT phi=MWCfp*PI*(MYFLOAT)2.;
193
    MYFLOAT sinTheta=sin(theta);
194
    clData[gid].s01234567 = (MYFLOAT8) (box*(MYFLOAT)(MWCfp-0.5),box*(MYFLOAT)(MWCfp-0.5),box*(MYFLOAT)(MWCfp-0.5),0.,0.,0.,0.,0.);
195
    MYFLOAT v=sqrt(N*(MYFLOAT)2./distance(clData[gid].lo,(MYFLOAT4) (0.,0.,0.,0.)));
196
    clData[gid].s4=v*sinTheta*cos(phi);
197
    clData[gid].s5=v*sinTheta*sin(phi);
198
    clData[gid].s6=v*cos(theta);
199
}
200

201
__kernel void RungeKutta(__global MYFLOAT8* clData,MYFLOAT h)
202
{
203
    int gid = get_global_id(0);
204
    
205
    MYFLOAT8 clDataGid=AtomicRungeKutta(clData,gid,h);
206
    barrier(CLK_GLOBAL_MEM_FENCE);
207
    clData[gid]=clDataGid;
208
}
209

210
__kernel void Euler(__global MYFLOAT8* clData,MYFLOAT h)
211
{
212
    int gid = get_global_id(0);
213
    
214
    MYFLOAT8 clDataGid=AtomicEuler(clData,gid,h);
215
    barrier(CLK_GLOBAL_MEM_FENCE);
216
    clData[gid]=clDataGid;
217
}
218

219
__kernel void Potential(__global MYFLOAT8* clData,__global MYFLOAT* clPotential)
220
{
221
    int gid = get_global_id(0);
222

223
    MYFLOAT potential=0.;
224
    MYFLOAT4 x=clData[gid].lo; 
225
    
226
    for (int i=0;i<get_global_size(0);i++)
227
    {
228
        if (gid != i)
229
        potential+=PairPotential(x,clData[i].lo);
230
    }
231
                 
232
    barrier(CLK_GLOBAL_MEM_FENCE);
233
    clPotential[gid]=(MYFLOAT)0.5*potential;
234
}
235

236
__kernel void Kinetic(__global MYFLOAT8* clData,__global MYFLOAT* clKinetic)
237
{
238
    int gid = get_global_id(0);
239
    
240
    clKinetic[gid]=(MYFLOAT)0.5*(pow(clData[gid].s4,2)+pow(clData[gid].s5,2)+pow(clData[gid].s6,2));
241
}
242
"""
243

    
244
def Energy(MyData):
245
    return(sum(pow(MyData,2)))
246

    
247
if __name__=='__main__':
248
    
249
    # ValueType
250
    ValueType='FP32'
251
    class MyFloat(np.float32):pass
252
    clType=cl_array.vec.float8
253
    # Set defaults values
254
    np.set_printoptions(precision=2)  
255
    # Id of Device : 1 is for first find !
256
    Device=1
257
    # Iterations is integer
258
    Number=4
259
    # Size of box
260
    SizeOfBox=MyFloat(1.)
261
    # Initial velocity of particules
262
    Velocity=MyFloat(1.)
263
    # Redo the last process
264
    Iterations=100
265
    # Step
266
    Step=MyFloat(0.01)
267
    # Method of integration
268
    Method='RungeKutta'
269
    # InitialRandom
270
    InitialRandom=False
271
    # RNG Marsaglia Method
272
    RNG='MWC'
273
    # CheckEnergies
274
    CheckEnergies=False
275
    # Display samples in 3D
276
    GraphSamples=False    
277
    
278
    HowToUse='%s -h [Help] -r [InitialRandom] -g [GraphSamples] -c [CheckEnergies] -d <DeviceId> -n <NumberOfParticules> -z <SizeOfBox> -v <Velocity> -s <Step> -i <Iterations> -m <RungeKutta|Euler> -t <FP32|FP64>'
279

    
280
    try:
281
        opts, args = getopt.getopt(sys.argv[1:],"rhgcd:n:z:v:i:s:m:t:",["random","graph","check","device=","number=","size=","velocity=","iterations=","step=","method=","valuetype="])
282
    except getopt.GetoptError:
283
        print(HowToUse % sys.argv[0])
284
        sys.exit(2)
285

    
286
    for opt, arg in opts:
287
        if opt == '-h':
288
            print(HowToUse % sys.argv[0])
289

    
290
            print("\nInformations about devices detected under OpenCL:")
291
            try:
292
                Id=0
293
                for platform in cl.get_platforms():
294
                    for device in platform.get_devices():
295
                        deviceType=cl.device_type.to_string(device.type)
296
                        print("Device #%i from %s of type %s : %s" % (Id,platform.vendor.lstrip(),deviceType,device.name.lstrip()))
297
                        Id=Id+1
298
                sys.exit()
299
            except ImportError:
300
                print("Your platform does not seem to support OpenCL")
301
                sys.exit()
302

    
303
        elif opt in ("-t", "--valuetype"):
304
            if arg=='FP64':
305
                class MyFloat(np.float64): pass
306
                clType=cl_array.vec.double8
307
            else:
308
                class MyFloat(np.float32):pass
309
                clType=cl_array.vec.float8
310
            ValueType = arg
311
        elif opt in ("-d", "--device"):
312
            Device=int(arg)
313
        elif opt in ("-m", "--method"):
314
            Method=arg
315
        elif opt in ("-n", "--number"):
316
            Number=int(arg)
317
        elif opt in ("-z", "--size"):
318
            SizeOfBox=MyFloat(arg)
319
        elif opt in ("-v", "--velocity"):
320
            Velocity=MyFloat(arg)
321
        elif opt in ("-s", "--step"):
322
            Step=MyFloat(arg)
323
        elif opt in ("-i", "--iterations"):
324
            Iterations=int(arg)
325
        elif opt in ("-r", "--random"):
326
            InitialRandom=True
327
        elif opt in ("-c", "--check"):
328
            CheckEnergies=True
329
        elif opt in ("-g", "--graph"):
330
            GraphSamples=True
331
                        
332
    SizeOfBox=MyFloat(SizeOfBox)
333
    Velocity=MyFloat(Velocity)
334
    Step=MyFloat(Step)
335
                
336
    print("Device choosed : %s" % Device)
337
    print("Number of particules : %s" % Number)
338
    print("Size of Box : %s" % SizeOfBox)
339
    print("Initial velocity % s" % Velocity)
340
    print("Number of iterations % s" % Iterations)
341
    print("Step of iteration % s" % Step)
342
    print("Method of resolution % s" % Method)
343
    print("Initial Random for RNG Seed % s" % InitialRandom)
344
    print("Check for Energies % s" % CheckEnergies)
345
    print("Graph for Samples % s" % GraphSamples)
346
    print("ValueType is % s" % ValueType)
347

    
348
    # Create Numpy array of CL vector with 8 FP32    
349
    MyData = np.zeros(Number, dtype=clType)
350
    MyPotential = np.zeros(Number, dtype=MyFloat)
351
    MyKinetic = np.zeros(Number, dtype=MyFloat)
352

    
353
    Marsaglia,Computing=DictionariesAPI()
354

    
355
    # Scan the OpenCL arrays
356
    Id=0
357
    HasXPU=False
358
    for platform in cl.get_platforms():
359
        for device in platform.get_devices():
360
            if Id==Device:
361
                PlatForm=platform
362
                XPU=device
363
                print("CPU/GPU selected: ",device.name.lstrip())
364
                HasXPU=True
365
            Id+=1
366

    
367
    if HasXPU==False:
368
        print("No XPU #%i found in all of %i devices, sorry..." % (Device,Id-1))
369
        sys.exit()      
370

    
371
    # Create Context
372
    try:
373
        ctx = cl.Context([XPU])
374
        queue = cl.CommandQueue(ctx,properties=cl.command_queue_properties.PROFILING_ENABLE)
375
    except:
376
        print("Crash during context creation")
377

    
378
    print(Marsaglia[RNG],Computing[ValueType])
379
    # Build all routines used for the computing
380
    MyRoutines = cl.Program(ctx, BlobOpenCL).build(options = "-cl-mad-enable -cl-fast-relaxed-math -DTRNG=%i -DTYPE=%i" % (Marsaglia[RNG],Computing[ValueType]))
381

    
382
# Initial forced values for exploration
383
#    MyData[0][0]=np.float32(-1.)
384
#    MyData[0][1]=np.float32(0.)
385
#    MyData[0][5]=np.float32(1.)
386
#    MyData[1][0]=np.float32(1.)
387
#    MyData[1][1]=np.float32(0.)
388
#    MyData[1][5]=np.float32(-1.)
389

    
390
    mf = cl.mem_flags
391
    clData = cl.Buffer(ctx, mf.READ_WRITE, MyData.nbytes)
392
    clPotential = cl.Buffer(ctx, mf.READ_WRITE, MyPotential.nbytes)
393
    clKinetic = cl.Buffer(ctx, mf.READ_WRITE, MyKinetic.nbytes)
394
    #clData = cl.Buffer(ctx, mf.WRITE_ONLY|mf.COPY_HOST_PTR,hostbuf=MyData)
395

    
396
    print('All particles superimposed.')
397

    
398
    print(SizeOfBox.dtype)
399
    
400
    # Set particles to RNG points
401
    if InitialRandom:
402
        MyRoutines.SplutterPoints(queue,(Number,1),None,clData,SizeOfBox,Velocity,np.uint32(nprnd(2**32)),np.uint32(nprnd(2**32)))
403
    else:
404
        MyRoutines.SplutterPoints(queue,(Number,1),None,clData,SizeOfBox,Velocity,np.uint32(110271),np.uint32(250173))
405

    
406
    print('All particules distributed')
407
 
408
    CLLaunch=MyRoutines.Potential(queue,(Number,1),None,clData,clPotential)
409
    CLLaunch.wait()
410
    if CheckEnergies:
411
        cl.enqueue_copy(queue,MyPotential,clPotential)
412
        CLLaunch=MyRoutines.Kinetic(queue,(Number,1),None,clData,clKinetic)
413
        CLLaunch.wait()
414
        cl.enqueue_copy(queue,MyKinetic,clKinetic)
415
        # print(np.sum(MyPotential)+2*np.sum(MyKinetic),np.sum(MyPotential),np.sum(MyKinetic),MyPotential,MyKinetic)
416
        print(np.sum(MyPotential)+2*np.sum(MyKinetic),np.sum(MyPotential),np.sum(MyKinetic))
417
 
418
    if GraphSamples:
419
        cl.enqueue_copy(queue, MyData, clData)
420
        t0=np.array([[MyData[0][0],MyData[0][1],MyData[0][2]]])
421
        t1=np.array([[MyData[1][0],MyData[1][1],MyData[1][2]]])
422
        tL=np.array([[MyData[-1][0],MyData[-1][1],MyData[-1][2]]])
423

    
424
    time_start=time.time()
425
    for i in range(Iterations):
426
        if Method=="RungeKutta":            
427
            CLLaunch=MyRoutines.RungeKutta(queue,(Number,1),None,clData,Step)
428
        else:
429
            CLLaunch=MyRoutines.Euler(queue,(Number,1),None,clData,Step)
430
        CLLaunch.wait()
431
        if CheckEnergies:
432
            CLLaunch=MyRoutines.Potential(queue,(Number,1),None,clData,clPotential)
433
            CLLaunch.wait()
434
            cl.enqueue_copy(queue,MyPotential,clPotential)
435
            CLLaunch=MyRoutines.Kinetic(queue,(Number,1),None,clData,clKinetic)
436
            CLLaunch.wait()
437
            cl.enqueue_copy(queue,MyKinetic,clKinetic)
438
            # print(np.sum(MyPotential)+2*np.sum(MyKinetic),np.sum(MyPotential),np.sum(MyKinetic),MyPotential,MyKinetic)
439
            print(np.sum(MyPotential)+2*np.sum(MyKinetic),np.sum(MyPotential),np.sum(MyKinetic))
440

    
441
        if GraphSamples:
442
            cl.enqueue_copy(queue, MyData, clData)
443
            t0=np.append(t0,[MyData[0][0],MyData[0][1],MyData[0][2]])
444
            t1=np.append(t1,[MyData[1][0],MyData[1][1],MyData[1][2]])
445
            tL=np.append(tL,[MyData[-1][0],MyData[-1][1],MyData[-1][2]])
446
    print("\nDuration on %s for each %s" % (Device,(time.time()-time_start)/Iterations))
447

    
448
    if GraphSamples:    
449
        t0=np.transpose(np.reshape(t0,(Iterations+1,3)))
450
        t1=np.transpose(np.reshape(t1,(Iterations+1,3)))
451
        tL=np.transpose(np.reshape(tL,(Iterations+1,3)))
452
    
453
        import matplotlib.pyplot as plt
454
        from mpl_toolkits.mplot3d import Axes3D
455
    
456
        fig = plt.figure()
457
        ax = fig.gca(projection='3d')
458
        ax.scatter(t0[0],t0[1],t0[2], marker='^',color='blue')
459
        ax.scatter(t1[0],t1[1],t1[2], marker='o',color='red')
460
        ax.scatter(tL[0],tL[1],tL[2], marker='D',color='green')
461
   
462
        ax.set_xlabel('X Label')
463
        ax.set_ylabel('Y Label')
464
        ax.set_zlabel('Z Label')
465

    
466
        plt.show()
467
    
468
    clData.release()
469
    clKinetic.release()
470
    clPotential.release()