Révision 171

NBody/NBody.py (revision 171)
1 1
#!/usr/bin/env python3
2 2
# -*- coding: utf-8 -*-
3 3
"""
4
Demonstrateur OpenCL d'interaction NCorps
4
NBody Demonstrator implemented in OpenCL, rendering OpenGL
5 5

  
6 6
Emmanuel QUEMENER <emmanuel.quemener@ens-lyon.fr> CeCILLv2
7 7
"""
......
20 20
    Marsaglia={'CONG':0,'SHR3':1,'MWC':2,'KISS':3}
21 21
    Computing={'FP32':0,'FP64':1}
22 22
    Interaction={'Force':0,'Potential':1}
23
    return(Marsaglia,Computing,Interaction)
23
    Artevasion={'None':0,'NegExp':1}
24
    return(Marsaglia,Computing,Interaction,Artevasion)
24 25

  
25

  
26

  
27

  
28

  
29

  
30

  
31

  
32

  
33

  
34

  
35

  
36

  
37

  
38

  
39

  
40

  
41

  
42

  
43

  
44

  
45

  
46

  
47

  
48

  
49

  
50

  
51

  
52

  
53

  
54

  
55

  
56

  
57

  
58

  
59

  
60

  
61

  
62

  
63

  
64

  
65

  
66

  
67

  
68

  
69

  
70

  
71

  
72

  
73

  
74

  
75

  
76

  
77

  
78

  
79

  
80

  
81

  
82

  
83

  
84

  
85

  
86

  
87

  
88 26
BlobOpenCL= """
89 27
#define TFP32 0
90 28
#define TFP64 1
......
92 30
#define TFORCE 0
93 31
#define TPOTENTIAL 1
94 32

  
33
#define NONE 0
34
#define NEGEXP 1
35

  
95 36
#if TYPE == TFP32
96 37
#define MYFLOAT4 float4
97 38
#define MYFLOAT8 float8
......
107 48
#endif
108 49
#endif
109 50

  
110

  
111 51
#define znew  ((zmwc=36969*(zmwc&65535)+(zmwc>>16))<<16)
112 52
#define wnew  ((wmwc=18000*(wmwc&65535)+(wmwc>>16))&65535)
113 53
#define MWC   (znew+wnew)
......
115 55
#define CONG  (jcong=69069*jcong+1234567)
116 56
#define KISS  ((MWC^CONG)+SHR3)
117 57

  
118

  
119 58
#define MWCfp (MYFLOAT)(MWC * 2.3283064365386963e-10f)
120 59
#define KISSfp (MYFLOAT)(KISS * 2.3283064365386963e-10f)
121 60
#define SHR3fp (MYFLOAT)(SHR3 * 2.3283064365386963e-10f)
122 61
#define CONGfp (MYFLOAT)(CONG * 2.3283064365386963e-10f)
123 62

  
124
#define PI (MYFLOAT)3.141592653589793238462643197169399375105820974944592307816406286e0f
63
#define PI (MYFLOAT)3.141592653589793238e0f
125 64

  
126 65
#define SMALL_NUM (MYFLOAT)1.e-9f
127 66

  
128
#define LENGTH 1.e0f
129

  
130 67
// Create my own Distance implementation: distance buggy on Oland AMD chipset
131 68

  
132 69
MYFLOAT MyDistance(MYFLOAT4 n,MYFLOAT4 m)
......
154 91
//    return(((MYFLOAT4)n-(MYFLOAT4)m)*(MYFLOAT)(1.e0f-exp(-c*r2))/(MYFLOAT)(r*r2));
155 92
//}
156 93

  
94
// Potential between 2 m,n bodies
157 95
MYFLOAT PairPotential(MYFLOAT4 m,MYFLOAT4 n)
96
#if ARTEVASION == NEGEXP
97
// Add exp(-r) to numerator to avoid divergence for low distances
158 98
{
159

  
99
    MYFLOAT r=DISTANCE(n,m);
100
    return((-1.e0f+exp(-r))/r);
101
}
102
#else
103
// Classical potential in 1/r
104
{
160 105
//    return((MYFLOAT)(-1.e0f)/(MyDistance(m,n)));
161

  
162
//    MYFLOAT r=DISTANCE(n,m);
163
//    return((-1.e0f+exp(-r))/r);
164

  
165

  
166 106
    return((MYFLOAT)(-1.e0f)/(DISTANCE(n,m)));
167 107
}
108
#endif
168 109

  
110
// Interaction based of Force (1/r**2) or Potential (-grad(1/r))
169 111
MYFLOAT4 Interaction(MYFLOAT4 m,MYFLOAT4 n)
170 112
#if INTERACTION == TFORCE
171 113
// Simplest implementation of force (equals to acceleration)
......
180 122
// Estimate potential and proximate potential to estimate force
181 123
// MYFLOAT4 InteractionPotential(MYFLOAT4 m,MYFLOAT4 n)
182 124
{
183
    private MYFLOAT epsilon=(MYFLOAT)(1.e0f/1048576);
125
    // 1/1024 seems to be a good factor: larger one provides bad results
126
    private MYFLOAT epsilon=(MYFLOAT)(1.e0f/1024);
184 127
    private MYFLOAT4 er=normalize(n-m);
185 128
    private MYFLOAT4 dr=er*(MYFLOAT)epsilon;
186 129

  
187
    return(er*(PairPotential(m,n)-PairPotential(m+dr,n))/epsilon);
130
    return(er/epsilon*(PairPotential(m,n)-PairPotential(m+dr,n)));
188 131
}
189 132
#endif
190 133

  
......
403 346
    MYFLOAT N = (MYFLOAT)get_global_size(0);
404 347
    uint zmwc=seed_z+(uint)gid;
405 348
    uint wmwc=seed_w-(uint)gid;
406
    MYFLOAT4 SpeedVector;
349
    MYFLOAT4 CrossVector,SpeedVector;
407 350

  
351
    if (get_global_size(0)==2) {
352
       CrossVector=(MYFLOAT4)(1.e0f,1.e0f,1.e0f,0.e0f);
353
    } else {
354
       CrossVector=(MYFLOAT4)(MWCfp-5e-1f,MWCfp-5e-1f,MWCfp-5e-1f,0.e0f);
355
    }
356

  
408 357
    if (velocity<SMALL_NUM) {
409
       SpeedVector=(MYFLOAT4)normalize(cross(clDataX[gid],clCoM[0]))*sqrt((-AtomicPotential(clDataX,gid)/(MYFLOAT)2.e0f));
358
       SpeedVector=(MYFLOAT4)normalize(cross(clDataX[gid]-clCoM[0],CrossVector))*sqrt((-AtomicPotential(clDataX,gid)/(MYFLOAT)2.e0f));
410 359
    }
411 360
    else
412 361
    {    
413 362
       // cast to float for sin,cos are NEEDED by Mesa FP64 implementation!
414 363
       // Implemention on AMD Oland are probably broken in float
415 364

  
416
       MYFLOAT theta=acos((float)(1.0e0f-2.e0f*MWCfp));
417
       MYFLOAT phi=MWCfp*PI*(MYFLOAT)2.e0f;
418
       MYFLOAT sinTheta=sin((float)theta);
419
       MYFLOAT sinPhi=sin((float)phi);
420

  
421 365
       SpeedVector=(MYFLOAT4)((MWCfp-0.5e0f)*velocity,(MWCfp-0.5e0f)*velocity,
422 366
                              (MWCfp-0.5e0f)*velocity,0.e0f);
423 367
    }
......
660 604
    OpenGL=False
661 605
    # Speed rendering
662 606
    SpeedRendering=False
607
    # Counter ArtEvasions Measures (artefact evasion)
608
    CoArEv='None'
663 609
    # Shape to distribute
664 610
    Shape='Ball'
665 611
    # Type of Interaction
666 612
    InterType='Potential'
667 613
    
668
    HowToUse='%s -h [Help] -r [InitialRandom] -g [OpenGL] -e [VirielStress] -o [Verbose] -p [SpeedRendering] -f [Force] -d <DeviceId> -n <NumberOfParticules> -i <Iterations> -z <SizeOfBoxOrBall> -v <Velocity> -s <Step> -b <Ball|Box> -m <ImplicitEuler|RungeKutta|ExplicitEuler|Heun> -t <FP32|FP64>'
614
    HowToUse='%s -h [Help] -r [InitialRandom] -g [OpenGL] -e [VirielStress] -o [Verbose] -p [SpeedRendering] -x [NegativeExponential4ArteEvasions] -f [Force] -d <DeviceId> -n <NumberOfParticules> -i <Iterations> -z <SizeOfBoxOrBall> -v <Velocity> -s <Step> -b <Ball|Box> -m <ImplicitEuler|RungeKutta|ExplicitEuler|Heun> -t <FP32|FP64>'
669 615

  
670 616
    try:
671
        opts, args = getopt.getopt(sys.argv[1:],"rpfgehod:n:i:z:v:s:m:t:b:",["random","rendering","force","opengl","viriel","verbose","device=","number=","iterations=","size=","velocity=","step=","method=","valuetype=","shape="])
617
        opts, args = getopt.getopt(sys.argv[1:],"rpxfgehod:n:i:z:v:s:m:t:b:",["random","rendering","negexp","force","opengl","viriel","verbose","device=","number=","iterations=","size=","velocity=","step=","method=","valuetype=","shape="])
672 618
    except getopt.GetoptError:
673 619
        print(HowToUse % sys.argv[0])
674 620
        sys.exit(2)
......
725 671
            OpenGL=True
726 672
        elif opt in ("-p", "--rendering"):
727 673
            SpeedRendering=True
674
        elif opt in ("-x", "--negexp"):
675
            CoArEv='NegExp'
728 676
        elif opt in ("-o", "--verbose"):
729 677
            Verbose=True
730 678
        elif opt in ("-f", "--force"):
......
747 695
    print("OpenGL real time rendering : %s" % OpenGL)
748 696
    print("Speed rendering : %s" % SpeedRendering)
749 697
    print("Interaction type : %s" % InterType)
698
    print("Counter Artevasion type : %s" % CoArEv)
750 699

  
751 700
    # Create Numpy array of CL vector with 8 FP32    
752 701
    MyCoM = np.zeros(4,dtype=MyFloat)
......
755 704
    MyPotential = np.zeros(Number, dtype=MyFloat)
756 705
    MyKinetic = np.zeros(Number, dtype=MyFloat)
757 706

  
758
    Marsaglia,Computing,Interaction=DictionariesAPI()
707
    Marsaglia,Computing,Interaction,Artevasion=DictionariesAPI()
759 708

  
760 709
    # Scan the OpenCL arrays
761 710
    Id=0
......
784 733
    # Build all routines used for the computing
785 734

  
786 735
    #BuildOptions="-cl-mad-enable -cl-kernel-arg-info -cl-fast-relaxed-math -cl-std=CL1.2 -DTRNG=%i -DTYPE=%i" % (Marsaglia[RNG],Computing[ValueType])
787
    BuildOptions="-cl-mad-enable -cl-fast-relaxed-math -DTRNG=%i -DTYPE=%i -DINTERACTION=%i" % (Marsaglia[RNG],Computing[ValueType],Interaction[InterType])
736
    BuildOptions="-cl-mad-enable -cl-fast-relaxed-math -DTRNG=%i -DTYPE=%i -DINTERACTION=%i -DARTEVASION=%i" % (Marsaglia[RNG],Computing[ValueType],Interaction[InterType],Artevasion[CoArEv])
788 737

  
789 738
    if 'Intel' in PlatForm.name or 'Experimental' in PlatForm.name or 'Clover' in PlatForm.name or 'Portable' in PlatForm.name :
790 739
        MyRoutines = cl.Program(ctx, BlobOpenCL).build(options = BuildOptions)
......
896 845

  
897 846
    print("Duration stats on device %s with %s iterations :\n\tMean:\t%s\n\tMedian:\t%s\n\tStddev:\t%s\n\tMin:\t%s\n\tMax:\t%s\n\n\tVariability:\t%s\n" % (Device,Iterations,np.mean(Durations),np.median(Durations),np.std(Durations),np.min(Durations),np.max(Durations),np.std(Durations)/np.median(Durations)))
898 847

  
848
    # FPS: 1/Elapsed
849
    FPS=np.ones(len(Durations))
850
    FPS/=Durations
851

  
852
    print("FPS stats on device %s with %s iterations :\n\tMean:\t%s\n\tMedian:\t%s\n\tStddev:\t%s\n\tMin:\t%s\n\tMax:\t%s\n" % (Device,Iterations,np.mean(FPS),np.median(FPS),np.std(FPS),np.min(FPS),np.max(FPS)))
853

  
899 854
    # Contraction of Square*Size*Hertz: Size*Size/Elapsed
900 855
    Squertz=np.ones(len(Durations))
901 856
    Squertz*=Number*Number

Formats disponibles : Unified diff