Révision 171
NBody/NBody.py (revision 171) | ||
---|---|---|
1 | 1 |
#!/usr/bin/env python3 |
2 | 2 |
# -*- coding: utf-8 -*- |
3 | 3 |
""" |
4 |
Demonstrateur OpenCL d'interaction NCorps
|
|
4 |
NBody Demonstrator implemented in OpenCL, rendering OpenGL
|
|
5 | 5 |
|
6 | 6 |
Emmanuel QUEMENER <emmanuel.quemener@ens-lyon.fr> CeCILLv2 |
7 | 7 |
""" |
... | ... | |
20 | 20 |
Marsaglia={'CONG':0,'SHR3':1,'MWC':2,'KISS':3} |
21 | 21 |
Computing={'FP32':0,'FP64':1} |
22 | 22 |
Interaction={'Force':0,'Potential':1} |
23 |
return(Marsaglia,Computing,Interaction) |
|
23 |
Artevasion={'None':0,'NegExp':1} |
|
24 |
return(Marsaglia,Computing,Interaction,Artevasion) |
|
24 | 25 |
|
25 |
|
|
26 |
|
|
27 |
|
|
28 |
|
|
29 |
|
|
30 |
|
|
31 |
|
|
32 |
|
|
33 |
|
|
34 |
|
|
35 |
|
|
36 |
|
|
37 |
|
|
38 |
|
|
39 |
|
|
40 |
|
|
41 |
|
|
42 |
|
|
43 |
|
|
44 |
|
|
45 |
|
|
46 |
|
|
47 |
|
|
48 |
|
|
49 |
|
|
50 |
|
|
51 |
|
|
52 |
|
|
53 |
|
|
54 |
|
|
55 |
|
|
56 |
|
|
57 |
|
|
58 |
|
|
59 |
|
|
60 |
|
|
61 |
|
|
62 |
|
|
63 |
|
|
64 |
|
|
65 |
|
|
66 |
|
|
67 |
|
|
68 |
|
|
69 |
|
|
70 |
|
|
71 |
|
|
72 |
|
|
73 |
|
|
74 |
|
|
75 |
|
|
76 |
|
|
77 |
|
|
78 |
|
|
79 |
|
|
80 |
|
|
81 |
|
|
82 |
|
|
83 |
|
|
84 |
|
|
85 |
|
|
86 |
|
|
87 |
|
|
88 | 26 |
BlobOpenCL= """ |
89 | 27 |
#define TFP32 0 |
90 | 28 |
#define TFP64 1 |
... | ... | |
92 | 30 |
#define TFORCE 0 |
93 | 31 |
#define TPOTENTIAL 1 |
94 | 32 |
|
33 |
#define NONE 0 |
|
34 |
#define NEGEXP 1 |
|
35 |
|
|
95 | 36 |
#if TYPE == TFP32 |
96 | 37 |
#define MYFLOAT4 float4 |
97 | 38 |
#define MYFLOAT8 float8 |
... | ... | |
107 | 48 |
#endif |
108 | 49 |
#endif |
109 | 50 |
|
110 |
|
|
111 | 51 |
#define znew ((zmwc=36969*(zmwc&65535)+(zmwc>>16))<<16) |
112 | 52 |
#define wnew ((wmwc=18000*(wmwc&65535)+(wmwc>>16))&65535) |
113 | 53 |
#define MWC (znew+wnew) |
... | ... | |
115 | 55 |
#define CONG (jcong=69069*jcong+1234567) |
116 | 56 |
#define KISS ((MWC^CONG)+SHR3) |
117 | 57 |
|
118 |
|
|
119 | 58 |
#define MWCfp (MYFLOAT)(MWC * 2.3283064365386963e-10f) |
120 | 59 |
#define KISSfp (MYFLOAT)(KISS * 2.3283064365386963e-10f) |
121 | 60 |
#define SHR3fp (MYFLOAT)(SHR3 * 2.3283064365386963e-10f) |
122 | 61 |
#define CONGfp (MYFLOAT)(CONG * 2.3283064365386963e-10f) |
123 | 62 |
|
124 |
#define PI (MYFLOAT)3.141592653589793238462643197169399375105820974944592307816406286e0f
|
|
63 |
#define PI (MYFLOAT)3.141592653589793238e0f |
|
125 | 64 |
|
126 | 65 |
#define SMALL_NUM (MYFLOAT)1.e-9f |
127 | 66 |
|
128 |
#define LENGTH 1.e0f |
|
129 |
|
|
130 | 67 |
// Create my own Distance implementation: distance buggy on Oland AMD chipset |
131 | 68 |
|
132 | 69 |
MYFLOAT MyDistance(MYFLOAT4 n,MYFLOAT4 m) |
... | ... | |
154 | 91 |
// return(((MYFLOAT4)n-(MYFLOAT4)m)*(MYFLOAT)(1.e0f-exp(-c*r2))/(MYFLOAT)(r*r2)); |
155 | 92 |
//} |
156 | 93 |
|
94 |
// Potential between 2 m,n bodies |
|
157 | 95 |
MYFLOAT PairPotential(MYFLOAT4 m,MYFLOAT4 n) |
96 |
#if ARTEVASION == NEGEXP |
|
97 |
// Add exp(-r) to numerator to avoid divergence for low distances |
|
158 | 98 |
{ |
159 |
|
|
99 |
MYFLOAT r=DISTANCE(n,m); |
|
100 |
return((-1.e0f+exp(-r))/r); |
|
101 |
} |
|
102 |
#else |
|
103 |
// Classical potential in 1/r |
|
104 |
{ |
|
160 | 105 |
// return((MYFLOAT)(-1.e0f)/(MyDistance(m,n))); |
161 |
|
|
162 |
// MYFLOAT r=DISTANCE(n,m); |
|
163 |
// return((-1.e0f+exp(-r))/r); |
|
164 |
|
|
165 |
|
|
166 | 106 |
return((MYFLOAT)(-1.e0f)/(DISTANCE(n,m))); |
167 | 107 |
} |
108 |
#endif |
|
168 | 109 |
|
110 |
// Interaction based of Force (1/r**2) or Potential (-grad(1/r)) |
|
169 | 111 |
MYFLOAT4 Interaction(MYFLOAT4 m,MYFLOAT4 n) |
170 | 112 |
#if INTERACTION == TFORCE |
171 | 113 |
// Simplest implementation of force (equals to acceleration) |
... | ... | |
180 | 122 |
// Estimate potential and proximate potential to estimate force |
181 | 123 |
// MYFLOAT4 InteractionPotential(MYFLOAT4 m,MYFLOAT4 n) |
182 | 124 |
{ |
183 |
private MYFLOAT epsilon=(MYFLOAT)(1.e0f/1048576); |
|
125 |
// 1/1024 seems to be a good factor: larger one provides bad results |
|
126 |
private MYFLOAT epsilon=(MYFLOAT)(1.e0f/1024); |
|
184 | 127 |
private MYFLOAT4 er=normalize(n-m); |
185 | 128 |
private MYFLOAT4 dr=er*(MYFLOAT)epsilon; |
186 | 129 |
|
187 |
return(er*(PairPotential(m,n)-PairPotential(m+dr,n))/epsilon);
|
|
130 |
return(er/epsilon*(PairPotential(m,n)-PairPotential(m+dr,n)));
|
|
188 | 131 |
} |
189 | 132 |
#endif |
190 | 133 |
|
... | ... | |
403 | 346 |
MYFLOAT N = (MYFLOAT)get_global_size(0); |
404 | 347 |
uint zmwc=seed_z+(uint)gid; |
405 | 348 |
uint wmwc=seed_w-(uint)gid; |
406 |
MYFLOAT4 SpeedVector; |
|
349 |
MYFLOAT4 CrossVector,SpeedVector;
|
|
407 | 350 |
|
351 |
if (get_global_size(0)==2) { |
|
352 |
CrossVector=(MYFLOAT4)(1.e0f,1.e0f,1.e0f,0.e0f); |
|
353 |
} else { |
|
354 |
CrossVector=(MYFLOAT4)(MWCfp-5e-1f,MWCfp-5e-1f,MWCfp-5e-1f,0.e0f); |
|
355 |
} |
|
356 |
|
|
408 | 357 |
if (velocity<SMALL_NUM) { |
409 |
SpeedVector=(MYFLOAT4)normalize(cross(clDataX[gid],clCoM[0]))*sqrt((-AtomicPotential(clDataX,gid)/(MYFLOAT)2.e0f));
|
|
358 |
SpeedVector=(MYFLOAT4)normalize(cross(clDataX[gid]-clCoM[0],CrossVector))*sqrt((-AtomicPotential(clDataX,gid)/(MYFLOAT)2.e0f));
|
|
410 | 359 |
} |
411 | 360 |
else |
412 | 361 |
{ |
413 | 362 |
// cast to float for sin,cos are NEEDED by Mesa FP64 implementation! |
414 | 363 |
// Implemention on AMD Oland are probably broken in float |
415 | 364 |
|
416 |
MYFLOAT theta=acos((float)(1.0e0f-2.e0f*MWCfp)); |
|
417 |
MYFLOAT phi=MWCfp*PI*(MYFLOAT)2.e0f; |
|
418 |
MYFLOAT sinTheta=sin((float)theta); |
|
419 |
MYFLOAT sinPhi=sin((float)phi); |
|
420 |
|
|
421 | 365 |
SpeedVector=(MYFLOAT4)((MWCfp-0.5e0f)*velocity,(MWCfp-0.5e0f)*velocity, |
422 | 366 |
(MWCfp-0.5e0f)*velocity,0.e0f); |
423 | 367 |
} |
... | ... | |
660 | 604 |
OpenGL=False |
661 | 605 |
# Speed rendering |
662 | 606 |
SpeedRendering=False |
607 |
# Counter ArtEvasions Measures (artefact evasion) |
|
608 |
CoArEv='None' |
|
663 | 609 |
# Shape to distribute |
664 | 610 |
Shape='Ball' |
665 | 611 |
# Type of Interaction |
666 | 612 |
InterType='Potential' |
667 | 613 |
|
668 |
HowToUse='%s -h [Help] -r [InitialRandom] -g [OpenGL] -e [VirielStress] -o [Verbose] -p [SpeedRendering] -f [Force] -d <DeviceId> -n <NumberOfParticules> -i <Iterations> -z <SizeOfBoxOrBall> -v <Velocity> -s <Step> -b <Ball|Box> -m <ImplicitEuler|RungeKutta|ExplicitEuler|Heun> -t <FP32|FP64>' |
|
614 |
HowToUse='%s -h [Help] -r [InitialRandom] -g [OpenGL] -e [VirielStress] -o [Verbose] -p [SpeedRendering] -x [NegativeExponential4ArteEvasions] -f [Force] -d <DeviceId> -n <NumberOfParticules> -i <Iterations> -z <SizeOfBoxOrBall> -v <Velocity> -s <Step> -b <Ball|Box> -m <ImplicitEuler|RungeKutta|ExplicitEuler|Heun> -t <FP32|FP64>'
|
|
669 | 615 |
|
670 | 616 |
try: |
671 |
opts, args = getopt.getopt(sys.argv[1:],"rpfgehod:n:i:z:v:s:m:t:b:",["random","rendering","force","opengl","viriel","verbose","device=","number=","iterations=","size=","velocity=","step=","method=","valuetype=","shape="])
|
|
617 |
opts, args = getopt.getopt(sys.argv[1:],"rpxfgehod:n:i:z:v:s:m:t:b:",["random","rendering","negexp","force","opengl","viriel","verbose","device=","number=","iterations=","size=","velocity=","step=","method=","valuetype=","shape="])
|
|
672 | 618 |
except getopt.GetoptError: |
673 | 619 |
print(HowToUse % sys.argv[0]) |
674 | 620 |
sys.exit(2) |
... | ... | |
725 | 671 |
OpenGL=True |
726 | 672 |
elif opt in ("-p", "--rendering"): |
727 | 673 |
SpeedRendering=True |
674 |
elif opt in ("-x", "--negexp"): |
|
675 |
CoArEv='NegExp' |
|
728 | 676 |
elif opt in ("-o", "--verbose"): |
729 | 677 |
Verbose=True |
730 | 678 |
elif opt in ("-f", "--force"): |
... | ... | |
747 | 695 |
print("OpenGL real time rendering : %s" % OpenGL) |
748 | 696 |
print("Speed rendering : %s" % SpeedRendering) |
749 | 697 |
print("Interaction type : %s" % InterType) |
698 |
print("Counter Artevasion type : %s" % CoArEv) |
|
750 | 699 |
|
751 | 700 |
# Create Numpy array of CL vector with 8 FP32 |
752 | 701 |
MyCoM = np.zeros(4,dtype=MyFloat) |
... | ... | |
755 | 704 |
MyPotential = np.zeros(Number, dtype=MyFloat) |
756 | 705 |
MyKinetic = np.zeros(Number, dtype=MyFloat) |
757 | 706 |
|
758 |
Marsaglia,Computing,Interaction=DictionariesAPI() |
|
707 |
Marsaglia,Computing,Interaction,Artevasion=DictionariesAPI()
|
|
759 | 708 |
|
760 | 709 |
# Scan the OpenCL arrays |
761 | 710 |
Id=0 |
... | ... | |
784 | 733 |
# Build all routines used for the computing |
785 | 734 |
|
786 | 735 |
#BuildOptions="-cl-mad-enable -cl-kernel-arg-info -cl-fast-relaxed-math -cl-std=CL1.2 -DTRNG=%i -DTYPE=%i" % (Marsaglia[RNG],Computing[ValueType]) |
787 |
BuildOptions="-cl-mad-enable -cl-fast-relaxed-math -DTRNG=%i -DTYPE=%i -DINTERACTION=%i" % (Marsaglia[RNG],Computing[ValueType],Interaction[InterType])
|
|
736 |
BuildOptions="-cl-mad-enable -cl-fast-relaxed-math -DTRNG=%i -DTYPE=%i -DINTERACTION=%i -DARTEVASION=%i" % (Marsaglia[RNG],Computing[ValueType],Interaction[InterType],Artevasion[CoArEv])
|
|
788 | 737 |
|
789 | 738 |
if 'Intel' in PlatForm.name or 'Experimental' in PlatForm.name or 'Clover' in PlatForm.name or 'Portable' in PlatForm.name : |
790 | 739 |
MyRoutines = cl.Program(ctx, BlobOpenCL).build(options = BuildOptions) |
... | ... | |
896 | 845 |
|
897 | 846 |
print("Duration stats on device %s with %s iterations :\n\tMean:\t%s\n\tMedian:\t%s\n\tStddev:\t%s\n\tMin:\t%s\n\tMax:\t%s\n\n\tVariability:\t%s\n" % (Device,Iterations,np.mean(Durations),np.median(Durations),np.std(Durations),np.min(Durations),np.max(Durations),np.std(Durations)/np.median(Durations))) |
898 | 847 |
|
848 |
# FPS: 1/Elapsed |
|
849 |
FPS=np.ones(len(Durations)) |
|
850 |
FPS/=Durations |
|
851 |
|
|
852 |
print("FPS stats on device %s with %s iterations :\n\tMean:\t%s\n\tMedian:\t%s\n\tStddev:\t%s\n\tMin:\t%s\n\tMax:\t%s\n" % (Device,Iterations,np.mean(FPS),np.median(FPS),np.std(FPS),np.min(FPS),np.max(FPS))) |
|
853 |
|
|
899 | 854 |
# Contraction of Square*Size*Hertz: Size*Size/Elapsed |
900 | 855 |
Squertz=np.ones(len(Durations)) |
901 | 856 |
Squertz*=Number*Number |
Formats disponibles : Unified diff