/ - Diff - Bench4GPU - Forge du Centre Blaise Pascal

Révision 171

     #!/usr/bin/env python3
     # -*- coding: utf-8 -*-
     """
     Demonstrateur OpenCL d'interaction NCorps
     NBody Demonstrator implemented in OpenCL, rendering OpenGL
     Emmanuel QUEMENER <emmanuel.quemener@ens-lyon.fr> CeCILLv2
     """
-...
         Marsaglia={'CONG':0,'SHR3':1,'MWC':2,'KISS':3}
         Computing={'FP32':0,'FP64':1}
         Interaction={'Force':0,'Potential':1}
         return(Marsaglia,Computing,Interaction)
         Artevasion={'None':0,'NegExp':1}
         return(Marsaglia,Computing,Interaction,Artevasion)
     BlobOpenCL= """
     #define TFP32 0
     #define TFP64 1
-...
     #define TFORCE 0
     #define TPOTENTIAL 1
     #define NONE 0
     #define NEGEXP 1
     #if TYPE == TFP32
     #define MYFLOAT4 float4
     #define MYFLOAT8 float8
-...
     #endif
     #endif
     #define znew  ((zmwc=36969*(zmwc&65535)+(zmwc>>16))<<16)
     #define wnew  ((wmwc=18000*(wmwc&65535)+(wmwc>>16))&65535)
     #define MWC   (znew+wnew)
-...
     #define CONG  (jcong=69069*jcong+1234567)
     #define KISS  ((MWC^CONG)+SHR3)
     #define MWCfp (MYFLOAT)(MWC * 2.3283064365386963e-10f)
     #define KISSfp (MYFLOAT)(KISS * 2.3283064365386963e-10f)
     #define SHR3fp (MYFLOAT)(SHR3 * 2.3283064365386963e-10f)
     #define CONGfp (MYFLOAT)(CONG * 2.3283064365386963e-10f)
     #define PI (MYFLOAT)3.141592653589793238462643197169399375105820974944592307816406286e0f
     #define PI (MYFLOAT)3.141592653589793238e0f
     #define SMALL_NUM (MYFLOAT)1.e-9f
     #define LENGTH 1.e0f
     // Custom distance implementation: the built-in distance() is buggy on the AMD Oland chipset
     MYFLOAT MyDistance(MYFLOAT4 n,MYFLOAT4 m)
-...
     //    return(((MYFLOAT4)n-(MYFLOAT4)m)*(MYFLOAT)(1.e0f-exp(-c*r2))/(MYFLOAT)(r*r2));
     //}
     // Potential between 2 m,n bodies
     MYFLOAT PairPotential(MYFLOAT4 m,MYFLOAT4 n)
     #if ARTEVASION == NEGEXP
     // Add exp(-r) to numerator to avoid divergence for low distances
+    {
         MYFLOAT r=DISTANCE(n,m);
         return((-1.e0f+exp(-r))/r);
+    }
     #else
     // Classical potential in 1/r
+    {
     //    return((MYFLOAT)(-1.e0f)/(MyDistance(m,n)));
     //    MYFLOAT r=DISTANCE(n,m);
     //    return((-1.e0f+exp(-r))/r);
         return((MYFLOAT)(-1.e0f)/(DISTANCE(n,m)));
+    }
     #endif
     // Interaction based on Force (1/r**2) or Potential (-grad(1/r))
     MYFLOAT4 Interaction(MYFLOAT4 m,MYFLOAT4 n)
     #if INTERACTION == TFORCE
     // Simplest implementation of force (equals to acceleration)
-...
     // Evaluate the potential at the body and at a nearby point to estimate the force
     // MYFLOAT4 InteractionPotential(MYFLOAT4 m,MYFLOAT4 n)
+    {
         private MYFLOAT epsilon=(MYFLOAT)(1.e0f/1048576);
         // 1/1024 seems to be a good factor: larger one provides bad results
         private MYFLOAT epsilon=(MYFLOAT)(1.e0f/1024);
         private MYFLOAT4 er=normalize(n-m);
         private MYFLOAT4 dr=er*(MYFLOAT)epsilon;
         return(er*(PairPotential(m,n)-PairPotential(m+dr,n))/epsilon);
         return(er/epsilon*(PairPotential(m,n)-PairPotential(m+dr,n)));
+    }
     #endif
-...
         MYFLOAT N = (MYFLOAT)get_global_size(0);
         uint zmwc=seed_z+(uint)gid;
         uint wmwc=seed_w-(uint)gid;
         MYFLOAT4 SpeedVector;
         MYFLOAT4 CrossVector,SpeedVector;
         if (get_global_size(0)==2) {
            CrossVector=(MYFLOAT4)(1.e0f,1.e0f,1.e0f,0.e0f);
         } else {
            CrossVector=(MYFLOAT4)(MWCfp-5e-1f,MWCfp-5e-1f,MWCfp-5e-1f,0.e0f);
+        }
         if (velocity<SMALL_NUM) {
            SpeedVector=(MYFLOAT4)normalize(cross(clDataX[gid],clCoM[0]))*sqrt((-AtomicPotential(clDataX,gid)/(MYFLOAT)2.e0f));
            SpeedVector=(MYFLOAT4)normalize(cross(clDataX[gid]-clCoM[0],CrossVector))*sqrt((-AtomicPotential(clDataX,gid)/(MYFLOAT)2.e0f));
+        }
         else
+        {
            // The casts to float for sin, cos are NEEDED by the Mesa FP64 implementation!
            // The implementation on AMD Oland is probably broken in float
            MYFLOAT theta=acos((float)(1.0e0f-2.e0f*MWCfp));
            MYFLOAT phi=MWCfp*PI*(MYFLOAT)2.e0f;
            MYFLOAT sinTheta=sin((float)theta);
            MYFLOAT sinPhi=sin((float)phi);
            SpeedVector=(MYFLOAT4)((MWCfp-0.5e0f)*velocity,(MWCfp-0.5e0f)*velocity,
                                   (MWCfp-0.5e0f)*velocity,0.e0f);
+        }
-...
         OpenGL=False
         # Speed rendering
         SpeedRendering=False
         # Counter-measure for artefact evasion (CoArEv): 'None' or 'NegExp'
         CoArEv='None'
         # Shape to distribute
         Shape='Ball'
         # Type of Interaction
         InterType='Potential'
         HowToUse='%s -h [Help] -r [InitialRandom] -g [OpenGL] -e [VirielStress] -o [Verbose] -p [SpeedRendering] -f [Force] -d <DeviceId> -n <NumberOfParticules> -i <Iterations> -z <SizeOfBoxOrBall> -v <Velocity> -s <Step> -b <Ball|Box> -m <ImplicitEuler|RungeKutta|ExplicitEuler|Heun> -t <FP32|FP64>'
         HowToUse='%s -h [Help] -r [InitialRandom] -g [OpenGL] -e [VirielStress] -o [Verbose] -p [SpeedRendering] -x [NegativeExponential4ArteEvasions] -f [Force] -d <DeviceId> -n <NumberOfParticules> -i <Iterations> -z <SizeOfBoxOrBall> -v <Velocity> -s <Step> -b <Ball|Box> -m <ImplicitEuler|RungeKutta|ExplicitEuler|Heun> -t <FP32|FP64>'
         try:
             opts, args = getopt.getopt(sys.argv[1:],"rpfgehod:n:i:z:v:s:m:t:b:",["random","rendering","force","opengl","viriel","verbose","device=","number=","iterations=","size=","velocity=","step=","method=","valuetype=","shape="])
             opts, args = getopt.getopt(sys.argv[1:],"rpxfgehod:n:i:z:v:s:m:t:b:",["random","rendering","negexp","force","opengl","viriel","verbose","device=","number=","iterations=","size=","velocity=","step=","method=","valuetype=","shape="])
         except getopt.GetoptError:
             print(HowToUse % sys.argv[0])
             sys.exit(2)
-...
                 OpenGL=True
             elif opt in ("-p", "--rendering"):
                 SpeedRendering=True
             elif opt in ("-x", "--negexp"):
                 CoArEv='NegExp'
             elif opt in ("-o", "--verbose"):
                 Verbose=True
             elif opt in ("-f", "--force"):
-...
         print("OpenGL real time rendering : %s" % OpenGL)
         print("Speed rendering : %s" % SpeedRendering)
         print("Interaction type : %s" % InterType)
         print("Counter Artevasion type : %s" % CoArEv)
         # Create Numpy array of CL vector with 8 FP32
         MyCoM = np.zeros(4,dtype=MyFloat)
-...
         MyPotential = np.zeros(Number, dtype=MyFloat)
         MyKinetic = np.zeros(Number, dtype=MyFloat)
         Marsaglia,Computing,Interaction=DictionariesAPI()
         Marsaglia,Computing,Interaction,Artevasion=DictionariesAPI()
         # Scan the OpenCL arrays
         Id=0
-...
         # Build all routines used for the computing
         #BuildOptions="-cl-mad-enable -cl-kernel-arg-info -cl-fast-relaxed-math -cl-std=CL1.2 -DTRNG=%i -DTYPE=%i" % (Marsaglia[RNG],Computing[ValueType])
         BuildOptions="-cl-mad-enable -cl-fast-relaxed-math -DTRNG=%i -DTYPE=%i -DINTERACTION=%i" % (Marsaglia[RNG],Computing[ValueType],Interaction[InterType])
         BuildOptions="-cl-mad-enable -cl-fast-relaxed-math -DTRNG=%i -DTYPE=%i -DINTERACTION=%i -DARTEVASION=%i" % (Marsaglia[RNG],Computing[ValueType],Interaction[InterType],Artevasion[CoArEv])
         if 'Intel' in PlatForm.name or 'Experimental' in PlatForm.name or 'Clover' in PlatForm.name or 'Portable' in PlatForm.name :
             MyRoutines = cl.Program(ctx, BlobOpenCL).build(options = BuildOptions)
-...
         print("Duration stats on device %s with %s iterations :\n\tMean:\t%s\n\tMedian:\t%s\n\tStddev:\t%s\n\tMin:\t%s\n\tMax:\t%s\n\n\tVariability:\t%s\n" % (Device,Iterations,np.mean(Durations),np.median(Durations),np.std(Durations),np.min(Durations),np.max(Durations),np.std(Durations)/np.median(Durations)))
         # FPS: 1/Elapsed
         FPS=np.ones(len(Durations))
         FPS/=Durations
         print("FPS stats on device %s with %s iterations :\n\tMean:\t%s\n\tMedian:\t%s\n\tStddev:\t%s\n\tMin:\t%s\n\tMax:\t%s\n" % (Device,Iterations,np.mean(FPS),np.median(FPS),np.std(FPS),np.min(FPS),np.max(FPS)))
         # Contraction of Square*Size*Hertz: Size*Size/Elapsed
         Squertz=np.ones(len(Durations))
         Squertz*=Number*Number

Formats disponibles : Unified diff

Centre Blaise Pascal » Bench4GPU

Révision 171