/ - Diff - Bench4GPU - Forge du Centre Blaise Pascal

     /* Simple SillySum function in C */
     /* compilation with : gcc -O3 -o MySteps MySteps.c -lm */
     #include <math.h>
     #include <stdio.h>
     #include <stdlib.h>
     #include <sys/time.h>
     #define PI 3.141592653589793
     #define MYFLOAT float
     MYFLOAT MySillyFunction(MYFLOAT x)
+    {
         return(pow(sqrt(log(exp(atanh(tanh(asinh(sinh(acosh(cosh(atan(tan(asin(sin(acos(cos(x))))))))))))))),2));
+    }
     /*
      * Serial kernel: for every index i < size, apply MySillyFunction `calls`
      * times to a[i] and to b[i], then store the sum of both results in
      * res[i]. Nothing is written when size <= 0.
      */
     void MySillySum(MYFLOAT *res, MYFLOAT *a, MYFLOAT *b,int calls, int size)
     {
         /* Signed index matching `size`: `uint` is not standard C and made a
            negative size compare as a huge unsigned bound. */
         for (int i = 0; i < size; i++) {
             MYFLOAT ai = a[i];
             MYFLOAT bi = b[i];

             for (int c = 0; c < calls; c++) {
                 ai = MySillyFunction(ai);
                 bi = MySillyFunction(bi);
             }
             res[i] = ai + bi;
         }
     }
     /*
      * Same computation as the serial kernel, with the outer loop annotated
      * for OpenMP: res[i] = f^calls(a[i]) + f^calls(b[i]) where f is
      * MySillyFunction. Nothing is written when size <= 0.
      */
     void MySillySumOMP(MYFLOAT *res, MYFLOAT *a, MYFLOAT *b,int calls, int size)
     {
         /* Signed index matching `size`: `uint` is not standard C and made a
            negative size compare as a huge unsigned bound. */
         #pragma omp parallel for
         for (int i = 0; i < size; i++) {
             MYFLOAT ai = a[i];
             MYFLOAT bi = b[i];

             for (int c = 0; c < calls; c++) {
                 ai = MySillyFunction(ai);
                 bi = MySillyFunction(bi);
             }
             res[i] = ai + bi;
         }
     }
     MYFLOAT MyNorm(MYFLOAT *a,MYFLOAT *b,int size)
+    {
       MYFLOAT norm=0.;
       for (int i=0;i<size;i++)
+        {
           norm+=pow(a[i]-b[i],2);
+        }
       return(sqrt(norm));
+    }
     /*
      * Print the first `size` elements of a on one stdout line: "[", each
      * value formatted as " %.8e ", then "]" and a newline.
      */
     void MyPrint(MYFLOAT *a,int size)
     {
         printf("[");
         for (int i = 0; i < size; i++) {
             printf(" %.8e ", a[i]);
         }
         printf("]\n");
     }
     int main(int argc,char *argv[])
+    {
       float *a,*b,*res,*resacc;
       int size=1024;
       int calls=1;
       struct timeval tv1,tv2;
       if (argc > 1) {
         size=(int)atoll(argv[1]);
         calls=(int)atoll(argv[2]);
+      }
       else {
         printf("\n\tPi : Estimate SillySum\n\n\t\t#1 : size (default 1024)\n\t\t#2 : calls (default 1)\n\n");
+      }
       printf("%i %i\n",size,calls);
       a=(float*)malloc(size*sizeof(MYFLOAT));
       b=(float*)malloc(size*sizeof(MYFLOAT));
       res=(float*)malloc(size*sizeof(MYFLOAT));
       resacc=(float*)malloc(size*sizeof(MYFLOAT));
       srand(110271);
       for (int i=0;i<size;i++)
+        {
           a[i]=(MYFLOAT)rand()/(MYFLOAT)RAND_MAX;
           b[i]=(MYFLOAT)rand()/(MYFLOAT)RAND_MAX;
           res[i]=0.;
           resacc[i]=0.;
+        }
       gettimeofday(&tv1, NULL);
       MySillySum(res,a,b,calls,size);
       gettimeofday(&tv2, NULL);
       MYFLOAT elapsed=(MYFLOAT)((tv2.tv_sec-tv1.tv_sec) * 1000000L +
     			    (tv2.tv_usec-tv1.tv_usec))/1000000;
     #ifdef VERBOSE
       MyPrint(res,size);
       MyPrint(resacc,size);
     #endif
       printf("Elapsed Time: %.3f\n",elapsed);
       printf("NaiveRate: %.i\n",(int)((float)size/elapsed));
       free(a);
       free(b);
       free(res);
       free(resacc);
+    }

     /* Simple SillySum function in C and OpenMP/C */
     /* compilation with : gcc -fopenmp -O3 -o MySteps_openmp MySteps_openmp.c -lm -lgomp */
     #include <math.h>
     #include <stdio.h>
     #include <stdlib.h>
     #include <sys/time.h>
     #define PI 3.141592653589793
     #define MYFLOAT float
     MYFLOAT MySillyFunction(MYFLOAT x)
+    {
         return(pow(sqrt(log(exp(atanh(tanh(asinh(sinh(acosh(cosh(atan(tan(asin(sin(acos(cos(x))))))))))))))),2));
+    }
     /*
      * Serial kernel: for every index i < size, apply MySillyFunction `calls`
      * times to a[i] and to b[i], then store the sum of both results in
      * res[i]. Nothing is written when size <= 0.
      */
     void MySillySum(MYFLOAT *res, MYFLOAT *a, MYFLOAT *b,int calls, int size)
     {
         /* Signed index matching `size`: `uint` is not standard C and made a
            negative size compare as a huge unsigned bound. */
         for (int i = 0; i < size; i++) {
             MYFLOAT ai = a[i];
             MYFLOAT bi = b[i];

             for (int c = 0; c < calls; c++) {
                 ai = MySillyFunction(ai);
                 bi = MySillyFunction(bi);
             }
             res[i] = ai + bi;
         }
     }
     /*
      * Same computation as the serial kernel, with the outer loop annotated
      * for OpenMP: res[i] = f^calls(a[i]) + f^calls(b[i]) where f is
      * MySillyFunction. Nothing is written when size <= 0.
      */
     void MySillySumOMP(MYFLOAT *res, MYFLOAT *a, MYFLOAT *b,int calls, int size)
     {
         /* Signed index matching `size`: `uint` is not standard C and made a
            negative size compare as a huge unsigned bound. */
         #pragma omp parallel for
         for (int i = 0; i < size; i++) {
             MYFLOAT ai = a[i];
             MYFLOAT bi = b[i];

             for (int c = 0; c < calls; c++) {
                 ai = MySillyFunction(ai);
                 bi = MySillyFunction(bi);
             }
             res[i] = ai + bi;
         }
     }
     MYFLOAT MyNorm(MYFLOAT *a,MYFLOAT *b,int size)
+    {
       MYFLOAT norm=0.;
       for (int i=0;i<size;i++)
+        {
           norm+=pow(a[i]-b[i],2);
+        }
       return(sqrt(norm));
+    }
     /*
      * Print the first `size` elements of a on one stdout line: "[", each
      * value formatted as " %.8e ", then "]" and a newline.
      */
     void MyPrint(MYFLOAT *a,int size)
     {
         printf("[");
         for (int i = 0; i < size; i++) {
             printf(" %.8e ", a[i]);
         }
         printf("]\n");
     }
     int main(int argc,char *argv[])
+    {
       float *a,*b,*res,*resacc;
       int size=1024;
       int calls=1;
       struct timeval tv1,tv2;
       if (argc > 1) {
         size=(int)atoll(argv[1]);
         calls=(int)atoll(argv[2]);
+      }
       else {
         printf("\n\tPi : Estimate SillySum\n\n\t\t#1 : size (default 1024)\n\t\t#2 : calls (default 1)\n\n");
+      }
       printf("%i %i\n",size,calls);
       a=(float*)malloc(size*sizeof(MYFLOAT));
       b=(float*)malloc(size*sizeof(MYFLOAT));
       res=(float*)malloc(size*sizeof(MYFLOAT));
       resacc=(float*)malloc(size*sizeof(MYFLOAT));
       srand(110271);
       for (int i=0;i<size;i++)
+        {
           a[i]=(MYFLOAT)rand()/(MYFLOAT)RAND_MAX;
           b[i]=(MYFLOAT)rand()/(MYFLOAT)RAND_MAX;
           res[i]=0.;
           resacc[i]=0.;
+        }
       gettimeofday(&tv1, NULL);
       MySillySum(res,a,b,calls,size);
       gettimeofday(&tv2, NULL);
       MYFLOAT elapsed=(MYFLOAT)((tv2.tv_sec-tv1.tv_sec) * 1000000L +
     			    (tv2.tv_usec-tv1.tv_usec))/1000000;
       gettimeofday(&tv1, NULL);
       MySillySumOMP(resacc,a,b,calls,size);
       gettimeofday(&tv2, NULL);
       MYFLOAT elapsedAcc=(MYFLOAT)((tv2.tv_sec-tv1.tv_sec) * 1000000L +
     			       (tv2.tv_usec-tv1.tv_usec))/1000000;
       MYFLOAT MyChecker=MyNorm(res,resacc,size);
       printf("Norm: %.8e\n",MyChecker);
     #ifdef VERBOSE
       MyPrint(res,size);
       MyPrint(resacc,size);
     #endif
       printf("Elapsed Time: %.3f\n",elapsed);
       printf("OMP Elapsed Time: %.3f\n",elapsedAcc);
       printf("NaiveRate: %.i\n",(int)((float)size/elapsed));
       printf("OMPRate: %.i\n",(int)((float)size/elapsedAcc));
       printf("AccRatio: %.3f\n",elapsed/elapsedAcc);
       free(a);
       free(b);
       free(res);
       free(resacc);
+    }

     /* Simple SillySum function in C and OpenACC/C */
     /* compilation with : gcc -O3 -fopenacc -foffload=nvptx-none -foffload="-O3 -misa=sm_35 -lm" -o MySteps_openacc MySteps_openacc.c -lm */
     #include <math.h>
     #include <stdio.h>
     #include <stdlib.h>
     #include <sys/time.h>
     #define PI 3.141592653589793
     #define MYFLOAT float
     #pragma acc routine
     MYFLOAT MySillyFunction(MYFLOAT x)
+    {
         return(pow(sqrt(log(exp(atanh(tanh(asinh(sinh(acosh(cosh(atan(tan(asin(sin(acos(cos(x))))))))))))))),2));
+    }
     /*
      * Serial kernel: for every index i < size, apply MySillyFunction `calls`
      * times to a[i] and to b[i], then store the sum of both results in
      * res[i]. Nothing is written when size <= 0.
      */
     void MySillySum(MYFLOAT *res, MYFLOAT *a, MYFLOAT *b,int calls, int size)
     {
         /* Signed index matching `size`: `uint` is not standard C and made a
            negative size compare as a huge unsigned bound. */
         for (int i = 0; i < size; i++) {
             MYFLOAT ai = a[i];
             MYFLOAT bi = b[i];

             for (int c = 0; c < calls; c++) {
                 ai = MySillyFunction(ai);
                 bi = MySillyFunction(bi);
             }
             res[i] = ai + bi;
         }
     }
     /*
      * Same computation as the serial kernel, annotated for OpenACC:
      * res[i] = f^calls(a[i]) + f^calls(b[i]) where f is MySillyFunction.
      * The data directive copies a[0:size] and b[0:size] in and res[0:size]
      * out around the parallel loop. Nothing is written when size <= 0.
      */
     void MySillySumOpenACC(MYFLOAT *res, MYFLOAT *a, MYFLOAT *b,int calls, int size)
     {
         /* Signed index matching `size`: `uint` is not standard C and made a
            negative size compare as a huge unsigned bound. */
         #pragma acc data copyin(a[0:size],b[0:size]),copyout(res[0:size])
         #pragma acc parallel loop
         for (int i = 0; i < size; i++) {
             MYFLOAT ai = a[i];
             MYFLOAT bi = b[i];

             for (int c = 0; c < calls; c++) {
                 ai = MySillyFunction(ai);
                 bi = MySillyFunction(bi);
             }
             res[i] = ai + bi;
         }
     }
     MYFLOAT MyNorm(MYFLOAT *a,MYFLOAT *b,int size)
+    {
       MYFLOAT norm=0.;
       for (int i=0;i<size;i++)
+        {
           norm+=pow(a[i]-b[i],2);
+        }
       return(sqrt(norm));
+    }
     /*
      * Print the first `size` elements of a on one stdout line: "[", each
      * value formatted as " %.8e ", then "]" and a newline.
      */
     void MyPrint(MYFLOAT *a,int size)
     {
         printf("[");
         for (int i = 0; i < size; i++) {
             printf(" %.8e ", a[i]);
         }
         printf("]\n");
     }
     int main(int argc,char *argv[])
+    {
       float *a,*b,*res,*resacc;
       int size=1024;
       int calls=1;
       struct timeval tv1,tv2;
       if (argc > 1) {
         size=(int)atoll(argv[1]);
         calls=(int)atoll(argv[2]);
+      }
       else {
         printf("\n\tPi : Estimate SillySum\n\n\t\t#1 : size (default 1024)\n\t\t#2 : calls (default 1)\n\n");
+      }
       printf("%i %i\n",size,calls);
       a=(float*)malloc(size*sizeof(MYFLOAT));
       b=(float*)malloc(size*sizeof(MYFLOAT));
       res=(float*)malloc(size*sizeof(MYFLOAT));
       resacc=(float*)malloc(size*sizeof(MYFLOAT));
       srand(110271);
       for (int i=0;i<size;i++)
+        {
           a[i]=(MYFLOAT)rand()/(MYFLOAT)RAND_MAX;
           b[i]=(MYFLOAT)rand()/(MYFLOAT)RAND_MAX;
           res[i]=0.;
           resacc[i]=0.;
+        }
       gettimeofday(&tv1, NULL);
       MySillySum(res,a,b,calls,size);
       gettimeofday(&tv2, NULL);
       MYFLOAT elapsed=(MYFLOAT)((tv2.tv_sec-tv1.tv_sec) * 1000000L +
     			    (tv2.tv_usec-tv1.tv_usec))/1000000;
       gettimeofday(&tv1, NULL);
       MySillySumOpenACC(resacc,a,b,calls,size);
       gettimeofday(&tv2, NULL);
       MYFLOAT elapsedAcc=(MYFLOAT)((tv2.tv_sec-tv1.tv_sec) * 1000000L +
     			       (tv2.tv_usec-tv1.tv_usec))/1000000;
       MYFLOAT MyChecker=MyNorm(res,resacc,size);
       printf("Norm: %.8e\n",MyChecker);
     #ifdef VERBOSE
       MyPrint(res,size);
       MyPrint(resacc,size);
     #endif
       printf("Elapsed Time: %.3f\n",elapsed);
       printf("OpenACC Elapsed Time: %.3f\n",elapsedAcc);
       printf("NaiveRate: %.i\n",(int)((float)size/elapsed));
       printf("OpenACCRate: %.i\n",(int)((float)size/elapsedAcc));
       printf("OpenACCRatio: %.3f\n",elapsed/elapsedAcc);
       free(a);
       free(b);
       free(res);
       free(resacc);
+    }

     /* Simple SillySum function in C and OpenACC/C */
     /* compilation with sequential compute : gcc -O3 -fopenacc -foffload=nvptx-none -foffload="-O3 -misa=sm_35 -lm" -o MySteps_6_openacc MySteps_6_openacc.c -lm */
     /* compilation without sequential compute : gcc -DNOSERIAL -O3 -fopenacc -foffload=nvptx-none -foffload="-O3 -misa=sm_35 -lm" -o MySteps_6_openacc_NoSerial MySteps_6_openacc.c -lm */
     #include <math.h>
     #include <stdio.h>
     #include <stdlib.h>
     #include <sys/time.h>
     #define PI 3.141592653589793
     #define MYFLOAT float
     #pragma acc routine
     MYFLOAT MySillyFunction(MYFLOAT x)
+    {
         return(pow(sqrt(log(exp(atanh(tanh(asinh(sinh(acosh(cosh(atan(tan(asin(sin(acos(cos(x))))))))))))))),2));
+    }
     /*
      * Serial kernel: for every index i < size, apply MySillyFunction `calls`
      * times to a[i] and to b[i], then store the sum of both results in
      * res[i]. Nothing is written when size <= 0.
      */
     void MySillySum(MYFLOAT *res, MYFLOAT *a, MYFLOAT *b,int calls, int size)
     {
         /* Signed index matching `size`: `uint` is not standard C and made a
            negative size compare as a huge unsigned bound. */
         for (int i = 0; i < size; i++) {
             MYFLOAT ai = a[i];
             MYFLOAT bi = b[i];

             for (int c = 0; c < calls; c++) {
                 ai = MySillyFunction(ai);
                 bi = MySillyFunction(bi);
             }
             res[i] = ai + bi;
         }
     }
     /*
      * Same computation as the serial kernel, annotated for OpenACC:
      * res[i] = f^calls(a[i]) + f^calls(b[i]) where f is MySillyFunction.
      * The data directive copies a[0:size] and b[0:size] in and res[0:size]
      * out around the parallel loop. Nothing is written when size <= 0.
      */
     void MySillySumOpenACC(MYFLOAT *res, MYFLOAT *a, MYFLOAT *b,int calls, int size)
     {
         /* Signed index matching `size`: `uint` is not standard C and made a
            negative size compare as a huge unsigned bound. */
         #pragma acc data copyin(a[0:size],b[0:size]),copyout(res[0:size])
         #pragma acc parallel loop
         for (int i = 0; i < size; i++) {
             MYFLOAT ai = a[i];
             MYFLOAT bi = b[i];

             for (int c = 0; c < calls; c++) {
                 ai = MySillyFunction(ai);
                 bi = MySillyFunction(bi);
             }
             res[i] = ai + bi;
         }
     }
     MYFLOAT MyNorm(MYFLOAT *a,MYFLOAT *b,int size)
+    {
       MYFLOAT norm=0.;
       for (int i=0;i<size;i++)
+        {
           norm+=pow(a[i]-b[i],2);
+        }
       return(sqrt(norm));
+    }
     /*
      * Print the first `size` elements of a on one stdout line: "[", each
      * value formatted as " %.8e ", then "]" and a newline.
      */
     void MyPrint(MYFLOAT *a,int size)
     {
         printf("[");
         for (int i = 0; i < size; i++) {
             printf(" %.8e ", a[i]);
         }
         printf("]\n");
     }
     int main(int argc,char *argv[])
+    {
       float *a,*b,*res,*resacc;
       int size=1024;
       int calls=1;
       struct timeval tv1,tv2;
       if (argc > 1) {
         size=(int)atoll(argv[1]);
         calls=(int)atoll(argv[2]);
+      }
       else {
         printf("\n\tPi : Estimate SillySum\n\n\t\t#1 : size (default 1024)\n\t\t#2 : calls (default 1)\n\n");
+      }
       printf("%i %i\n",size,calls);
       a=(float*)malloc(size*sizeof(MYFLOAT));
       b=(float*)malloc(size*sizeof(MYFLOAT));
       res=(float*)malloc(size*sizeof(MYFLOAT));
       resacc=(float*)malloc(size*sizeof(MYFLOAT));
       srand(110271);
       for (int i=0;i<size;i++)
+        {
           a[i]=(MYFLOAT)rand()/(MYFLOAT)RAND_MAX;
           b[i]=(MYFLOAT)rand()/(MYFLOAT)RAND_MAX;
           res[i]=0.;
           resacc[i]=0.;
+        }
     #ifndef NOSERIAL
       gettimeofday(&tv1, NULL);
       MySillySum(res,a,b,calls,size);
       gettimeofday(&tv2, NULL);
     #endif
       MYFLOAT elapsed=(MYFLOAT)((tv2.tv_sec-tv1.tv_sec) * 1000000L +
     			    (tv2.tv_usec-tv1.tv_usec))/1000000;
       gettimeofday(&tv1, NULL);
       MySillySumOpenACC(resacc,a,b,calls,size);
       gettimeofday(&tv2, NULL);
       MYFLOAT elapsedAcc=(MYFLOAT)((tv2.tv_sec-tv1.tv_sec) * 1000000L +
     			       (tv2.tv_usec-tv1.tv_usec))/1000000;
     #ifndef NOSERIAL
       MYFLOAT MyChecker=MyNorm(res,resacc,size);
       printf("Norm: %.8e\n",MyChecker);
     #endif
     #ifdef VERBOSE
       MyPrint(res,size);
       MyPrint(resacc,size);
     #endif
     #ifndef NOSERIAL
       printf("Elapsed Time: %.3f\n",elapsed);
       printf("OpenACC Elapsed Time: %.3f\n",elapsedAcc);
     #endif
     #ifndef NOSERIAL
       printf("NaiveRate: %.lld\n",(unsigned long)((float)size/elapsed));
     #endif
       printf("OpenACCRate: %.lld\n",(unsigned long)((float)size/elapsedAcc));
     #ifndef NOSERIAL
       printf("OpenACCRatio: %.3f\n",elapsed/elapsedAcc);
     #endif
       free(a);
       free(b);
       free(res);
       free(resacc);
+    }

     /* Simple Sum function in C */
     /* compilation with : gcc -O3 -o MySteps_1 MySteps_1.c -lm */
     #include <math.h>
     #include <stdio.h>
     #include <stdlib.h>
     #include <sys/time.h>
     #define PI 3.141592653589793
     #define MYFLOAT float
     /*
      * Element-wise sum: res[i] = a[i] + b[i] for every index i < size.
      * Nothing is written when size <= 0.
      */
     void MySum(MYFLOAT *res, MYFLOAT *a, MYFLOAT *b, int size)
     {
         /* Signed index matching `size`: `uint` is not standard C and made a
            negative size compare as a huge unsigned bound. */
         for (int i = 0; i < size; i++) {
             res[i] = a[i] + b[i];
         }
     }
     MYFLOAT MyNorm(MYFLOAT *a,MYFLOAT *b,int size)
+    {
       MYFLOAT norm=0.;
       for (int i=0;i<size;i++)
+        {
           norm+=pow(a[i]-b[i],2);
+        }
       return(sqrt(norm));
+    }
     /*
      * Print the first `size` elements of a on one stdout line: "[", each
      * value formatted as " %.8e ", then "]" and a newline.
      */
     void MyPrint(MYFLOAT *a,int size)
     {
         printf("[");
         for (int i = 0; i < size; i++) {
             printf(" %.8e ", a[i]);
         }
         printf("]\n");
     }
     int main(int argc,char *argv[])
+    {
       float *a,*b,*res,*resacc;
       int size=1024;
       struct timeval tv1,tv2;
       if (argc > 1) {
         size=(int)atoll(argv[1]);
+      }
       else {
         printf("\n\tMySteps : Estimate SillySum\n\n\t\t#1 : size (default 1024)\n\n");
+      }
       printf("%i %i\n",size);
       a=(float*)malloc(size*sizeof(MYFLOAT));
       b=(float*)malloc(size*sizeof(MYFLOAT));
       res=(float*)malloc(size*sizeof(MYFLOAT));
       srand(110271);
       for (int i=0;i<size;i++)
+        {
           a[i]=(MYFLOAT)rand()/(MYFLOAT)RAND_MAX;
           b[i]=(MYFLOAT)rand()/(MYFLOAT)RAND_MAX;
           res[i]=0.;
+        }
       gettimeofday(&tv1, NULL);
       MySum(res,a,b,size);
       gettimeofday(&tv2, NULL);
       MYFLOAT elapsed=(MYFLOAT)((tv2.tv_sec-tv1.tv_sec) * 1000000L +
     			    (tv2.tv_usec-tv1.tv_usec))/1000000;
     #ifdef VERBOSE
       MyPrint(res,size);
     #endif
       printf("Elapsed Time: %.3f\n",elapsed);
       printf("NaiveRate: %.lld\n",(unsigned long)((float)size/elapsed));
       free(a);
       free(b);
       free(res);
+    }

+    #
     def NumpyFFT(x,y):
         """Return the FFT of the complex signal x + 1j*y as a (real, imag) pair.

         The input is cast to single-precision complex with np.csingle before
         np.fft.fft is applied. The spectrum is also printed to stdout.
         """
         # Build the complex input once, directly in single precision; the
         # previous uncast assignment was overwritten before being used.
         xy=np.csingle(x+1.j*y)
         XY=np.fft.fft(xy)
         print(XY)
         return(XY.real,XY.imag)
+    #
-...
         import gpyfft
         from gpyfft.fft import FFT
         TimeIn=time.time()
         Id=0
         HasXPU=False
         for platform in cl.get_platforms():
-...
         if HasXPU==False:
             print("No XPU #%i found in all of %i devices, sorry..." % (Device,Id-1))
             sys.exit()
         Elapsed=time.time()-TimeIn
         print("Selection of device : %.3f" % Elapsed)
         TimeIn=time.time()
         try:
             ctx = cl.Context(devices=[XPU])
             queue = cl.CommandQueue(ctx,properties=cl.command_queue_properties.PROFILING_ENABLE)
         except:
             print("Crash during context creation")
         Elapsed=time.time()-TimeIn
         print("Context initialisation : %.3f" % Elapsed)
         XY_gpu = cla.to_device(queue, x+1.j*y)
         TimeIn=time.time()
         XY_gpu = cla.to_device(queue, np.csingle(x+1.j*y))
         Elapsed=time.time()-TimeIn
         print("Copy from Host to Device : %.3f" % Elapsed)
         transform = FFT(ctx, queue, XY_gpu)
         TimeIn=time.time()
         transform = FFT(ctx, queue, XY_gpu)
         event, = transform.enqueue()
         event.wait()
         Elapsed=time.time()-TimeIn
         print("Compute FFT : %.3f" % Elapsed)
         TimeIn=time.time()
         XY = XY_gpu.get()
         Elapsed=time.time()-TimeIn
         print("Copy from Device to Host : %.3f" % Elapsed)
         print(XY)
         return(XY.real,XY.imag)
     # Naive Discrete Fourier Transform
-...
             m_np,n_np=NumpyFFT(a_np,b_np)
             NumpyFFTElapsed=time.time()-TimeIn
             NumpyFFTRate=int(SIZE/NumpyFFTElapsed)
             print("NumpyFFTElapsed: %i" % NumpyFFTElapsed)
             print("NumpyFFTRate: %i" % NumpyFFTRate)
             print("Precision: ",np.linalg.norm(m_np-C_np),
                   np.linalg.norm(n_np-D_np))
-...
             i_np,j_np=OpenCLFFT(a_np,b_np,Device)
             OpenCLFFTElapsed=time.time()-TimeIn
             OpenCLFFTRate=int(SIZE/OpenCLFFTElapsed)
             print("OpenCLElapsed: %i" % OpenCLFFTElapsed)
             print("OpenCLRate: %i" % OpenCLFFTRate)
             print("Precision: ",np.linalg.norm(i_np-C_np),
                   np.linalg.norm(j_np-D_np))
         if OpenCLFFTMethod and NumpyFFTMethod:
             print("NumpyOpenCLRatio: %f" % (OpenCLFFTRate/NumpyFFTRate))

     /* Simple SillySum function in C */
     /* compilation with : gcc -O3 -o MySteps_6 MySteps_6.c -lm */
     #include <math.h>
     #include <stdio.h>
     #include <stdlib.h>
     #include <sys/time.h>
     #define PI 3.141592653589793
     #define MYFLOAT float
     MYFLOAT MySillyFunction(MYFLOAT x)
+    {
         return(pow(sqrt(log(exp(atanh(tanh(asinh(sinh(acosh(cosh(atan(tan(asin(sin(acos(cos(x))))))))))))))),2));
+    }
     /*
      * Serial kernel: for every index i < size, apply MySillyFunction `calls`
      * times to a[i] and to b[i], then store the sum of both results in
      * res[i]. Nothing is written when size <= 0.
      */
     void MySillySum(MYFLOAT *res, MYFLOAT *a, MYFLOAT *b,int calls, int size)
     {
         /* Signed index matching `size`: `uint` is not standard C and made a
            negative size compare as a huge unsigned bound. */
         for (int i = 0; i < size; i++) {
             MYFLOAT ai = a[i];
             MYFLOAT bi = b[i];

             for (int c = 0; c < calls; c++) {
                 ai = MySillyFunction(ai);
                 bi = MySillyFunction(bi);
             }
             res[i] = ai + bi;
         }
     }
     MYFLOAT MyNorm(MYFLOAT *a,MYFLOAT *b,int size)
+    {
       MYFLOAT norm=0.;
       for (int i=0;i<size;i++)
+        {
           norm+=pow(a[i]-b[i],2);
+        }
       return(sqrt(norm));
+    }
     /*
      * Print the first `size` elements of a on one stdout line: "[", each
      * value formatted as " %.8e ", then "]" and a newline.
      */
     void MyPrint(MYFLOAT *a,int size)
     {
         printf("[");
         for (int i = 0; i < size; i++) {
             printf(" %.8e ", a[i]);
         }
         printf("]\n");
     }
     int main(int argc,char *argv[])
+    {
       float *a,*b,*res,*resacc;
       int size=1024;
       int calls=1;
       struct timeval tv1,tv2;
       if (argc > 1) {
         size=(int)atoll(argv[1]);
         calls=(int)atoll(argv[2]);
+      }
       else {
         printf("\n\tMySteps : Estimate SillySum\n\n\t\t#1 : size (default 1024)\n\t\t#2 : calls (default 1)\n\n");
+      }
       printf("%i %i\n",size,calls);
       a=(float*)malloc(size*sizeof(MYFLOAT));
       b=(float*)malloc(size*sizeof(MYFLOAT));
       res=(float*)malloc(size*sizeof(MYFLOAT));
       resacc=(float*)malloc(size*sizeof(MYFLOAT));
       srand(110271);
       for (int i=0;i<size;i++)
+        {
           a[i]=(MYFLOAT)rand()/(MYFLOAT)RAND_MAX;
           b[i]=(MYFLOAT)rand()/(MYFLOAT)RAND_MAX;
           res[i]=0.;
           resacc[i]=0.;
+        }
       gettimeofday(&tv1, NULL);
       MySillySum(res,a,b,calls,size);
       gettimeofday(&tv2, NULL);
       MYFLOAT elapsed=(MYFLOAT)((tv2.tv_sec-tv1.tv_sec) * 1000000L +
     			    (tv2.tv_usec-tv1.tv_usec))/1000000;
     #ifdef VERBOSE
       MyPrint(res,size);
       MyPrint(resacc,size);
     #endif
       printf("Elapsed Time: %.3f\n",elapsed);
       printf("NaiveRate: %.lld\n",(unsigned long)((float)size/elapsed));
       free(a);
       free(b);
       free(res);
       free(resacc);
+    }

ETSN/MySteps_2.py (revision 296)
196	196	# Check on CPU with Numpy:
197	197	print(res_cl - res_np)
198	198	print(np.linalg.norm(res_cl - res_np))
199		assert np.allclose(res_np, res_cl,rtol=1e-4)
	199	assert np.allclose(res_cl, res_np,rtol=1e-4)

         Device=0
         Calls=1
         Threads=1
         Serial=True
         import getopt
         HowToUse='%s -g <CUDA/OpenCL> -s <SizeOfVector> -d <DeviceId> -c <SillyCalls> -t <Threads>'
         HowToUse='%s -n -g <CUDA/OpenCL> -s <SizeOfVector> -d <DeviceId> -c <SillyCalls> -t <Threads>'
         try:
             opts, args = getopt.getopt(sys.argv[1:],"hg:s:d:c:t:",["gpustyle=","size=","device=","calls=","threads="])
             opts, args = getopt.getopt(sys.argv[1:],"hng:s:d:c:t:",["gpustyle=","size=","device=","calls=","threads="])
         except getopt.GetoptError:
             print(HowToUse % sys.argv[0])
             sys.exit(2)
-...
                 GpuStyle = arg
             elif opt in ("-s", "--size"):
                 SIZE = int(arg)
             elif opt in ("-n"):
                 Serial = False
         print("Device Selection : %i" % Device)
         print("GpuStyle used : %s" % GpuStyle)
         print("Size of complex vector : %i" % SIZE)
         print("Number of silly calls : %i" % Calls)
         print("Number of Threads : %i" % Threads)
         print("Serial compute : %i" % Serial)
         if GpuStyle=='CUDA':
             try:
-...
         b_np = np.random.rand(SIZE).astype(np.float32)
         # Native Implementation
         TimeIn=time.time()
         res_np=NativeSillyAddition(a_np,b_np,Calls)
         NativeElapsed=time.time()-TimeIn
         NativeRate=int(SIZE/NativeElapsed)
         print("NativeRate: %i" % NativeRate)
         if Serial:
             TimeIn=time.time()
             res_np=NativeSillyAddition(a_np,b_np,Calls)
             NativeElapsed=time.time()-TimeIn
             NativeRate=int(SIZE/NativeElapsed)
             print("NativeRate: %i" % NativeRate)
         # OpenCL Implementation
         if GpuStyle=='OpenCL' or GpuStyle=='all':
-...
             OpenCLRate=int(SIZE/OpenCLElapsed)
             print("OpenCLRate: %i" % OpenCLRate)
             # Check on OpenCL with Numpy:
             print(res_cl - res_np)
             print(np.linalg.norm(res_cl - res_np))
             try:
                 assert np.allclose(res_np, res_cl)
             except:
                 print("Results between Native & OpenCL seem to be too different!")
             if Serial:
                 print(res_cl - res_np)
                 print(np.linalg.norm(res_cl - res_np))
                 try:
                     assert np.allclose(res_np, res_cl)
                 except:
                     print("Results between Native & OpenCL seem to be too different!")
             print("OpenCLvsNative ratio: %f" % (OpenCLRate/NativeRate))
                 print("OpenCLvsNative ratio: %f" % (OpenCLRate/NativeRate))
         # CUDA Implementation
         if GpuStyle=='CUDA' or GpuStyle=='all':
-...
             CUDARate=int(SIZE/CUDAElapsed)
             print("CUDARate: %i" % CUDARate)
             # Check on CUDA with Numpy:
             print(res_cuda - res_np)
             print(np.linalg.norm(res_cuda - res_np))
             try:
                 assert np.allclose(res_np, res_cuda)
             except:
                 print("Results between Native & CUDA seem to be too different!")
             if Serial:
                 print(res_cuda - res_np)
                 print(np.linalg.norm(res_cuda - res_np))
                 try:
                     assert np.allclose(res_np, res_cuda)
                 except:
                     print("Results between Native & CUDA seem to be too different!")
             print("CUDAvsNative ratio: %f" % (CUDARate/NativeRate))
                 print("CUDAvsNative ratio: %f" % (CUDARate/NativeRate))

     /* Simple Sum function in C and OpenMP/C */
     /* compilation with sequential compute : gcc -fopenmp -O3 -o MySteps_1_openmp MySteps_1_openmp.c -lm -lgomp */
     /* compilation without sequential compute : gcc -DNOSERIAL -fopenmp -O3 -o MySteps_1_openmp_1_NoSerial MySteps_1_openmp.c -lm -lgomp */
     #include <math.h>
     #include <stdio.h>
     #include <stdlib.h>
     #include <sys/time.h>
     #define PI 3.141592653589793
     #define MYFLOAT float
     /*
      * Element-wise sum: res[i] = a[i] + b[i] for every index i < size.
      * Nothing is written when size <= 0.
      */
     void MySum(MYFLOAT *res, MYFLOAT *a, MYFLOAT *b, int size)
     {
         /* Signed index matching `size`: `uint` is not standard C and made a
            negative size compare as a huge unsigned bound. */
         for (int i = 0; i < size; i++) {
             res[i] = a[i] + b[i];
         }
     }
     /*
      * Element-wise sum with the loop annotated for OpenMP:
      * res[i] = a[i] + b[i] for every index i < size. Despite the name, no
      * MySillyFunction call is made here. Nothing is written when size <= 0.
      */
     void MySillySumOMP(MYFLOAT *res, MYFLOAT *a, MYFLOAT *b, int size)
     {
         /* Signed index matching `size`: `uint` is not standard C and made a
            negative size compare as a huge unsigned bound. */
         #pragma omp parallel for
         for (int i = 0; i < size; i++) {
             res[i] = a[i] + b[i];
         }
     }
     MYFLOAT MyNorm(MYFLOAT *a,MYFLOAT *b,int size)
+    {
       MYFLOAT norm=0.;
       for (int i=0;i<size;i++)
+        {
           norm+=pow(a[i]-b[i],2);
+        }
       return(sqrt(norm));
+    }
     /*
      * Print the first `size` elements of a on one stdout line: "[", each
      * value formatted as " %.8e ", then "]" and a newline.
      */
     void MyPrint(MYFLOAT *a,int size)
     {
         printf("[");
         for (int i = 0; i < size; i++) {
             printf(" %.8e ", a[i]);
         }
         printf("]\n");
     }
     int main(int argc,char *argv[])
+    {
       float *a,*b,*res,*resacc;
       int size=1024;
       struct timeval tv1,tv2;
       if (argc > 1) {
         size=(int)atoll(argv[1]);
+      }
       else {
         printf("\n\tPi : Estimate SillySum\n\n\t\t#1 : size (default 1024)\n\n");
+      }
       printf("%i\n",size);
       a=(float*)malloc(size*sizeof(MYFLOAT));
       b=(float*)malloc(size*sizeof(MYFLOAT));
       res=(float*)malloc(size*sizeof(MYFLOAT));
       resacc=(float*)malloc(size*sizeof(MYFLOAT));
       srand(110271);
       for (int i=0;i<size;i++)
+        {
           a[i]=(MYFLOAT)rand()/(MYFLOAT)RAND_MAX;
           b[i]=(MYFLOAT)rand()/(MYFLOAT)RAND_MAX;
           res[i]=0.;
           resacc[i]=0.;
+        }
     #ifndef NOSERIAL
       gettimeofday(&tv1, NULL);
       MySum(res,a,b,size);
       gettimeofday(&tv2, NULL);
     #endif
       MYFLOAT elapsed=(MYFLOAT)((tv2.tv_sec-tv1.tv_sec) * 1000000L +
     			    (tv2.tv_usec-tv1.tv_usec))/1000000;
       gettimeofday(&tv1, NULL);
       MySillySumOMP(resacc,a,b,size);
       gettimeofday(&tv2, NULL);
       MYFLOAT elapsedAcc=(MYFLOAT)((tv2.tv_sec-tv1.tv_sec) * 1000000L +
     			       (tv2.tv_usec-tv1.tv_usec))/1000000;
     #ifndef NOSERIAL
       MYFLOAT MyChecker=MyNorm(res,resacc,size);
       printf("Norm: %.8e\n",MyChecker);
     #endif
     #ifdef VERBOSE
       MyPrint(res,size);
       MyPrint(resacc,size);
     #endif
     #ifndef NOSERIAL
       printf("Elapsed Time: %.3f\n",elapsed);
     #endif
       printf("OMP Elapsed Time: %.3f\n",elapsedAcc);
     #ifndef NOSERIAL
       printf("NaiveRate: %.lld\n",(unsigned long)((float)size/elapsed));
     #endif
       printf("OMPRate: %.lld\n",(unsigned long)((float)size/elapsedAcc));
     #ifndef NOSERIAL
       printf("AccRatio: %.3f\n",elapsed/elapsedAcc);
     #endif
       free(a);
       free(b);
       free(res);
       free(resacc);
+    }

     /* Simple SillySum function in C and OpenMP/C */
     /* compilation with sequential compute : gcc -fopenmp -O3 -o MySteps_6_openmp MySteps_6_openmp.c -lm -lgomp */
     /* compilation without sequential compute : gcc -DNOSERIAL -fopenmp -O3 -o MySteps_6_openmp_NoSerial MySteps_6_openmp.c -lm -lgomp */
     #include <math.h>
     #include <stdio.h>
     #include <stdlib.h>
     #include <sys/time.h>
     #define PI 3.141592653589793
     #define MYFLOAT float
     MYFLOAT MySillyFunction(MYFLOAT x)
+    {
         return(pow(sqrt(log(exp(atanh(tanh(asinh(sinh(acosh(cosh(atan(tan(asin(sin(acos(cos(x))))))))))))))),2));
+    }
     /*
      * Serial kernel: for every index i < size, apply MySillyFunction `calls`
      * times to a[i] and to b[i], then store the sum of both results in
      * res[i]. Nothing is written when size <= 0.
      */
     void MySillySum(MYFLOAT *res, MYFLOAT *a, MYFLOAT *b,int calls, int size)
     {
         /* Signed index matching `size`: `uint` is not standard C and made a
            negative size compare as a huge unsigned bound. */
         for (int i = 0; i < size; i++) {
             MYFLOAT ai = a[i];
             MYFLOAT bi = b[i];

             for (int c = 0; c < calls; c++) {
                 ai = MySillyFunction(ai);
                 bi = MySillyFunction(bi);
             }
             res[i] = ai + bi;
         }
     }
     /*
      * Same computation as the serial kernel, with the outer loop annotated
      * for OpenMP: res[i] = f^calls(a[i]) + f^calls(b[i]) where f is
      * MySillyFunction. Nothing is written when size <= 0.
      */
     void MySillySumOMP(MYFLOAT *res, MYFLOAT *a, MYFLOAT *b,int calls, int size)
     {
         /* Signed index matching `size`: `uint` is not standard C and made a
            negative size compare as a huge unsigned bound. */
         #pragma omp parallel for
         for (int i = 0; i < size; i++) {
             MYFLOAT ai = a[i];
             MYFLOAT bi = b[i];

             for (int c = 0; c < calls; c++) {
                 ai = MySillyFunction(ai);
                 bi = MySillyFunction(bi);
             }
             res[i] = ai + bi;
         }
     }
     MYFLOAT MyNorm(MYFLOAT *a,MYFLOAT *b,int size)
+    {
       MYFLOAT norm=0.;
       for (int i=0;i<size;i++)
+        {
           norm+=pow(a[i]-b[i],2);
+        }
       return(sqrt(norm));
+    }
     /*
      * Print the first `size` elements of a on one stdout line: "[", each
      * value formatted as " %.8e ", then "]" and a newline.
      */
     void MyPrint(MYFLOAT *a,int size)
     {
         printf("[");
         for (int i = 0; i < size; i++) {
             printf(" %.8e ", a[i]);
         }
         printf("]\n");
     }
     int main(int argc,char *argv[])
+    {
       float *a,*b,*res,*resacc;
       int size=1024;
       int calls=1;
       struct timeval tv1,tv2;
       if (argc > 1) {
         size=(int)atoll(argv[1]);
         calls=(int)atoll(argv[2]);
+      }
       else {
         printf("\n\tPi : Estimate SillySum\n\n\t\t#1 : size (default 1024)\n\t\t#2 : calls (default 1)\n\n");
+      }
       printf("%i %i\n",size,calls);
       a=(float*)malloc(size*sizeof(MYFLOAT));
       b=(float*)malloc(size*sizeof(MYFLOAT));
       res=(float*)malloc(size*sizeof(MYFLOAT));
       resacc=(float*)malloc(size*sizeof(MYFLOAT));
       srand(110271);
       for (int i=0;i<size;i++)
+        {
           a[i]=(MYFLOAT)rand()/(MYFLOAT)RAND_MAX;
           b[i]=(MYFLOAT)rand()/(MYFLOAT)RAND_MAX;
           res[i]=0.;
           resacc[i]=0.;
+        }
     #ifndef NOSERIAL
       gettimeofday(&tv1, NULL);
       MySillySum(res,a,b,calls,size);
       gettimeofday(&tv2, NULL);
     #endif
       MYFLOAT elapsed=(MYFLOAT)((tv2.tv_sec-tv1.tv_sec) * 1000000L +
     			    (tv2.tv_usec-tv1.tv_usec))/1000000;
       gettimeofday(&tv1, NULL);
       MySillySumOMP(resacc,a,b,calls,size);
       gettimeofday(&tv2, NULL);
       MYFLOAT elapsedAcc=(MYFLOAT)((tv2.tv_sec-tv1.tv_sec) * 1000000L +
     			       (tv2.tv_usec-tv1.tv_usec))/1000000;
     #ifndef NOSERIAL
       MYFLOAT MyChecker=MyNorm(res,resacc,size);
       printf("Norm: %.8e\n",MyChecker);
     #endif
     #ifdef VERBOSE
       MyPrint(res,size);
       MyPrint(resacc,size);
     #endif
     #ifndef NOSERIAL
       printf("Elapsed Time: %.3f\n",elapsed);
     #endif
       printf("OMP Elapsed Time: %.3f\n",elapsedAcc);
     #ifndef NOSERIAL
       printf("NaiveRate: %.lld\n",(unsigned long)((float)size/elapsed));
     #endif
       printf("OMPRate: %.lld\n",(unsigned long)((float)size/elapsedAcc));
     #ifndef NOSERIAL
       printf("AccRatio: %.3f\n",elapsed/elapsedAcc);
     #endif
       free(a);
       free(b);
       free(res);
       free(resacc);
+    }

     /* Simple Sum function in C and OpenACC/C */
     /* compilation with sequential compute : gcc -O3 -fopenacc -foffload=nvptx-none -foffload="-O3 -misa=sm_35 -lm" -o MySteps_1_openacc MySteps_1_openacc.c -lm */
     /* compilation without sequential compute : gcc -DNOSERIAL -O3 -fopenacc -foffload=nvptx-none -foffload="-O3 -misa=sm_35 -lm" -o MySteps_1_openacc_NoSerial MySteps_1_openacc.c -lm */
     #include <math.h>
     #include <stdio.h>
     #include <stdlib.h>
     #include <sys/time.h>
     #define PI 3.141592653589793
     #define MYFLOAT float
     /* #pragma acc routine */
     /* MYFLOAT MySillyFunction(MYFLOAT x) */
     /* { */
     /*     return(pow(sqrt(log(exp(atanh(tanh(asinh(sinh(acosh(cosh(atan(tan(asin(sin(acos(cos(x))))))))))))))),2));  */
     /* } */
     /*
      * Element-wise sum: res[i] = a[i] + b[i] for every index i < size.
      * Nothing is written when size <= 0.
      */
     void MySum(MYFLOAT *res, MYFLOAT *a, MYFLOAT *b, int size)
     {
         /* Signed index matching `size`: `uint` is not standard C and made a
            negative size compare as a huge unsigned bound. */
         for (int i = 0; i < size; i++) {
             res[i] = a[i] + b[i];
         }
     }
     /*
      * Element-wise sum annotated for OpenACC: res[i] = a[i] + b[i] for every
      * index i < size. The data directive copies a[0:size] and b[0:size] in
      * and res[0:size] out around the parallel loop. Nothing is written when
      * size <= 0.
      */
     void MySumOpenACC(MYFLOAT *res, MYFLOAT *a, MYFLOAT *b, int size)
     {
         /* Signed index matching `size`: `uint` is not standard C and made a
            negative size compare as a huge unsigned bound. */
         #pragma acc data copyin(a[0:size],b[0:size]),copyout(res[0:size])
         #pragma acc parallel loop
         for (int i = 0; i < size; i++) {
             res[i] = a[i] + b[i];
         }
     }
     MYFLOAT MyNorm(MYFLOAT *a,MYFLOAT *b,int size)
+    {
       MYFLOAT norm=0.;
       for (int i=0;i<size;i++)
+        {
           norm+=pow(a[i]-b[i],2);
+        }
       return(sqrt(norm));
+    }
     /*
      * Print the first `size` elements of a on one stdout line: "[", each
      * value formatted as " %.8e ", then "]" and a newline.
      */
     void MyPrint(MYFLOAT *a,int size)
     {
         printf("[");
         for (int i = 0; i < size; i++) {
             printf(" %.8e ", a[i]);
         }
         printf("]\n");
     }
     int main(int argc,char *argv[])
+    {
       float *a,*b,*res,*resacc;
       int size=1024;
       struct timeval tv1,tv2;
       if (argc > 1) {
         size=(int)atoll(argv[1]);
+      }
       else {
         printf("\n\tPi : Estimate SillySum\n\n\t\t#1 : size (default 1024)\n\n");
+      }
       printf("%i\n",size);
       a=(float*)malloc(size*sizeof(MYFLOAT));
       b=(float*)malloc(size*sizeof(MYFLOAT));
       res=(float*)malloc(size*sizeof(MYFLOAT));
       resacc=(float*)malloc(size*sizeof(MYFLOAT));
       srand(110271);
       for (int i=0;i<size;i++)
+        {
           a[i]=(MYFLOAT)rand()/(MYFLOAT)RAND_MAX;
           b[i]=(MYFLOAT)rand()/(MYFLOAT)RAND_MAX;
           res[i]=0.;
           resacc[i]=0.;
+        }
     #ifndef NOSERIAL
       gettimeofday(&tv1, NULL);
       MySum(res,a,b,size);
       gettimeofday(&tv2, NULL);
     #endif
       MYFLOAT elapsed=(MYFLOAT)((tv2.tv_sec-tv1.tv_sec) * 1000000L +
     			    (tv2.tv_usec-tv1.tv_usec))/1000000;
       gettimeofday(&tv1, NULL);
       MySumOpenACC(resacc,a,b,size);
       gettimeofday(&tv2, NULL);
       MYFLOAT elapsedAcc=(MYFLOAT)((tv2.tv_sec-tv1.tv_sec) * 1000000L +
     			       (tv2.tv_usec-tv1.tv_usec))/1000000;
     #ifndef NOSERIAL
       MYFLOAT MyChecker=MyNorm(res,resacc,size);
       printf("Norm: %.8e\n",MyChecker);
     #endif
     #ifdef VERBOSE
       MyPrint(res,size);
       MyPrint(resacc,size);
     #endif
     #ifndef NOSERIAL
       printf("Elapsed Time: %.3f\n",elapsed);
       printf("OpenACC Elapsed Time: %.3f\n",elapsedAcc);
     #endif
     #ifndef NOSERIAL
       printf("NaiveRate: %.lld\n",(unsigned long)((float)size/elapsed));
     #endif
       printf("OpenACCRate: %.lld\n",(unsigned long)((float)size/elapsedAcc));
     #ifndef NOSERIAL
       printf("OpenACCRatio: %.3f\n",elapsed/elapsedAcc);
     #endif
       free(a);
       free(b);
       free(res);
       free(resacc);
+    }

Centre Blaise Pascal » Bench4GPU

Révision 296