/ - Diff - HPL sur GPU - Forge du Centre Blaise Pascal

     #define STDC_ARGS(p)           ()
     #endif
     #ifdef HPL_CALL_VSIPL
     #include <vsip.h>
     #endif
     #ifdef HPL_CALL_CUBLAS
     #include <cublas.h>
     #endif
     /*
      * ---------------------------------------------------------------------
      * #define macro constants

     #ifndef HPL_CALL_VSIPL          /* Call the Fortran 77 BLAS interface */
     #ifndef HPL_CALL_CBLAS                       /* there can be only one */
     #ifndef HPL_CALL_CUBLAS
     #define HPL_CALL_FBLAS
     #endif
     #endif
     #endif
     /*
      * ---------------------------------------------------------------------
      * Include files

     #endif
        F77dcopy( &F77N, X, &F77incx, Y, &F77incy );
     #endif
     #ifdef HPL_CALL_CUBLAS
     #define CUBLASN                 N
     #define CUBLASincx              INCX
     #define CUBLASincy              INCY
        CUBLAS_DCOPY( &CUBLASN, X, &CUBLASincx, Y, &CUBLASincy );
     #endif
     /*
      * End of HPL_dcopy
      */

     #endif
        F77dswap( &F77N, X, &F77incx, Y, &F77incy );
     #endif
     #ifdef HPL_CALL_CUBLAS
     #define CUBLASN                 N
     #define CUBLASincx              INCX
     #define CUBLASincy              INCY
        CUBLAS_DSWAP( &CUBLASN, X, &CUBLASincx, Y, &CUBLASincy );
     #endif
     /*
      * End of HPL_dswap
      */

+          }
+       }
     #endif
     #ifdef HPL_CALL_FBLAS
        double                    alpha = ALPHA;
     #ifdef HPL_USE_F77_INTEGER_DEF
-...
        else
        {  F77dger( &F77N, &F77M, &alpha, Y, &F77incy, X, &F77incx, A, &F77lda ); }
     #endif
     #ifdef HPL_CALL_CUBLAS
        double                    alpha = ALPHA;
     #define CUBLASM                 M
     #define CUBLASN                 N
     #define CUBLASlda               LDA
     #define CUBLASincx              INCX
     #define CUBLASincy              INCY
        if( ORDER == HplColumnMajor )
+       {
          CUBLAS_DGER( &CUBLASM, &CUBLASN, &alpha,
     		  X, &CUBLASincx, Y, &CUBLASincy, A, &CUBLASlda );
+       }
        else
+       {
          CUBLAS_DGER( &CUBLASN, &CUBLASM, &alpha,
     		  Y, &CUBLASincy, X, &CUBLASincx, A, &CUBLASlda ); }
     #endif
     /*
      * End of HPL_dger
      */

                       N, M, ALPHA, A, LDA, X, INCX, BETA, Y, INCY );
+       }
     #endif
     #ifdef HPL_CALL_FBLAS
        double                    alpha = ALPHA, beta = BETA;
     #ifdef StringSunStyle
-...
+       }
     #endif
     #ifdef HPL_CALL_CUBLAS
        double                    alpha = ALPHA, beta = BETA;
        int                       IONE = 1;
     #define CUBLASM                 M
     #define CUBLASN                 N
     #define CUBLASlda               LDA
     #define CUBLASincx              INCX
     #define CUBLASincy              INCY
        char                      ctran;
        if( ORDER == HplColumnMajor )
+       {
           ctran = ( TRANS == HplNoTrans ? 'N' : 'T' );
           CUBLAS_DGEMV( &ctran, &CUBLASM, &CUBLASN,
     		    &alpha, A, &CUBLASlda, X, &CUBLASincx,
     		    &beta, Y, &CUBLASincy, IONE );
+       }
        else
+       {
           ctran = ( TRANS == HplNoTrans ? 'T' : 'N' );
           CUBLAS_DGEMV( &ctran, &CUBLASN, &CUBLASM,
     		    &alpha, A, &CUBLASlda, X, &CUBLASincx,
     		    &beta, Y, &CUBLASincy, IONE );
+       }
     #endif
     /*
      * End of HPL_dgemv
      */

                       DIAG, N, A, LDA, X, INCX );
+       }
     #endif
     #ifdef HPL_CALL_FBLAS
     #ifdef StringSunStyle
     #ifdef HPL_USE_F77_INTEGER_DEF
-...
     #endif
     #endif
     #ifdef HPL_CALL_CUBLAS
        int                       IONE = 1;
     #define CUBLASN              N
     #define CUBLASlda            LDA
     #define CUBLASincx           INCX
        char                      cuplo, ctran, cdiag;
        if( ORDER == HplColumnMajor )
+       {
           cuplo = ( UPLO  == HplUpper   ? 'U' : 'L' );
           ctran = ( TRANS == HplNoTrans ? 'N' : 'T' );
+       }
        else
+       {
           cuplo = ( UPLO  == HplUpper   ? 'L' : 'U' );
           ctran = ( TRANS == HplNoTrans ? 'T' : 'N' );
+       }
        cdiag = ( DIAG == HplNonUnit ? 'N' : 'U' );
        CUBLAS_DTRSV( &cuplo, &ctran, &cdiag, &CUBLASN,
     		 A, &CUBLASlda, X, &CUBLASincx, IONE, IONE, IONE );
     #endif
     /*
      * End of HPL_dtrsv
      */

        if( N > 0 ) imax = F77idamax( &F77N, X, &F77incx ) - 1;
        return( imax );
     #endif
     #ifdef HPL_CALL_CUBLAS
     #define CUBLASN                 N
     #define CUBLASincx              INCX
        int                       imax = 0;
        if( N > 0 ) imax = CUBLAS_IDAMAX( &CUBLASN, X, &CUBLASincx ) - 1;
        return( imax );
     #endif
     /*
      * End of HPL_idamax
      */

+          }
+       }
     #endif
     #ifdef HPL_CALL_FBLAS
        double                    alpha = ALPHA;
     #ifdef HPL_USE_F77_INTEGER_DEF
-...
     #endif
        F77daxpy( &F77N, &alpha, X, &F77incx, Y, &F77incy );
     #endif
     #ifdef HPL_CALL_CUBLAS
        double                    alpha = ALPHA;
     #define CUBLASN                 N
     #define CUBLASincx              INCX
     #define CUBLASincy              INCY
        CUBLAS_DAXPY( &CUBLASN, &alpha, X, &CUBLASincx, Y, &CUBLASincy );
     #endif
     /*
      * End of HPL_daxpy
      */

+          }
+       }
     #endif
     #ifdef HPL_CALL_FBLAS
        double                    alpha = ALPHA;
     #ifdef HPL_USE_F77_INTEGER_DEF
-...
        F77dscal( &F77N, &alpha, X, &F77incx );
     #endif
     #ifdef HPL_CALL_CUBLAS
        double                    alpha = ALPHA;
     #define CUBLASN                 N
     #define CUBLASincx              INCX
        CUBLAS_DSCAL( &CUBLASN, &alpha, X, &CUBLASincx );
     #endif
     /*
      * End of HPL_dscal
      */

     #endif
+       }
     #endif
     #ifdef HPL_CALL_CUBLAS
        double                    alpha = ALPHA, beta = BETA;
        int                       IONE = 1;
     #define CUBLASM                 M
     #define CUBLASN                 N
     #define CUBLASK                 K
     #define CUBLASlda               LDA
     #define CUBLASldb               LDB
     #define CUBLASldc               LDC
        char                      ctransa, ctransb;
        if(      TRANSA == HplNoTrans ) ctransa = 'N';
        else if( TRANSA == HplTrans   ) ctransa = 'T';
        else                            ctransa = 'C';
        if(      TRANSB == HplNoTrans ) ctransb = 'N';
        else if( TRANSB == HplTrans   ) ctransb = 'T';
        else                            ctransb = 'C';
        if( ORDER == HplColumnMajor )
+       {
           CUBLAS_DGEMM( &ctransa, &ctransb, &CUBLASM, &CUBLASN, &CUBLASK,
     		    &alpha, A, &CUBLASlda, B, &CUBLASldb, &beta, C, &CUBLASldc,
     		    &IONE, &IONE );
+       }
        else
+       {
           CUBLAS_DGEMM( &ctransb, &ctransa, &CUBLASN, &CUBLASM, &CUBLASK,
     		    &alpha, B, &CUBLASldb, A, &CUBLASlda, &beta, C, &CUBLASldc,
     		    &IONE, &IONE );
+       }
     #endif
     /*
      * End of HPL_dgemm
      */

                       TRANS, DIAG, N, M, ALPHA, A, LDA, B, LDB );
+       }
     #endif
     #ifdef HPL_CALL_FBLAS
        double                    alpha = ALPHA;
     #ifdef StringSunStyle
-...
     #endif
+       }
     #endif
     #ifdef HPL_CALL_CUBLAS
        double                    alpha = ALPHA;
        int                       IONE = 1;
     #define  CUBLASM                M
     #define  CUBLASN                N
     #define  CUBLASlda              LDA
     #define  CUBLASldb              LDB
        char                      cside, cuplo, ctran, cdiag;
        if(      TRANS == HplNoTrans ) ctran = 'N';
        else if( TRANS == HplTrans   ) ctran = 'T';
        else                           ctran = 'C';
        cdiag = ( DIAG == HplUnit  ? 'U' : 'N' );
        if( ORDER == HplColumnMajor )
+       {
           cside = ( SIDE == HplRight ? 'R' : 'L' );
           cuplo = ( UPLO == HplLower ? 'L' : 'U' );
           CUBLAS_DTRSM( &cside, &cuplo, &ctran, &cdiag, &CUBLASM, &CUBLASN, &alpha,
     		    A, &CUBLASlda, B, &CUBLASldb, &IONE, &IONE, &IONE, &IONE );
+       }
        else
+       {
           cside = ( SIDE == HplRight ? 'L' : 'R' );
           cuplo = ( UPLO == HplLower ? 'U' : 'L' );
           CUBLAS_DTRSM( &cside, &cuplo, &ctran, &cdiag, &CUBLASN, &CUBLASM, &alpha,
     		    A, &CUBLASlda, B, &CUBLASldb, &IONE, &IONE, &IONE, &IONE );
+       }
     #endif
     /*
      * End of HPL_dtrsm
      */

setup/Make.Debian_CBLAS_OpenMPI (revision 9)
82	82	# used. The variable MPdir is only used for defining MPinc and MPlib.
83	83	#
84	84	MPdir = /usr/lib/openmpi
85		MPinc = -I$(MPdir)/include
	85	MPinc = -I/include/include/openmpi
86	86	MPlib = -L$(MPdir)/lib/libmpi.so
87	87	#
88	88	# ----------------------------------------------------------------------

+    #
     #  -- High Performance Computing Linpack Benchmark (HPL)
     #     HPL - 2.0 - September 10, 2008
     #     Antoine P. Petitet
     #     University of Tennessee, Knoxville
     #     Innovative Computing Laboratory
     #     (C) Copyright 2000-2008 All Rights Reserved
+    #
     #  -- Copyright notice and Licensing terms:
+    #
     #  Redistribution  and  use in  source and binary forms, with or without
     #  modification, are  permitted provided  that the following  conditions
     #  are met:
+    #
     #  1. Redistributions  of  source  code  must retain the above copyright
     #  notice, this list of conditions and the following disclaimer.
+    #
     #  2. Redistributions in binary form must reproduce  the above copyright
     #  notice, this list of conditions,  and the following disclaimer in the
     #  documentation and/or other materials provided with the distribution.
+    #
     #  3. All  advertising  materials  mentioning  features  or  use of this
     #  software must display the following acknowledgement:
     #  This  product  includes  software  developed  at  the  University  of
     #  Tennessee, Knoxville, Innovative Computing Laboratory.
+    #
     #  4. The name of the  University,  the name of the  Laboratory,  or the
     #  names  of  its  contributors  may  not  be used to endorse or promote
     #  products  derived   from   this  software  without  specific  written
     #  permission.
+    #
     #  -- Disclaimer:
+    #
     #  THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
     #  ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
     #  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
     #  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
     #  OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
     #  SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
     #  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
     #  DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
     #  THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
     #  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
     #  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
     # ######################################################################
+    #
     # ----------------------------------------------------------------------
     # - shell --------------------------------------------------------------
     # ----------------------------------------------------------------------
+    #
     SHELL        = /bin/sh
+    #
     CD           = cd
     CP           = cp
     LN_S         = ln -s
     MKDIR        = mkdir
     RM           = /bin/rm -f
     TOUCH        = touch
+    #
     # ----------------------------------------------------------------------
     # - Platform identifier ------------------------------------------------
     # ----------------------------------------------------------------------
+    #
     ARCH         = $(arch)
+    #
     # ----------------------------------------------------------------------
     # - HPL Directory Structure / HPL library ------------------------------
     # ----------------------------------------------------------------------
+    #
     TOPdir       = $(topdir)
     INCdir       = $(TOPdir)/include
     BINdir       = $(TOPdir)/bin/$(ARCH)
     LIBdir       = $(TOPdir)/lib/$(ARCH)
+    #
     HPLlib       = $(LIBdir)/libhpl.a
+    #
     # ----------------------------------------------------------------------
     # - Message Passing library (MPI) --------------------------------------
     # ----------------------------------------------------------------------
     # MPinc tells the  C  compiler where to find the Message Passing library
     # header files,  MPlib  is defined  to be the name of  the library to be
     # used. The variable MPdir is only used for defining MPinc and MPlib.
+    #
     MPdir        = /usr/lib/openmpi
     MPinc        = -I/include/include/openmpi
     MPlib        = -L$(MPdir)/lib/libmpi.so
+    #
     # ----------------------------------------------------------------------
     # - Linear Algebra library (BLAS or VSIPL) -----------------------------
     # ----------------------------------------------------------------------
     # LAinc tells the  C  compiler where to find the Linear Algebra  library
     # header files,  LAlib  is defined  to be the name of  the library to be
     # used. The variable LAdir is only used for defining LAinc and LAlib.
+    #
     LAdir        = /opt/cuda/lib64
     LAinc        = -I/opt/cuda/include
     LAlib        = $(LAdir)/libcublas.so
+    #
     # ----------------------------------------------------------------------
     # - F77 / C interface --------------------------------------------------
     # ----------------------------------------------------------------------
     # You can skip this section  if and only if  you are not planning to use
     # a  BLAS  library featuring a Fortran 77 interface.  Otherwise,  it  is
     # necessary  to  fill out the  F2CDEFS  variable  with  the  appropriate
     # options.  **One and only one**  option should be chosen in **each** of
     # the 3 following categories:
+    #
     # 1) name space (How C calls a Fortran 77 routine)
+    #
     # -DAdd_              : all lower case and a suffixed underscore  (Suns,
     #                       Intel, ...),                           [default]
     # -DNoChange          : all lower case (IBM RS6000),
     # -DUpCase            : all upper case (Cray),
     # -DAdd__             : the FORTRAN compiler in use is f2c.
+    #
     # 2) C and Fortran 77 integer mapping
+    #
     # -DF77_INTEGER=int   : Fortran 77 INTEGER is a C int,         [default]
     # -DF77_INTEGER=long  : Fortran 77 INTEGER is a C long,
     # -DF77_INTEGER=short : Fortran 77 INTEGER is a C short.
+    #
     # 3) Fortran 77 string handling
+    #
     # -DStringSunStyle    : The string address is passed at the string loca-
     #                       tion on the stack, and the string length is then
     #                       passed as  an  F77_INTEGER  after  all  explicit
     #                       stack arguments,                       [default]
     # -DStringStructPtr   : The address  of  a  structure  is  passed  by  a
     #                       Fortran 77  string,  and the structure is of the
     #                       form: struct {char *cp; F77_INTEGER len;},
     # -DStringStructVal   : A structure is passed by value for each  Fortran
     #                       77 string,  and  the  structure is  of the form:
     #                       struct {char *cp; F77_INTEGER len;},
     # -DStringCrayStyle   : Special option for  Cray  machines,  which  uses
     #                       Cray  fcd  (fortran  character  descriptor)  for
     #                       interoperation.
+    #
     F2CDEFS      =
+    #
     # ----------------------------------------------------------------------
     # - HPL includes / libraries / specifics -------------------------------
     # ----------------------------------------------------------------------
+    #
     HPL_INCLUDES = -I$(INCdir) -I$(INCdir)/$(ARCH) $(LAinc) $(MPinc)
     HPL_LIBS     = $(HPLlib) $(LAlib) $(MPlib)
+    #
     # - Compile time options -----------------------------------------------
+    #
     # -DHPL_COPY_L           force the copy of the panel L before bcast;
     # -DHPL_CALL_CBLAS       call the cblas interface;
     # -DHPL_CALL_VSIPL       call the vsip  library;
     # -DHPL_CALL_CUBLAS      call the cuBLAS library from NVIDIA CUDA;
     # -DHPL_DETAILED_TIMING  enable detailed timers;
+    #
     # By default HPL will:
     #    *) not copy L before broadcast,
     #    *) call the BLAS Fortran 77 interface,
     #    *) not display detailed timing information.
+    #
     HPL_OPTS     = -DHPL_CALL_CUBLAS
+    #
     # ----------------------------------------------------------------------
+    #
     HPL_DEFS     = $(F2CDEFS) $(HPL_OPTS) $(HPL_INCLUDES)
+    #
     # ----------------------------------------------------------------------
     # - Compilers / linkers - Optimization flags ---------------------------
     # ----------------------------------------------------------------------
+    #
     CC           = /usr/bin/mpicc.openmpi
     CCNOOPT      = $(HPL_DEFS)
     CCFLAGS      = $(HPL_DEFS) -fomit-frame-pointer -O3 -funroll-loops -W -Wall
+    #
     # On some platforms,  it is necessary  to use the Fortran linker to find
     # the Fortran internals used in the BLAS library.
+    #
     LINKER       = /usr/bin/mpicc.openmpi
     # The object fortran_thunking.o must already be compiled in the HPL root folder, $(TOPdir)
     LINKFLAGS    = $(CCFLAGS) $(TOPdir)/fortran_thunking.o
+    #
     ARCHIVER     = ar
     ARFLAGS      = r
     RANLIB       = echo
+    #
     # ----------------------------------------------------------------------

     # CUBLAS script to build HPL using CUBLAS Thunking
+    #
     # 2010-11-26 : Initial release
     # Emmanuel Quemener <emmanuel.quemener@ens-lyon.fr>
+    #
     # Copy this script into the HPL root folder and run it from there.
     # Root folder of the CUDA toolkit: /opt/cuda unless CUDASRC is already set
     # in the environment (e.g. CUDASRC=/usr/local/cuda).
     CUDASRC="${CUDASRC:-/opt/cuda}"
     # Remove every object file below the current directory
     find . -name "*.o" -exec rm {} \;
     # Copy the CUBLAS thunking sources from the toolkit; stop if they are missing
     cp "$CUDASRC/src/fortran_thunking.c" "$CUDASRC/src/fortran_thunking.h" "$CUDASRC/src/fortran_common.h" . || exit 1
     # Patch the thunking prototypes so they compile on Debian Lenny
     patch fortran_common.h patch_thunking.h || exit 1
     # Compile fortran_thunking.o in this folder; stop if the compilation fails
     gcc -funroll-loops -W  -I"$CUDASRC/include" -Wall -O3 -c fortran_thunking.c || exit 1
     # Clean old builds (best effort: a failure here does not stop the script)
     # NOTE(review): stock HPL seems to name this target clean_arch_all - confirm
     make arch=Debian_CUBLAS_OpenMPI clean_all_arch
     # Compile HPL
     make arch=Debian_CUBLAS_OpenMPI

addons/patch_thunking.h (revision 9)
	1	41c41
	2	< #define CUBLAS_FORTRAN_COMPILER CUBLAS_G95
	3	---
	4	> #define CUBLAS_FORTRAN_COMPILER CUBLAS_INTEL_FORTRAN

     Innovative Computing Laboratory, University of Tennessee
     HPL.out      output file name (if any)
 device out (6=stdout,7=stderr,file)
 # of problems sizes (N)
 2048    Ns
 # of NBs
 512      NBs
 # of problems sizes (N)
 Ns
 # of NBs
 NBs
 PMAP process mapping (0=Row-,1=Column-major)
 # of process grids (P x Q)
 2        Ps
 1        Qs
 # of process grids (P x Q)
 Ps
 Qs
 .0         threshold
 # of panel fact
 1 2        PFACTs (0=left, 1=Crout, 2=Right)

Centre Blaise Pascal » HPL sur GPU

Révision 9