/src/blas/HPL_dgemm.c - HPL sur GPU - Forge du Centre Blaise Pascal

root / src / blas / HPL_dgemm.c

Historique | Voir | Annoter | Télécharger (21,2 ko)

       /*
        * -- High Performance Computing Linpack Benchmark (HPL)
        *    HPL - 2.0 - September 10, 2008
        *    Antoine P. Petitet
        *    University of Tennessee, Knoxville
        *    Innovative Computing Laboratory
        *    (C) Copyright 2000-2008 All Rights Reserved
        *
        * -- Copyright notice and Licensing terms:
        *
        * Redistribution  and  use in  source and binary forms, with or without
        * modification, are  permitted provided  that the following  conditions
        * are met:
        *
        * 1. Redistributions  of  source  code  must retain the above copyright
        * notice, this list of conditions and the following disclaimer.
        *
        * 2. Redistributions in binary form must reproduce  the above copyright
        * notice, this list of conditions,  and the following disclaimer in the
        * documentation and/or other materials provided with the distribution.
        *
        * 3. All  advertising  materials  mentioning  features  or  use of this
        * software must display the following acknowledgement:
        * This  product  includes  software  developed  at  the  University  of
        * Tennessee, Knoxville, Innovative Computing Laboratory.
        *
        * 4. The name of the  University,  the name of the  Laboratory,  or the
        * names  of  its  contributors  may  not  be used to endorse or promote
        * products  derived   from   this  software  without  specific  written
        * permission.
        *
        * -- Disclaimer:
        *
        * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
        * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
        * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
        * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
        * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
        * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
        * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
        * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
        * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
        * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
        * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
        * ---------------------------------------------------------------------
        */
       /*
        * Include files
        */
       #include "hpl.h"
       #ifndef HPL_dgemm
       #ifdef HPL_CALL_VSIPL
       /*
        * HPL_dgemmNN handles the case where neither A nor B is transposed.
        *
        * For every column j of C (0 <= j < N) the column is first passed to
        * HPL_dscal together with BETA (HPL_dscal is declared outside this
        * file; presumably it scales the M entries by BETA -- confirm against
        * its definition).  Then, for every l (0 <= l < K), the product
        * ALPHA * B[l+j*LDB] times column l of A is added to that column of C.
        *
        * Element (i,l) of A is read at A[i+l*LDA], element (l,j) of B at
        * B[l+j*LDB], and element (i,j) of C is updated at C[i+j*LDC].
        */
       #ifdef STDC_HEADERS
       static void HPL_dgemmNN
       (
          const int                  M,
          const int                  N,
          const int                  K,
          const double               ALPHA,
          const double               * A,
          const int                  LDA,
          const double               * B,
          const int                  LDB,
          const double               BETA,
          double                     * C,
          const int                  LDC
       )
       #else
       static void HPL_dgemmNN( M, N, K, ALPHA, A, LDA, B, LDB, BETA, C, LDC )
          const int                  K, LDA, LDB, LDC, M, N;
          const double               ALPHA, BETA;
          const double               * A, * B;
          double                     * C;
       #endif
       {
          int                        col, inner, row;

          for( col = 0; col < N; col++ )
          {
             /* Offsets of column col inside B and C. */
             const int               b_off = col * LDB;
             const int               c_off = col * LDC;

             HPL_dscal( M, BETA, C + c_off, 1 );

             for( inner = 0; inner < K; inner++ )
             {
                /* Offset of column inner inside A, and the scalar that
                   multiplies it for this column of C. */
                const int            a_off  = inner * LDA;
                const double         weight = ALPHA * B[b_off + inner];

                for( row = 0; row < M; row++ )
                {
                   C[c_off + row] += A[a_off + row] * weight;
                }
             }
          }
       }
       /*
        * HPL_dgemmNT handles the case where A is used as is and B is used
        * transposed.
        *
        * For every column j of C (0 <= j < N) the column is first passed to
        * HPL_dscal together with BETA (HPL_dscal is declared outside this
        * file; presumably it scales the M entries by BETA -- confirm against
        * its definition).  Then, for every l (0 <= l < K), the product
        * ALPHA * B[j+l*LDB] times column l of A is added to that column of C.
        *
        * Element (i,l) of A is read at A[i+l*LDA], the entry of B used for
        * the pair (l,j) is read at B[j+l*LDB], and element (i,j) of C is
        * updated at C[i+j*LDC].
        */
       #ifdef STDC_HEADERS
       static void HPL_dgemmNT
       (
          const int                  M,
          const int                  N,
          const int                  K,
          const double               ALPHA,
          const double               * A,
          const int                  LDA,
          const double               * B,
          const int                  LDB,
          const double               BETA,
          double                     * C,
          const int                  LDC
       )
       #else
       static void HPL_dgemmNT( M, N, K, ALPHA, A, LDA, B, LDB, BETA, C, LDC )
          const int                  K, LDA, LDB, LDC, M, N;
          const double               ALPHA, BETA;
          const double               * A, * B;
          double                     * C;
       #endif
       {
          int                        col, inner, row;

          for( col = 0; col < N; col++ )
          {
             /* Offset of column col inside C. */
             const int               c_off = col * LDC;

             HPL_dscal( M, BETA, C + c_off, 1 );

             for( inner = 0; inner < K; inner++ )
             {
                /* B is walked along row col: one step of LDB per inner. */
                const int            a_off  = inner * LDA;
                const double         weight = ALPHA * B[col + inner * LDB];

                for( row = 0; row < M; row++ )
                {
                   C[c_off + row] += A[a_off + row] * weight;
                }
             }
          }
       }
       #ifdef STDC_HEADERS
       static void HPL_dgemmTN
+      (
          const int                  M,
          const int                  N,
          const int                  K,
          const double               ALPHA,
          const double               * A,
          const int                  LDA,
          const double               * B,
          const int                  LDB,
          const double               BETA,
          double                     * C,
          const int                  LDC
+      )
       #else
       static void HPL_dgemmTN( M, N, K, ALPHA, A, LDA, B, LDB, BETA, C, LDC )
          const int                  K, LDA, LDB, LDC, M, N;
          const double               ALPHA, BETA;
          const double               * A, * B;
          double                     * C;
       #endif
+      {
          register double            t0;
          int                        i, iai, iail, iblj, icij, j, jbj, jcj, l;
          for( j = 0, jbj = 0, jcj = 0; j < N; j++, jbj += LDB, jcj += LDC )
+         {
             for( i = 0, icij = jcj, iai = 0; i < M; i++, icij += 1, iai += LDA )
+            {
                t0 = HPL_rzero;
                for( l = 0, iail = iai, iblj = jbj; l < K; l++, iail += 1, iblj += 1 )
                { t0 += A[iail] * B[iblj]; }
                if( BETA == HPL_rzero ) C[icij]  = HPL_rzero;
                else                    C[icij] *= BETA;
                C[icij] += ALPHA * t0;
+            }
+         }
+      }
       #ifdef STDC_HEADERS
       static void HPL_dgemmTT
+      (
          const int                  M,
          const int                  N,
          const int                  K,
          const double               ALPHA,
          const double               * A,
          const int                  LDA,
          const double               * B,
          const int                  LDB,
          const double               BETA,
          double                     * C,
          const int                  LDC
+      )
       #else
       static void HPL_dgemmTT( M, N, K, ALPHA, A, LDA, B, LDB, BETA, C, LDC )
          const int                  K, LDA, LDB, LDC, M, N;
          const double               ALPHA, BETA;
          const double               * A, * B;
          double                     * C;
       #endif
+      {
          register double            t0;
          int                        i, iali, ibj, ibjl, icij, j, jai, jcj, l;
          for( j = 0, ibj = 0, jcj  = 0; j < N; j++, ibj += 1, jcj += LDC )
+         {
             for( i = 0, icij = jcj, jai = 0; i < M; i++, icij += 1, jai += LDA )
+            {
                t0 = HPL_rzero;
                for( l = 0,      iali  = jai, ibjl  = ibj;
                     l < K; l++, iali += 1,   ibjl += LDB ) t0 += A[iali] * B[ibjl];
                if( BETA == HPL_rzero ) C[icij]  = HPL_rzero;
                else                    C[icij] *= BETA;
                C[icij] += ALPHA * t0;
+            }
+         }
+      }
       #ifdef STDC_HEADERS
       static void HPL_dgemm0
+      (
          const enum HPL_TRANS       TRANSA,
          const enum HPL_TRANS       TRANSB,
          const int                  M,
          const int                  N,
          const int                  K,
          const double               ALPHA,
          const double               * A,
          const int                  LDA,
          const double               * B,
          const int                  LDB,
          const double               BETA,
          double                     * C,
          const int                  LDC
+      )
       #else
       static void HPL_dgemm0( TRANSA, TRANSB, M, N, K, ALPHA, A, LDA, B, LDB,
                               BETA, C, LDC )
          const enum HPL_TRANS       TRANSA, TRANSB;
          const int                  K, LDA, LDB, LDC, M, N;
          const double               ALPHA, BETA;
          const double               * A, * B;
          double                     * C;
       #endif
+      {
          int                        i, j;
          if( ( M == 0 ) || ( N == 0 ) ||
              ( ( ( ALPHA == HPL_rzero ) || ( K == 0 ) ) &&
                ( BETA == HPL_rone ) ) ) return;
          if( ALPHA == HPL_rzero )
+         {
             for( j = 0; j < N; j++ )
             {  for( i = 0; i < M; i++ ) *(C+i+j*LDC) = HPL_rzero; }
             return;
+         }
          if( TRANSB == HplNoTrans )
+         {
             if( TRANSA == HplNoTrans )
             { HPL_dgemmNN( M, N, K, ALPHA, A, LDA, B, LDB, BETA, C, LDC ); }
             else
             { HPL_dgemmTN( M, N, K, ALPHA, A, LDA, B, LDB, BETA, C, LDC ); }
+         }
          else
+         {
             if( TRANSA == HplNoTrans )
             { HPL_dgemmNT( M, N, K, ALPHA, A, LDA, B, LDB, BETA, C, LDC ); }
             else
             { HPL_dgemmTT( M, N, K, ALPHA, A, LDA, B, LDB, BETA, C, LDC ); }
+         }
+      }
       #endif
       #ifdef STDC_HEADERS
       void HPL_dgemm
+      (
          const enum HPL_ORDER             ORDER,
          const enum HPL_TRANS             TRANSA,
          const enum HPL_TRANS             TRANSB,
          const int                        M,
          const int                        N,
          const int                        K,
          const double                     ALPHA,
          const double *                   A,
          const int                        LDA,
          const double *                   B,
          const int                        LDB,
          const double                     BETA,
          double *                         C,
          const int                        LDC
+      )
       #else
       void HPL_dgemm
       ( ORDER, TRANSA, TRANSB, M, N, K, ALPHA, A, LDA, B, LDB, BETA, C, LDC )
          const enum HPL_ORDER             ORDER;
          const enum HPL_TRANS             TRANSA;
          const enum HPL_TRANS             TRANSB;
          const int                        M;
          const int                        N;
          const int                        K;
          const double                     ALPHA;
          const double *                   A;
          const int                        LDA;
          const double *                   B;
          const int                        LDB;
          const double                     BETA;
          double *                         C;
          const int                        LDC;
       #endif
+      {
       /*
        * Purpose
        * =======
        *
        * HPL_dgemm performs one of the matrix-matrix operations
        *
        *     C := alpha * op( A ) * op( B ) + beta * C
        *
        *  where op( X ) is one of
        *
        *     op( X ) = X   or   op( X ) = X^T.
        *
        * Alpha and beta are scalars,  and A,  B and C are matrices, with op(A)
        * an m by k matrix, op(B) a k by n matrix and  C an m by n matrix.
        *
        * The routine itself is a thin dispatcher:  the work is forwarded to the
        * back end selected at compile time by one of the macros HPL_CALL_CBLAS,
        * HPL_CALL_GSLCBLAS,  HPL_CALL_VSIPL,  HPL_CALL_FBLAS,  HPL_CALL_CUBLAS
        * or HPL_CALL_ACML.  When none of them is defined the body is empty.
        *
        * Arguments
        * =========
        *
        * ORDER   (local input)                 const enum HPL_ORDER
        *         On entry, ORDER  specifies the storage format of the operands
        *         as follows:
        *            ORDER = HplRowMajor,
        *            ORDER = HplColumnMajor.
        *
        * TRANSA  (local input)                 const enum HPL_TRANS
        *         On entry, TRANSA  specifies the form of  op(A)  to be used in
        *         the matrix-matrix operation as follows:
        *            TRANSA==HplNoTrans    : op( A ) = A,
        *            TRANSA==HplTrans      : op( A ) = A^T,
        *            TRANSA==HplConjTrans  : op( A ) = A^T.
        *
        * TRANSB  (local input)                 const enum HPL_TRANS
        *         On entry, TRANSB  specifies the form of  op(B)  to be used in
        *         the matrix-matrix operation as follows:
        *            TRANSB==HplNoTrans    : op( B ) = B,
        *            TRANSB==HplTrans      : op( B ) = B^T,
        *            TRANSB==HplConjTrans  : op( B ) = B^T.
        *
        * M       (local input)                 const int
        *         On entry,  M  specifies  the  number  of rows  of the  matrix
        *         op(A)  and  of  the  matrix  C.  M  must  be  at least  zero.
        *
        * N       (local input)                 const int
        *         On entry,  N  specifies  the number  of columns of the matrix
        *         op(B)  and  the number of columns of the matrix  C. N must be
        *         at least zero.
        *
        * K       (local input)                 const int
        *         On entry,  K  specifies  the  number of columns of the matrix
        *         op(A) and the number of rows of the matrix op(B).  K  must be
        *         at least  zero.
        *
        * ALPHA   (local input)                 const double
        *         On entry, ALPHA specifies the scalar alpha.   When  ALPHA  is
        *         supplied  as  zero  then the elements of the matrices A and B
        *         need not be set on input.
        *
        * A       (local input)                 const double *
        *         On entry,  A  is an array of dimension (LDA,ka),  where ka is
        *         k  when   TRANSA==HplNoTrans,  and  is  m  otherwise.  Before
        *         entry  with  TRANSA==HplNoTrans, the  leading  m by k part of
        *         the array  A must contain the matrix A, otherwise the leading
        *         k  by  m  part of the array  A  must  contain the  matrix  A.
        *
        * LDA     (local input)                 const int
        *         On entry, LDA  specifies the first dimension of A as declared
        *         in the  calling (sub) program. When  TRANSA==HplNoTrans  then
        *         LDA must be at least max(1,m), otherwise LDA must be at least
        *         max(1,k).
        *
        * B       (local input)                 const double *
        *         On entry, B is an array of dimension (LDB,kb),  where  kb  is
        *         n   when  TRANSB==HplNoTrans, and  is  k  otherwise.   Before
        *         entry with TRANSB==HplNoTrans,  the  leading  k by n  part of
        *         the array  B must contain the matrix B, otherwise the leading
        *         n  by  k  part of the array  B  must  contain  the matrix  B.
        *
        * LDB     (local input)                 const int
        *         On entry, LDB  specifies the first dimension of B as declared
        *         in the  calling (sub) program. When  TRANSB==HplNoTrans  then
        *         LDB must be at least max(1,k), otherwise LDB must be at least
        *         max(1,n).
        *
        * BETA    (local input)                 const double
        *         On entry,  BETA  specifies the scalar  beta.   When  BETA  is
        *         supplied  as  zero  then  the  elements of the matrix C  need
        *         not be set on input.
        *
        * C       (local input/output)          double *
        *         On entry,  C  is an array of dimension (LDC,n). Before entry,
        *         the  leading m by n part  of  the  array  C  must contain the
        *         matrix C,  except when beta is zero, in which case C need not
        *         be set on entry. On exit, the array  C  is overwritten by the
        *         m by n  matrix ( alpha*op( A )*op( B ) + beta*C ).
        *
        * LDC     (local input)                 const int
        *         On entry, LDC  specifies the first dimension of C as declared
        *         in  the   calling  (sub)  program.   LDC  must  be  at  least
        *         max(1,m).
        *
        * NOTE(review): the FBLAS, CUBLAS and ACML sections below each declare
        * alpha, beta, ctransa and ctransb, so defining more than one of these
        * HPL_CALL_* macros at once would redeclare them; the back ends appear
        * intended to be mutually exclusive -- confirm in the build setup.
        *
        * ---------------------------------------------------------------------
        */
       /*
        * CBLAS back end: every argument is forwarded unchanged.
        */
       #ifdef HPL_CALL_CBLAS
          cblas_dgemm( ORDER, TRANSA, TRANSB, M, N, K, ALPHA, A, LDA, B, LDB,
                       BETA, C, LDC );
       #endif
       /*
        * GSL CBLAS back end: same call as the CBLAS back end.
        */
       #ifdef HPL_CALL_GSLCBLAS
          cblas_dgemm( ORDER, TRANSA, TRANSB, M, N, K, ALPHA, A, LDA, B, LDB,
                       BETA, C, LDC );
       #endif
       /*
        * VSIPL back end: uses the file-local HPL_dgemm0.  For any ORDER other
        * than HplColumnMajor the roles of A and B, of M and N, and of the two
        * transposition flags are exchanged, i.e. the transposed product is
        * requested from the same routine.
        */
       #ifdef HPL_CALL_VSIPL
          if( ORDER == HplColumnMajor )
+         {
             HPL_dgemm0( TRANSA, TRANSB, M, N, K, ALPHA, A, LDA, B, LDB, BETA,
                         C, LDC );
+         }
          else
+         {
             HPL_dgemm0( TRANSB, TRANSA, N, M, K, ALPHA, B, LDB, A, LDA, BETA,
                         C, LDC );
+         }
       #endif
       /*
        * Fortran 77 BLAS back end.  ALPHA and BETA are copied into locals so
        * that their addresses can be passed to F77dgemm.  The way the two
        * character arguments are passed is selected by StringSunStyle,
        * StringCrayStyle, StringStructVal or StringStructPtr.
        */
       #ifdef HPL_CALL_FBLAS
          double                    alpha = ALPHA, beta = BETA;
       /*
        * IONE is only used by the StringSunStyle calls below, where it is
        * passed twice after the last regular argument (presumably the lengths
        * of the two character arguments -- confirm against the F77dgemm
        * prototype).
        */
       #ifdef StringSunStyle
       #ifdef HPL_USE_F77_INTEGER_DEF
          F77_INTEGER               IONE = 1;
       #else
          int                       IONE = 1;
       #endif
       #endif
       #ifdef StringStructVal
          F77_CHAR                  ftransa;
          F77_CHAR                  ftransb;
       #endif
       #ifdef StringStructPtr
          F77_CHAR                  ftransa;
          F77_CHAR                  ftransb;
       #endif
       #ifdef StringCrayStyle
          F77_CHAR                  ftransa;
          F77_CHAR                  ftransb;
       #endif
       /*
        * With HPL_USE_F77_INTEGER_DEF the integer arguments are copied into
        * F77_INTEGER locals; otherwise the F77* names are plain aliases for
        * the corresponding parameters.
        */
       #ifdef HPL_USE_F77_INTEGER_DEF
          const F77_INTEGER         F77M   = M,   F77N   = N,   F77K = K,
                                    F77lda = LDA, F77ldb = LDB, F77ldc = LDC;
       #else
       #define F77M                 M
       #define F77N                 N
       #define F77K                 K
       #define F77lda               LDA
       #define F77ldb               LDB
       #define F77ldc               LDC
       #endif
       /*
        * Map the transposition flags to the characters 'N', 'T' or 'C'; any
        * value other than HplNoTrans and HplTrans maps to 'C'.
        */
          char                      ctransa, ctransb;
          if(      TRANSA == HplNoTrans ) ctransa = 'N';
          else if( TRANSA == HplTrans   ) ctransa = 'T';
          else                            ctransa = 'C';
          if(      TRANSB == HplNoTrans ) ctransb = 'N';
          else if( TRANSB == HplTrans   ) ctransb = 'T';
          else                            ctransb = 'C';
          if( ORDER == HplColumnMajor )
+         {
       #ifdef StringSunStyle
             F77dgemm( &ctransa, &ctransb, &F77M, &F77N, &F77K, &alpha, A, &F77lda,
                       B, &F77ldb, &beta, C, &F77ldc, IONE, IONE );
       #endif
       #ifdef StringCrayStyle
             ftransa = HPL_C2F_CHAR( ctransa ); ftransb = HPL_C2F_CHAR( ctransb );
             F77dgemm( ftransa,  ftransb,  &F77M, &F77N, &F77K, &alpha, A, &F77lda,
                       B, &F77ldb, &beta, C, &F77ldc );
       #endif
       #ifdef StringStructVal
             ftransa.len = 1; ftransa.cp = &ctransa;
             ftransb.len = 1; ftransb.cp = &ctransb;
             F77dgemm( ftransa,  ftransb,  &F77M, &F77N, &F77K, &alpha, A, &F77lda,
                       B, &F77ldb, &beta, C, &F77ldc );
       #endif
       #ifdef StringStructPtr
             ftransa.len = 1; ftransa.cp = &ctransa;
             ftransb.len = 1; ftransb.cp = &ctransb;
             F77dgemm( &ftransa, &ftransb, &F77M, &F77N, &F77K, &alpha, A, &F77lda,
                       B, &F77ldb, &beta, C, &F77ldc );
       #endif
+         }
          else
+         {
       /*
        * Any other ORDER: same calls with A/B, M/N and the two transposition
        * arguments exchanged.
        */
       #ifdef StringSunStyle
             F77dgemm( &ctransb, &ctransa, &F77N, &F77M, &F77K, &alpha, B, &F77ldb,
                       A, &F77lda, &beta, C, &F77ldc, IONE, IONE );
       #endif
       #ifdef StringCrayStyle
             ftransa = HPL_C2F_CHAR( ctransa ); ftransb = HPL_C2F_CHAR( ctransb );
             F77dgemm( ftransb,  ftransa,  &F77N, &F77M, &F77K, &alpha, B, &F77ldb,
                       A, &F77lda, &beta, C, &F77ldc );
       #endif
       #ifdef StringStructVal
             ftransa.len = 1; ftransa.cp = &ctransa;
             ftransb.len = 1; ftransb.cp = &ctransb;
             F77dgemm( ftransb,  ftransa,  &F77N, &F77M, &F77K, &alpha, B, &F77ldb,
                       A, &F77lda, &beta, C, &F77ldc );
       #endif
       #ifdef StringStructPtr
             ftransa.len = 1; ftransa.cp = &ctransa;
             ftransb.len = 1; ftransb.cp = &ctransb;
             F77dgemm( &ftransb, &ftransa, &F77N, &F77M, &F77K, &alpha, B, &F77ldb,
                       A, &F77lda, &beta, C, &F77ldc );
       #endif
+         }
       #endif
       /*
        * CUBLAS back end: calls CUBLAS_DGEMM with every argument passed by
        * address.  The CUBLAS* names are plain aliases for the parameters.
        * The address of IONE is passed twice after the last regular argument
        * (NOTE(review): presumably standing in for the lengths of the two
        * character arguments -- confirm against the CUBLAS_DGEMM prototype).
        */
       #ifdef HPL_CALL_CUBLAS
          double                    alpha = ALPHA, beta = BETA;
          int                       IONE = 1;
       #define CUBLASM                 M
       #define CUBLASN                 N
       #define CUBLASK                 K
       #define CUBLASlda               LDA
       #define CUBLASldb               LDB
       #define CUBLASldc               LDC
          char                      ctransa, ctransb;
          if(      TRANSA == HplNoTrans ) ctransa = 'N';
          else if( TRANSA == HplTrans   ) ctransa = 'T';
          else                            ctransa = 'C';
          if(      TRANSB == HplNoTrans ) ctransb = 'N';
          else if( TRANSB == HplTrans   ) ctransb = 'T';
          else                            ctransb = 'C';
          if( ORDER == HplColumnMajor )
+         {
             CUBLAS_DGEMM( &ctransa, &ctransb, &CUBLASM, &CUBLASN, &CUBLASK,
                           &alpha, A, &CUBLASlda, B, &CUBLASldb, &beta, C, &CUBLASldc,
                           &IONE, &IONE );
+         }
          else
+         {
             CUBLAS_DGEMM( &ctransb, &ctransa, &CUBLASN, &CUBLASM, &CUBLASK,
                           &alpha, B, &CUBLASldb, A, &CUBLASlda, &beta, C, &CUBLASldc,
                           &IONE, &IONE );
+         }
       #endif
       /*
        * ACML back end: same structure as the CUBLAS back end, calling dgemm_.
        * The ACML* names are plain aliases for the parameters.
        * NOTE(review): the address of IONE is passed twice after the last
        * regular argument; confirm that this matches the dgemm_ prototype in
        * use.
        */
       #ifdef HPL_CALL_ACML
          double                    alpha = ALPHA, beta = BETA;
          int                       IONE = 1;
       #define ACMLM                 M
       #define ACMLN                 N
       #define ACMLK                 K
       #define ACMLlda               LDA
       #define ACMLldb               LDB
       #define ACMLldc               LDC
          char                      ctransa, ctransb;
          if(      TRANSA == HplNoTrans ) ctransa = 'N';
          else if( TRANSA == HplTrans   ) ctransa = 'T';
          else                            ctransa = 'C';
          if(      TRANSB == HplNoTrans ) ctransb = 'N';
          else if( TRANSB == HplTrans   ) ctransb = 'T';
          else                            ctransb = 'C';
          if( ORDER == HplColumnMajor )
+         {
             dgemm_( &ctransa, &ctransb, &ACMLM, &ACMLN, &ACMLK,
                     &alpha, A, &ACMLlda, B, &ACMLldb, &beta, C, &ACMLldc,
                     &IONE, &IONE );
+         }
          else
+         {
             dgemm_( &ctransb, &ctransa, &ACMLN, &ACMLM, &ACMLK,
                     &alpha, B, &ACMLldb, A, &ACMLlda, &beta, C, &ACMLldc,
                     &IONE, &IONE );
+         }
       #endif
       /*
        * End of HPL_dgemm
        */
+      }
       #endif

Centre Blaise Pascal » HPL sur GPU

root / src / blas / HPL_dgemm.c