Révision 9

include/hpl_misc.h (revision 9)
66 66
#define STDC_ARGS(p)           ()
67 67
#endif
68 68

  
69

  
70

  
69 71
#ifdef HPL_CALL_VSIPL
70 72
#include <vsip.h>
71 73
#endif
74

  
75
#ifdef HPL_CALL_CUBLAS
76
#include <cublas.h>
77
#endif
78

  
72 79
/*
73 80
 * ---------------------------------------------------------------------
74 81
 * #define macro constants
include/hpl.h (revision 9)
64 64
 
65 65
#ifndef HPL_CALL_VSIPL          /* Call the Fortran 77 BLAS interface */
66 66
#ifndef HPL_CALL_CBLAS                       /* there can be only one */
67
#ifndef HPL_CALL_CUBLAS                     
67 68
#define HPL_CALL_FBLAS
68 69
#endif
69 70
#endif
71
#endif
70 72
/*
71 73
 * ---------------------------------------------------------------------
72 74
 * Include files
src/blas/HPL_dcopy.c (revision 9)
160 160
#endif
161 161
   F77dcopy( &F77N, X, &F77incx, Y, &F77incy );
162 162
#endif
163

  
164
#ifdef HPL_CALL_CUBLAS
165

  
166
#define CUBLASN                 N
167
#define CUBLASincx              INCX
168
#define CUBLASincy              INCY
169

  
170
   CUBLAS_DCOPY( &CUBLASN, X, &CUBLASincx, Y, &CUBLASincy );
171
#endif
163 172
/*
164 173
 * End of HPL_dcopy
165 174
 */
src/blas/HPL_dswap.c (revision 9)
149 149
#endif
150 150
   F77dswap( &F77N, X, &F77incx, Y, &F77incy );
151 151
#endif
152

  
153
#ifdef HPL_CALL_CUBLAS
154
#define CUBLASN                 N
155
#define CUBLASincx              INCX
156
#define CUBLASincy              INCY
157
   CUBLAS_DSWAP( &CUBLASN, X, &CUBLASincx, Y, &CUBLASincy );
158
#endif
159

  
152 160
/*
153 161
 * End of HPL_dswap
154 162
 */
src/blas/HPL_dger.c (revision 9)
169 169
      }
170 170
   }
171 171
#endif
172

  
172 173
#ifdef HPL_CALL_FBLAS
173 174
   double                    alpha = ALPHA;
174 175
#ifdef HPL_USE_F77_INTEGER_DEF
......
187 188
   else
188 189
   {  F77dger( &F77N, &F77M, &alpha, Y, &F77incy, X, &F77incx, A, &F77lda ); }
189 190
#endif
191

  
192
#ifdef HPL_CALL_CUBLAS
193
   double                    alpha = ALPHA;
194

  
195
#define CUBLASM                 M
196
#define CUBLASN                 N
197
#define CUBLASlda               LDA
198
#define CUBLASincx              INCX
199
#define CUBLASincy              INCY
200

  
201
   if( ORDER == HplColumnMajor )
202
   {  
203
     CUBLAS_DGER( &CUBLASM, &CUBLASN, &alpha, 
204
		  X, &CUBLASincx, Y, &CUBLASincy, A, &CUBLASlda ); 
205
   }
206
   else
207
   {  
208
     CUBLAS_DGER( &CUBLASN, &CUBLASM, &alpha, 
209
		  Y, &CUBLASincy, X, &CUBLASincx, A, &CUBLASlda ); }
210
#endif
190 211
/*
191 212
 * End of HPL_dger
192 213
 */
src/blas/HPL_dgemv.c (revision 9)
238 238
                  N, M, ALPHA, A, LDA, X, INCX, BETA, Y, INCY );
239 239
   }
240 240
#endif
241

  
242

  
241 243
#ifdef HPL_CALL_FBLAS
242 244
   double                    alpha = ALPHA, beta = BETA;
243 245
#ifdef StringSunStyle
......
318 320
   }
319 321

  
320 322
#endif
323

  
324
#ifdef HPL_CALL_CUBLAS
325
   double                    alpha = ALPHA, beta = BETA;
326

  
327
   int                       IONE = 1;
328

  
329
#define CUBLASM                 M
330
#define CUBLASN                 N
331
#define CUBLASlda               LDA
332
#define CUBLASincx              INCX
333
#define CUBLASincy              INCY
334

  
335
   char                      ctran;
336

  
337
   if( ORDER == HplColumnMajor )
338
   {
339
      ctran = ( TRANS == HplNoTrans ? 'N' : 'T' );
340

  
341
      CUBLAS_DGEMV( &ctran, &CUBLASM, &CUBLASN, 
342
		    &alpha, A, &CUBLASlda, X, &CUBLASincx,
343
		    &beta, Y, &CUBLASincy, IONE );
344
   }
345
   else
346
   {
347
      ctran = ( TRANS == HplNoTrans ? 'T' : 'N' );
348

  
349
      CUBLAS_DGEMV( &ctran, &CUBLASN, &CUBLASM, 
350
		    &alpha, A, &CUBLASlda, X, &CUBLASincx,
351
		    &beta, Y, &CUBLASincy, IONE );
352
   }
353

  
354
#endif
321 355
/*
322 356
 * End of HPL_dgemv
323 357
 */
src/blas/HPL_dtrsv.c (revision 9)
452 452
                  DIAG, N, A, LDA, X, INCX );
453 453
   }
454 454
#endif
455

  
455 456
#ifdef HPL_CALL_FBLAS
456 457
#ifdef StringSunStyle
457 458
#ifdef HPL_USE_F77_INTEGER_DEF
......
512 513
#endif
513 514

  
514 515
#endif
516

  
517
#ifdef HPL_CALL_CUBLAS
518

  
519
   int                       IONE = 1;
520
 
521
#define CUBLASN              N
522
#define CUBLASlda            LDA
523
#define CUBLASincx           INCX
524

  
525
   char                      cuplo, ctran, cdiag;
526

  
527
   if( ORDER == HplColumnMajor )
528
   {
529
      cuplo = ( UPLO  == HplUpper   ? 'U' : 'L' );
530
      ctran = ( TRANS == HplNoTrans ? 'N' : 'T' );
531
   }
532
   else
533
   {
534
      cuplo = ( UPLO  == HplUpper   ? 'L' : 'U' );
535
      ctran = ( TRANS == HplNoTrans ? 'T' : 'N' );
536
   }
537
   cdiag = ( DIAG == HplNonUnit ? 'N' : 'U' );
538

  
539
   CUBLAS_DTRSV( &cuplo, &ctran, &cdiag, &CUBLASN, 
540
		 A, &CUBLASlda, X, &CUBLASincx, IONE, IONE, IONE );
541

  
542
#endif
515 543
/*
516 544
 * End of HPL_dtrsv
517 545
 */
src/blas/HPL_idamax.c (revision 9)
159 159
   if( N > 0 ) imax = F77idamax( &F77N, X, &F77incx ) - 1;
160 160
   return( imax );
161 161
#endif
162

  
163
#ifdef HPL_CALL_CUBLAS
164
#define CUBLASN                 N
165
#define CUBLASincx              INCX
166
   int                       imax = 0;
167

  
168
   if( N > 0 ) imax = CUBLAS_IDAMAX( &CUBLASN, X, &CUBLASincx ) - 1;
169
   return( imax );
170
#endif
162 171
/*
163 172
 * End of HPL_idamax
164 173
 */
src/blas/HPL_daxpy.c (revision 9)
110 110
      }
111 111
   }
112 112
#endif
113

  
113 114
#ifdef HPL_CALL_FBLAS
114 115
   double                    alpha = ALPHA;
115 116
#ifdef HPL_USE_F77_INTEGER_DEF
......
121 122
#endif
122 123
   F77daxpy( &F77N, &alpha, X, &F77incx, Y, &F77incy );
123 124
#endif
125

  
126
#ifdef HPL_CALL_CUBLAS
127
   double                    alpha = ALPHA;
128
#define CUBLASN                 N
129
#define CUBLASincx              INCX
130
#define CUBLASincy              INCY
131
   CUBLAS_DAXPY( &CUBLASN, &alpha, X, &CUBLASincx, Y, &CUBLASincy );
132
#endif
124 133
/*
125 134
 * End of HPL_daxpy
126 135
 */
src/blas/HPL_dscal.c (revision 9)
160 160
      }
161 161
   }
162 162
#endif
163

  
163 164
#ifdef HPL_CALL_FBLAS
164 165
   double                    alpha = ALPHA;
165 166
#ifdef HPL_USE_F77_INTEGER_DEF
......
171 172

  
172 173
   F77dscal( &F77N, &alpha, X, &F77incx );
173 174
#endif
175

  
176
#ifdef HPL_CALL_CUBLAS
177
   double                    alpha = ALPHA;
178
#define CUBLASN                 N
179
#define CUBLASincx              INCX
180

  
181
   CUBLAS_DSCAL( &CUBLASN, &alpha, X, &CUBLASincx );
182
#endif
174 183
/*
175 184
 * End of HPL_dscal
176 185
 */
src/blas/HPL_dgemm.c (revision 9)
513 513
#endif
514 514
   }
515 515
#endif
516

  
517
#ifdef HPL_CALL_CUBLAS
518
   double                    alpha = ALPHA, beta = BETA;
519

  
520
   int                       IONE = 1;
521

  
522
#define CUBLASM                 M
523
#define CUBLASN                 N
524
#define CUBLASK                 K
525
#define CUBLASlda               LDA
526
#define CUBLASldb               LDB
527
#define CUBLASldc               LDC
528

  
529
   char                      ctransa, ctransb;
530

  
531
   if(      TRANSA == HplNoTrans ) ctransa = 'N';
532
   else if( TRANSA == HplTrans   ) ctransa = 'T';
533
   else                            ctransa = 'C';
534
 
535
   if(      TRANSB == HplNoTrans ) ctransb = 'N';
536
   else if( TRANSB == HplTrans   ) ctransb = 'T';
537
   else                            ctransb = 'C';
538

  
539
   if( ORDER == HplColumnMajor )
540
   {
541
      CUBLAS_DGEMM( &ctransa, &ctransb, &CUBLASM, &CUBLASN, &CUBLASK, 
542
		    &alpha, A, &CUBLASlda, B, &CUBLASldb, &beta, C, &CUBLASldc,
543
		    &IONE, &IONE );
544
   }
545
   else
546
   {
547
      CUBLAS_DGEMM( &ctransb, &ctransa, &CUBLASN, &CUBLASM, &CUBLASK, 
548
		    &alpha, B, &CUBLASldb, A, &CUBLASlda, &beta, C, &CUBLASldc,
549
		    &IONE, &IONE );
550
   }
551
#endif
516 552
/*
517 553
 * End of HPL_dgemm
518 554
 */
src/blas/HPL_dtrsm.c (revision 9)
871 871
                  TRANS, DIAG, N, M, ALPHA, A, LDA, B, LDB );
872 872
   }
873 873
#endif
874

  
874 875
#ifdef HPL_CALL_FBLAS
875 876
   double                    alpha = ALPHA;
876 877
#ifdef StringSunStyle
......
969 970
#endif
970 971
   }
971 972
#endif
973

  
974
#ifdef HPL_CALL_CUBLAS
975
   double                    alpha = ALPHA;
976

  
977
   int                       IONE = 1;
978

  
979
#define  CUBLASM                M
980
#define  CUBLASN                N
981
#define  CUBLASlda              LDA
982
#define  CUBLASldb              LDB
983

  
984
   char                      cside, cuplo, ctran, cdiag;
985

  
986
   if(      TRANS == HplNoTrans ) ctran = 'N';
987
   else if( TRANS == HplTrans   ) ctran = 'T';
988
   else                           ctran = 'C';
989
   cdiag = ( DIAG == HplUnit  ? 'U' : 'N' );
990

  
991
   if( ORDER == HplColumnMajor )
992
   {
993
      cside = ( SIDE == HplRight ? 'R' : 'L' );
994
      cuplo = ( UPLO == HplLower ? 'L' : 'U' );
995

  
996
      CUBLAS_DTRSM( &cside, &cuplo, &ctran, &cdiag, &CUBLASM, &CUBLASN, &alpha,
997
		    A, &CUBLASlda, B, &CUBLASldb, &IONE, &IONE, &IONE, &IONE );
998
   }
999
   else
1000
   {
1001
      cside = ( SIDE == HplRight ? 'L' : 'R' );
1002
      cuplo = ( UPLO == HplLower ? 'U' : 'L' );
1003

  
1004
      CUBLAS_DTRSM( &cside, &cuplo, &ctran, &cdiag, &CUBLASN, &CUBLASM, &alpha,
1005
		    A, &CUBLASlda, B, &CUBLASldb, &IONE, &IONE, &IONE, &IONE );
1006
   }
1007
#endif
972 1008
/*
973 1009
 * End of HPL_dtrsm
974 1010
 */
setup/Make.Debian_CBLAS_OpenMPI (revision 9)
82 82
# used. The variable MPdir is only used for defining MPinc and MPlib.
83 83
#
84 84
MPdir        = /usr/lib/openmpi
85
MPinc        = -I$(MPdir)/include
85
MPinc        = -I/include/include/openmpi
86 86
MPlib        = -L$(MPdir)/lib/libmpi.so
87 87
#
88 88
# ----------------------------------------------------------------------
setup/Make.Debian_CUBLAS_OpenMPI (revision 9)
1
#  
2
#  -- High Performance Computing Linpack Benchmark (HPL)                
3
#     HPL - 2.0 - September 10, 2008                          
4
#     Antoine P. Petitet                                                
5
#     University of Tennessee, Knoxville                                
6
#     Innovative Computing Laboratory                                 
7
#     (C) Copyright 2000-2008 All Rights Reserved                       
8
#                                                                       
9
#  -- Copyright notice and Licensing terms:                             
10
#                                                                       
11
#  Redistribution  and  use in  source and binary forms, with or without
12
#  modification, are  permitted provided  that the following  conditions
13
#  are met:                                                             
14
#                                                                       
15
#  1. Redistributions  of  source  code  must retain the above copyright
16
#  notice, this list of conditions and the following disclaimer.        
17
#                                                                       
18
#  2. Redistributions in binary form must reproduce  the above copyright
19
#  notice, this list of conditions,  and the following disclaimer in the
20
#  documentation and/or other materials provided with the distribution. 
21
#                                                                       
22
#  3. All  advertising  materials  mentioning  features  or  use of this
23
#  software must display the following acknowledgement:                 
24
#  This  product  includes  software  developed  at  the  University  of
25
#  Tennessee, Knoxville, Innovative Computing Laboratory.             
26
#                                                                       
27
#  4. The name of the  University,  the name of the  Laboratory,  or the
28
#  names  of  its  contributors  may  not  be used to endorse or promote
29
#  products  derived   from   this  software  without  specific  written
30
#  permission.                                                          
31
#                                                                       
32
#  -- Disclaimer:                                                       
33
#                                                                       
34
#  THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
35
#  ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
36
#  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
37
#  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
38
#  OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
39
#  SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
40
#  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
41
#  DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
42
#  THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
43
#  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
44
#  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
45
# ######################################################################
46
#  
47
# ----------------------------------------------------------------------
48
# - shell --------------------------------------------------------------
49
# ----------------------------------------------------------------------
50
#
51
SHELL        = /bin/sh
52
#
53
CD           = cd
54
CP           = cp
55
LN_S         = ln -s
56
MKDIR        = mkdir
57
RM           = /bin/rm -f
58
TOUCH        = touch
59
#
60
# ----------------------------------------------------------------------
61
# - Platform identifier ------------------------------------------------
62
# ----------------------------------------------------------------------
63
#
64
ARCH         = $(arch)
65
#
66
# ----------------------------------------------------------------------
67
# - HPL Directory Structure / HPL library ------------------------------
68
# ----------------------------------------------------------------------
69
#
70
TOPdir       = $(topdir)
71
INCdir       = $(TOPdir)/include
72
BINdir       = $(TOPdir)/bin/$(ARCH)
73
LIBdir       = $(TOPdir)/lib/$(ARCH)
74
#
75
HPLlib       = $(LIBdir)/libhpl.a 
76
#
77
# ----------------------------------------------------------------------
78
# - Message Passing library (MPI) --------------------------------------
79
# ----------------------------------------------------------------------
80
# MPinc tells the  C  compiler where to find the Message Passing library
81
# header files,  MPlib  is defined  to be the name of  the library to be
82
# used. The variable MPdir is only used for defining MPinc and MPlib.
83
#
84
MPdir        = /usr/lib/openmpi
85
MPinc        = -I/include/include/openmpi
86
MPlib        = -L$(MPdir)/lib/libmpi.so
87
#
88
# ----------------------------------------------------------------------
89
# - Linear Algebra library (BLAS or VSIPL) -----------------------------
90
# ----------------------------------------------------------------------
91
# LAinc tells the  C  compiler where to find the Linear Algebra  library
92
# header files,  LAlib  is defined  to be the name of  the library to be
93
# used. The variable LAdir is only used for defining LAinc and LAlib.
94
#
95
LAdir        = /opt/cuda/lib64
96
LAinc        = -I/opt/cuda/include
97
LAlib        = $(LAdir)/libcublas.so
98
#
99
# ----------------------------------------------------------------------
100
# - F77 / C interface --------------------------------------------------
101
# ----------------------------------------------------------------------
102
# You can skip this section  if and only if  you are not planning to use
103
# a  BLAS  library featuring a Fortran 77 interface.  Otherwise,  it  is
104
# necessary  to  fill out the  F2CDEFS  variable  with  the  appropriate
105
# options.  **One and only one**  option should be chosen in **each** of
106
# the 3 following categories:
107
#
108
# 1) name space (How C calls a Fortran 77 routine)
109
#
110
# -DAdd_              : all lower case and a suffixed underscore  (Suns,
111
#                       Intel, ...),                           [default]
112
# -DNoChange          : all lower case (IBM RS6000),
113
# -DUpCase            : all upper case (Cray),
114
# -DAdd__             : the FORTRAN compiler in use is f2c.
115
#
116
# 2) C and Fortran 77 integer mapping
117
#
118
# -DF77_INTEGER=int   : Fortran 77 INTEGER is a C int,         [default]
119
# -DF77_INTEGER=long  : Fortran 77 INTEGER is a C long,
120
# -DF77_INTEGER=short : Fortran 77 INTEGER is a C short.
121
#
122
# 3) Fortran 77 string handling
123
#
124
# -DStringSunStyle    : The string address is passed at the string loca-
125
#                       tion on the stack, and the string length is then
126
#                       passed as  an  F77_INTEGER  after  all  explicit
127
#                       stack arguments,                       [default]
128
# -DStringStructPtr   : The address  of  a  structure  is  passed  by  a
129
#                       Fortran 77  string,  and the structure is of the
130
#                       form: struct {char *cp; F77_INTEGER len;},
131
# -DStringStructVal   : A structure is passed by value for each  Fortran
132
#                       77 string,  and  the  structure is  of the form:
133
#                       struct {char *cp; F77_INTEGER len;},
134
# -DStringCrayStyle   : Special option for  Cray  machines,  which  uses
135
#                       Cray  fcd  (fortran  character  descriptor)  for
136
#                       interoperation.
137
#
138
F2CDEFS      =
139
#
140
# ----------------------------------------------------------------------
141
# - HPL includes / libraries / specifics -------------------------------
142
# ----------------------------------------------------------------------
143
#
144
HPL_INCLUDES = -I$(INCdir) -I$(INCdir)/$(ARCH) $(LAinc) $(MPinc)
145
HPL_LIBS     = $(HPLlib) $(LAlib) $(MPlib)
146
#
147
# - Compile time options -----------------------------------------------
148
#
149
# -DHPL_COPY_L           force the copy of the panel L before bcast;
150
# -DHPL_CALL_CBLAS       call the cblas interface;
151
# -DHPL_CALL_VSIPL       call the vsip  library;
152
# -DHPL_CALL_CUBLAS      call the CUBLAS library from NVIDIA CUDA;
153

  
154
# -DHPL_DETAILED_TIMING  enable detailed timers;
155
#
156
# By default HPL will:
157
#    *) not copy L before broadcast,
158
#    *) call the BLAS Fortran 77 interface,
159
#    *) not display detailed timing information.
160
#
161
HPL_OPTS     = -DHPL_CALL_CUBLAS
162
#
163
# ----------------------------------------------------------------------
164
#
165
HPL_DEFS     = $(F2CDEFS) $(HPL_OPTS) $(HPL_INCLUDES)
166
#
167
# ----------------------------------------------------------------------
168
# - Compilers / linkers - Optimization flags ---------------------------
169
# ----------------------------------------------------------------------
170
#
171
CC           = /usr/bin/mpicc.openmpi
172
CCNOOPT      = $(HPL_DEFS)
173
CCFLAGS      = $(HPL_DEFS) -fomit-frame-pointer -O3 -funroll-loops -W -Wall
174
#
175
# On some platforms,  it is necessary  to use the Fortran linker to find
176
# the Fortran internals used in the BLAS library.
177
#
178
LINKER       = /usr/bin/mpicc.openmpi
179
# The object fortran_thunking.o must first be compiled in the HPL root folder
180
LINKFLAGS    = $(CCFLAGS) $(TOPdir)/fortran_thunking.o
181
#
182
ARCHIVER     = ar
183
ARFLAGS      = r
184
RANLIB       = echo
185
#
186
# ----------------------------------------------------------------------
addons/Build.Debian_CUBLAS.sh (revision 9)
1
# Script to build HPL against CUBLAS using the CUBLAS Fortran thunking wrappers
2
#
3
# 2010-11-26 : Initial release
4
# Emmanuel Quemener <emmanuel.quemener@ens-lyon.fr>
5
#
6
# Copy this script to the HPL root folder and execute it from there
7

  
8
# Define root folder for CUDA toolkit (default dir is /usr/local/cuda)
9
CUDASRC=/opt/cuda
10
# Remove all object files
11
find . -name "*.o" -exec rm {} \; 
12
# Copy the CUBLAS thunking sources from the CUDA toolkit into the current folder
13
cp $CUDASRC/src/fortran_thunking.c $CUDASRC/src/fortran_thunking.h $CUDASRC/src/fortran_common.h .
14
# Patch Thunking prototypes to compile on Debian Lenny
15
patch fortran_common.h patch_thunking.h
16
# Compile the thunking wrappers (against the patched fortran_common.h) into fortran_thunking.o
17
gcc -funroll-loops -W  -I$CUDASRC/include -Wall -O3 -c fortran_thunking.c
18
# Clean old builds
19
make arch=Debian_CUBLAS_OpenMPI clean_all_arch
20
# Compile HPL
21
make arch=Debian_CUBLAS_OpenMPI
0 22

  
addons/patch_thunking.h (revision 9)
1
41c41
2
< #define CUBLAS_FORTRAN_COMPILER CUBLAS_G95
3
---
4
> #define CUBLAS_FORTRAN_COMPILER CUBLAS_INTEL_FORTRAN
testing/ptest/HPL.dat (revision 9)
2 2
Innovative Computing Laboratory, University of Tennessee
3 3
HPL.out      output file name (if any)
4 4
6            device out (6=stdout,7=stderr,file)
5
2            # of problems sizes (N)
6
1024 2048    Ns
7
2            # of NBs
8
256 512      NBs
5
1            # of problems sizes (N)
6
2048    Ns
7
1            # of NBs
8
512      NBs
9 9
0            PMAP process mapping (0=Row-,1=Column-major)
10
2            # of process grids (P x Q)
11
1 2        Ps
12
2 1        Qs
10
1            # of process grids (P x Q)
11
1        Ps
12
1        Qs
13 13
16.0         threshold
14 14
3            # of panel fact
15 15
0 1 2        PFACTs (0=left, 1=Crout, 2=Right)

Formats disponibles : Unified diff