Révision 9
include/hpl_misc.h (revision 9) | ||
---|---|---|
66 | 66 |
#define STDC_ARGS(p) () |
67 | 67 |
#endif |
68 | 68 |
|
69 |
|
|
70 |
|
|
69 | 71 |
#ifdef HPL_CALL_VSIPL |
70 | 72 |
#include <vsip.h> |
71 | 73 |
#endif |
74 |
|
|
75 |
#ifdef HPL_CALL_CUBLAS |
|
76 |
#include <cublas.h> |
|
77 |
#endif |
|
78 |
|
|
72 | 79 |
/* |
73 | 80 |
* --------------------------------------------------------------------- |
74 | 81 |
* #define macro constants |
include/hpl.h (revision 9) | ||
---|---|---|
64 | 64 |
|
65 | 65 |
#ifndef HPL_CALL_VSIPL /* Call the Fortran 77 BLAS interface */ |
66 | 66 |
#ifndef HPL_CALL_CBLAS /* there can be only one */ |
67 |
#ifndef HPL_CALL_CUBLAS |
|
67 | 68 |
#define HPL_CALL_FBLAS |
68 | 69 |
#endif |
69 | 70 |
#endif |
71 |
#endif |
|
70 | 72 |
/* |
71 | 73 |
* --------------------------------------------------------------------- |
72 | 74 |
* Include files |
src/blas/HPL_dcopy.c (revision 9) | ||
---|---|---|
160 | 160 |
#endif |
161 | 161 |
F77dcopy( &F77N, X, &F77incx, Y, &F77incy ); |
162 | 162 |
#endif |
163 |
|
|
164 |
#ifdef HPL_CALL_CUBLAS |
|
165 |
|
|
166 |
#define CUBLASN N |
|
167 |
#define CUBLASincx INCX |
|
168 |
#define CUBLASincy INCY |
|
169 |
|
|
170 |
CUBLAS_DCOPY( &CUBLASN, X, &CUBLASincx, Y, &CUBLASincy ); |
|
171 |
#endif |
|
163 | 172 |
/* |
164 | 173 |
* End of HPL_dcopy |
165 | 174 |
*/ |
src/blas/HPL_dswap.c (revision 9) | ||
---|---|---|
149 | 149 |
#endif |
150 | 150 |
F77dswap( &F77N, X, &F77incx, Y, &F77incy ); |
151 | 151 |
#endif |
152 |
|
|
153 |
#ifdef HPL_CALL_CUBLAS |
|
154 |
#define CUBLASN N |
|
155 |
#define CUBLASincx INCX |
|
156 |
#define CUBLASincy INCY |
|
157 |
CUBLAS_DSWAP( &CUBLASN, X, &CUBLASincx, Y, &CUBLASincy ); |
|
158 |
#endif |
|
159 |
|
|
152 | 160 |
/* |
153 | 161 |
* End of HPL_dswap |
154 | 162 |
*/ |
src/blas/HPL_dger.c (revision 9) | ||
---|---|---|
169 | 169 |
} |
170 | 170 |
} |
171 | 171 |
#endif |
172 |
|
|
172 | 173 |
#ifdef HPL_CALL_FBLAS |
173 | 174 |
double alpha = ALPHA; |
174 | 175 |
#ifdef HPL_USE_F77_INTEGER_DEF |
... | ... | |
187 | 188 |
else |
188 | 189 |
{ F77dger( &F77N, &F77M, &alpha, Y, &F77incy, X, &F77incx, A, &F77lda ); } |
189 | 190 |
#endif |
191 |
|
|
192 |
#ifdef HPL_CALL_CUBLAS |
|
193 |
double alpha = ALPHA; |
|
194 |
|
|
195 |
#define CUBLASM M |
|
196 |
#define CUBLASN N |
|
197 |
#define CUBLASlda LDA |
|
198 |
#define CUBLASincx INCX |
|
199 |
#define CUBLASincy INCY |
|
200 |
|
|
201 |
if( ORDER == HplColumnMajor ) |
|
202 |
{ |
|
203 |
CUBLAS_DGER( &CUBLASM, &CUBLASN, &alpha, |
|
204 |
X, &CUBLASincx, Y, &CUBLASincy, A, &CUBLASlda ); |
|
205 |
} |
|
206 |
else |
|
207 |
{ |
|
208 |
CUBLAS_DGER( &CUBLASN, &CUBLASM, &alpha, |
|
209 |
Y, &CUBLASincy, X, &CUBLASincx, A, &CUBLASlda ); } |
|
210 |
#endif |
|
190 | 211 |
/* |
191 | 212 |
* End of HPL_dger |
192 | 213 |
*/ |
src/blas/HPL_dgemv.c (revision 9) | ||
---|---|---|
238 | 238 |
N, M, ALPHA, A, LDA, X, INCX, BETA, Y, INCY ); |
239 | 239 |
} |
240 | 240 |
#endif |
241 |
|
|
242 |
|
|
241 | 243 |
#ifdef HPL_CALL_FBLAS |
242 | 244 |
double alpha = ALPHA, beta = BETA; |
243 | 245 |
#ifdef StringSunStyle |
... | ... | |
318 | 320 |
} |
319 | 321 |
|
320 | 322 |
#endif |
323 |
|
|
324 |
#ifdef HPL_CALL_CUBLAS |
|
325 |
double alpha = ALPHA, beta = BETA; |
|
326 |
|
|
327 |
int IONE = 1; |
|
328 |
|
|
329 |
#define CUBLASM M |
|
330 |
#define CUBLASN N |
|
331 |
#define CUBLASlda LDA |
|
332 |
#define CUBLASincx INCX |
|
333 |
#define CUBLASincy INCY |
|
334 |
|
|
335 |
char ctran; |
|
336 |
|
|
337 |
if( ORDER == HplColumnMajor ) |
|
338 |
{ |
|
339 |
ctran = ( TRANS == HplNoTrans ? 'N' : 'T' ); |
|
340 |
|
|
341 |
CUBLAS_DGEMV( &ctran, &CUBLASM, &CUBLASN, |
|
342 |
&alpha, A, &CUBLASlda, X, &CUBLASincx, |
|
343 |
&beta, Y, &CUBLASincy, IONE ); |
|
344 |
} |
|
345 |
else |
|
346 |
{ |
|
347 |
ctran = ( TRANS == HplNoTrans ? 'T' : 'N' ); |
|
348 |
|
|
349 |
CUBLAS_DGEMV( &ctran, &CUBLASN, &CUBLASM, |
|
350 |
&alpha, A, &CUBLASlda, X, &CUBLASincx, |
|
351 |
&beta, Y, &CUBLASincy, IONE ); |
|
352 |
} |
|
353 |
|
|
354 |
#endif |
|
321 | 355 |
/* |
322 | 356 |
* End of HPL_dgemv |
323 | 357 |
*/ |
src/blas/HPL_dtrsv.c (revision 9) | ||
---|---|---|
452 | 452 |
DIAG, N, A, LDA, X, INCX ); |
453 | 453 |
} |
454 | 454 |
#endif |
455 |
|
|
455 | 456 |
#ifdef HPL_CALL_FBLAS |
456 | 457 |
#ifdef StringSunStyle |
457 | 458 |
#ifdef HPL_USE_F77_INTEGER_DEF |
... | ... | |
512 | 513 |
#endif |
513 | 514 |
|
514 | 515 |
#endif |
516 |
|
|
517 |
#ifdef HPL_CALL_CUBLAS |
|
518 |
|
|
519 |
int IONE = 1; |
|
520 |
|
|
521 |
#define CUBLASN N |
|
522 |
#define CUBLASlda LDA |
|
523 |
#define CUBLASincx INCX |
|
524 |
|
|
525 |
char cuplo, ctran, cdiag; |
|
526 |
|
|
527 |
if( ORDER == HplColumnMajor ) |
|
528 |
{ |
|
529 |
cuplo = ( UPLO == HplUpper ? 'U' : 'L' ); |
|
530 |
ctran = ( TRANS == HplNoTrans ? 'N' : 'T' ); |
|
531 |
} |
|
532 |
else |
|
533 |
{ |
|
534 |
cuplo = ( UPLO == HplUpper ? 'L' : 'U' ); |
|
535 |
ctran = ( TRANS == HplNoTrans ? 'T' : 'N' ); |
|
536 |
} |
|
537 |
cdiag = ( DIAG == HplNonUnit ? 'N' : 'U' ); |
|
538 |
|
|
539 |
CUBLAS_DTRSV( &cuplo, &ctran, &cdiag, &CUBLASN, |
|
540 |
A, &CUBLASlda, X, &CUBLASincx, IONE, IONE, IONE ); |
|
541 |
|
|
542 |
#endif |
|
515 | 543 |
/* |
516 | 544 |
* End of HPL_dtrsv |
517 | 545 |
*/ |
src/blas/HPL_idamax.c (revision 9) | ||
---|---|---|
159 | 159 |
if( N > 0 ) imax = F77idamax( &F77N, X, &F77incx ) - 1; |
160 | 160 |
return( imax ); |
161 | 161 |
#endif |
162 |
|
|
163 |
#ifdef HPL_CALL_CUBLAS |
|
164 |
#define CUBLASN N |
|
165 |
#define CUBLASincx INCX |
|
166 |
int imax = 0; |
|
167 |
|
|
168 |
if( N > 0 ) imax = CUBLAS_IDAMAX( &CUBLASN, X, &CUBLASincx ) - 1; |
|
169 |
return( imax ); |
|
170 |
#endif |
|
162 | 171 |
/* |
163 | 172 |
* End of HPL_idamax |
164 | 173 |
*/ |
src/blas/HPL_daxpy.c (revision 9) | ||
---|---|---|
110 | 110 |
} |
111 | 111 |
} |
112 | 112 |
#endif |
113 |
|
|
113 | 114 |
#ifdef HPL_CALL_FBLAS |
114 | 115 |
double alpha = ALPHA; |
115 | 116 |
#ifdef HPL_USE_F77_INTEGER_DEF |
... | ... | |
121 | 122 |
#endif |
122 | 123 |
F77daxpy( &F77N, &alpha, X, &F77incx, Y, &F77incy ); |
123 | 124 |
#endif |
125 |
|
|
126 |
#ifdef HPL_CALL_CUBLAS |
|
127 |
double alpha = ALPHA; |
|
128 |
#define CUBLASN N |
|
129 |
#define CUBLASincx INCX |
|
130 |
#define CUBLASincy INCY |
|
131 |
CUBLAS_DAXPY( &CUBLASN, &alpha, X, &CUBLASincx, Y, &CUBLASincy ); |
|
132 |
#endif |
|
124 | 133 |
/* |
125 | 134 |
* End of HPL_daxpy |
126 | 135 |
*/ |
src/blas/HPL_dscal.c (revision 9) | ||
---|---|---|
160 | 160 |
} |
161 | 161 |
} |
162 | 162 |
#endif |
163 |
|
|
163 | 164 |
#ifdef HPL_CALL_FBLAS |
164 | 165 |
double alpha = ALPHA; |
165 | 166 |
#ifdef HPL_USE_F77_INTEGER_DEF |
... | ... | |
171 | 172 |
|
172 | 173 |
F77dscal( &F77N, &alpha, X, &F77incx ); |
173 | 174 |
#endif |
175 |
|
|
176 |
#ifdef HPL_CALL_CUBLAS |
|
177 |
double alpha = ALPHA; |
|
178 |
#define CUBLASN N |
|
179 |
#define CUBLASincx INCX |
|
180 |
|
|
181 |
CUBLAS_DSCAL( &CUBLASN, &alpha, X, &CUBLASincx ); |
|
182 |
#endif |
|
174 | 183 |
/* |
175 | 184 |
* End of HPL_dscal |
176 | 185 |
*/ |
src/blas/HPL_dgemm.c (revision 9) | ||
---|---|---|
513 | 513 |
#endif |
514 | 514 |
} |
515 | 515 |
#endif |
516 |
|
|
517 |
#ifdef HPL_CALL_CUBLAS |
|
518 |
double alpha = ALPHA, beta = BETA; |
|
519 |
|
|
520 |
int IONE = 1; |
|
521 |
|
|
522 |
#define CUBLASM M |
|
523 |
#define CUBLASN N |
|
524 |
#define CUBLASK K |
|
525 |
#define CUBLASlda LDA |
|
526 |
#define CUBLASldb LDB |
|
527 |
#define CUBLASldc LDC |
|
528 |
|
|
529 |
char ctransa, ctransb; |
|
530 |
|
|
531 |
if( TRANSA == HplNoTrans ) ctransa = 'N'; |
|
532 |
else if( TRANSA == HplTrans ) ctransa = 'T'; |
|
533 |
else ctransa = 'C'; |
|
534 |
|
|
535 |
if( TRANSB == HplNoTrans ) ctransb = 'N'; |
|
536 |
else if( TRANSB == HplTrans ) ctransb = 'T'; |
|
537 |
else ctransb = 'C'; |
|
538 |
|
|
539 |
if( ORDER == HplColumnMajor ) |
|
540 |
{ |
|
541 |
CUBLAS_DGEMM( &ctransa, &ctransb, &CUBLASM, &CUBLASN, &CUBLASK, |
|
542 |
&alpha, A, &CUBLASlda, B, &CUBLASldb, &beta, C, &CUBLASldc, |
|
543 |
&IONE, &IONE ); |
|
544 |
} |
|
545 |
else |
|
546 |
{ |
|
547 |
CUBLAS_DGEMM( &ctransb, &ctransa, &CUBLASN, &CUBLASM, &CUBLASK, |
|
548 |
&alpha, B, &CUBLASldb, A, &CUBLASlda, &beta, C, &CUBLASldc, |
|
549 |
&IONE, &IONE ); |
|
550 |
} |
|
551 |
#endif |
|
516 | 552 |
/* |
517 | 553 |
* End of HPL_dgemm |
518 | 554 |
*/ |
src/blas/HPL_dtrsm.c (revision 9) | ||
---|---|---|
871 | 871 |
TRANS, DIAG, N, M, ALPHA, A, LDA, B, LDB ); |
872 | 872 |
} |
873 | 873 |
#endif |
874 |
|
|
874 | 875 |
#ifdef HPL_CALL_FBLAS |
875 | 876 |
double alpha = ALPHA; |
876 | 877 |
#ifdef StringSunStyle |
... | ... | |
969 | 970 |
#endif |
970 | 971 |
} |
971 | 972 |
#endif |
973 |
|
|
974 |
#ifdef HPL_CALL_CUBLAS |
|
975 |
double alpha = ALPHA; |
|
976 |
|
|
977 |
int IONE = 1; |
|
978 |
|
|
979 |
#define CUBLASM M |
|
980 |
#define CUBLASN N |
|
981 |
#define CUBLASlda LDA |
|
982 |
#define CUBLASldb LDB |
|
983 |
|
|
984 |
char cside, cuplo, ctran, cdiag; |
|
985 |
|
|
986 |
if( TRANS == HplNoTrans ) ctran = 'N'; |
|
987 |
else if( TRANS == HplTrans ) ctran = 'T'; |
|
988 |
else ctran = 'C'; |
|
989 |
cdiag = ( DIAG == HplUnit ? 'U' : 'N' ); |
|
990 |
|
|
991 |
if( ORDER == HplColumnMajor ) |
|
992 |
{ |
|
993 |
cside = ( SIDE == HplRight ? 'R' : 'L' ); |
|
994 |
cuplo = ( UPLO == HplLower ? 'L' : 'U' ); |
|
995 |
|
|
996 |
CUBLAS_DTRSM( &cside, &cuplo, &ctran, &cdiag, &CUBLASM, &CUBLASN, &alpha, |
|
997 |
A, &CUBLASlda, B, &CUBLASldb, &IONE, &IONE, &IONE, &IONE ); |
|
998 |
} |
|
999 |
else |
|
1000 |
{ |
|
1001 |
cside = ( SIDE == HplRight ? 'L' : 'R' ); |
|
1002 |
cuplo = ( UPLO == HplLower ? 'U' : 'L' ); |
|
1003 |
|
|
1004 |
CUBLAS_DTRSM( &cside, &cuplo, &ctran, &cdiag, &CUBLASN, &CUBLASM, &alpha, |
|
1005 |
A, &CUBLASlda, B, &CUBLASldb, &IONE, &IONE, &IONE, &IONE ); |
|
1006 |
} |
|
1007 |
#endif |
|
972 | 1008 |
/* |
973 | 1009 |
* End of HPL_dtrsm |
974 | 1010 |
*/ |
setup/Make.Debian_CBLAS_OpenMPI (revision 9) | ||
---|---|---|
82 | 82 |
# used. The variable MPdir is only used for defining MPinc and MPlib. |
83 | 83 |
# |
84 | 84 |
MPdir = /usr/lib/openmpi |
85 |
MPinc = -I$(MPdir)/include
|
|
85 |
MPinc = -I/include/include/openmpi
|
|
86 | 86 |
MPlib = -L$(MPdir)/lib/libmpi.so |
87 | 87 |
# |
88 | 88 |
# ---------------------------------------------------------------------- |
setup/Make.Debian_CUBLAS_OpenMPI (revision 9) | ||
---|---|---|
1 |
# |
|
2 |
# -- High Performance Computing Linpack Benchmark (HPL) |
|
3 |
# HPL - 2.0 - September 10, 2008 |
|
4 |
# Antoine P. Petitet |
|
5 |
# University of Tennessee, Knoxville |
|
6 |
# Innovative Computing Laboratory |
|
7 |
# (C) Copyright 2000-2008 All Rights Reserved |
|
8 |
# |
|
9 |
# -- Copyright notice and Licensing terms: |
|
10 |
# |
|
11 |
# Redistribution and use in source and binary forms, with or without |
|
12 |
# modification, are permitted provided that the following conditions |
|
13 |
# are met: |
|
14 |
# |
|
15 |
# 1. Redistributions of source code must retain the above copyright |
|
16 |
# notice, this list of conditions and the following disclaimer. |
|
17 |
# |
|
18 |
# 2. Redistributions in binary form must reproduce the above copyright |
|
19 |
# notice, this list of conditions, and the following disclaimer in the |
|
20 |
# documentation and/or other materials provided with the distribution. |
|
21 |
# |
|
22 |
# 3. All advertising materials mentioning features or use of this |
|
23 |
# software must display the following acknowledgement: |
|
24 |
# This product includes software developed at the University of |
|
25 |
# Tennessee, Knoxville, Innovative Computing Laboratory. |
|
26 |
# |
|
27 |
# 4. The name of the University, the name of the Laboratory, or the |
|
28 |
# names of its contributors may not be used to endorse or promote |
|
29 |
# products derived from this software without specific written |
|
30 |
# permission. |
|
31 |
# |
|
32 |
# -- Disclaimer: |
|
33 |
# |
|
34 |
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS |
|
35 |
# ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT |
|
36 |
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR |
|
37 |
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY |
|
38 |
# OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, |
|
39 |
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT |
|
40 |
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, |
|
41 |
# DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY |
|
42 |
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT |
|
43 |
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE |
|
44 |
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
|
45 |
# ###################################################################### |
|
46 |
# |
|
47 |
# ---------------------------------------------------------------------- |
|
48 |
# - shell -------------------------------------------------------------- |
|
49 |
# ---------------------------------------------------------------------- |
|
50 |
# |
|
51 |
SHELL = /bin/sh |
|
52 |
# |
|
53 |
CD = cd |
|
54 |
CP = cp |
|
55 |
LN_S = ln -s |
|
56 |
MKDIR = mkdir |
|
57 |
RM = /bin/rm -f |
|
58 |
TOUCH = touch |
|
59 |
# |
|
60 |
# ---------------------------------------------------------------------- |
|
61 |
# - Platform identifier ------------------------------------------------ |
|
62 |
# ---------------------------------------------------------------------- |
|
63 |
# |
|
64 |
ARCH = $(arch) |
|
65 |
# |
|
66 |
# ---------------------------------------------------------------------- |
|
67 |
# - HPL Directory Structure / HPL library ------------------------------ |
|
68 |
# ---------------------------------------------------------------------- |
|
69 |
# |
|
70 |
TOPdir = $(topdir) |
|
71 |
INCdir = $(TOPdir)/include |
|
72 |
BINdir = $(TOPdir)/bin/$(ARCH) |
|
73 |
LIBdir = $(TOPdir)/lib/$(ARCH) |
|
74 |
# |
|
75 |
HPLlib = $(LIBdir)/libhpl.a |
|
76 |
# |
|
77 |
# ---------------------------------------------------------------------- |
|
78 |
# - Message Passing library (MPI) -------------------------------------- |
|
79 |
# ---------------------------------------------------------------------- |
|
80 |
# MPinc tells the C compiler where to find the Message Passing library |
|
81 |
# header files, MPlib is defined to be the name of the library to be |
|
82 |
# used. The variable MPdir is only used for defining MPinc and MPlib. |
|
83 |
# |
|
84 |
MPdir = /usr/lib/openmpi |
|
85 |
MPinc = -I/include/include/openmpi |
|
86 |
MPlib = -L$(MPdir)/lib/libmpi.so |
|
87 |
# |
|
88 |
# ---------------------------------------------------------------------- |
|
89 |
# - Linear Algebra library (BLAS or VSIPL) ----------------------------- |
|
90 |
# ---------------------------------------------------------------------- |
|
91 |
# LAinc tells the C compiler where to find the Linear Algebra library |
|
92 |
# header files, LAlib is defined to be the name of the library to be |
|
93 |
# used. The variable LAdir is only used for defining LAinc and LAlib. |
|
94 |
# |
|
95 |
LAdir = /opt/cuda/lib64 |
|
96 |
LAinc = -I/opt/cuda/include |
|
97 |
LAlib = $(LAdir)/libcublas.so |
|
98 |
# |
|
99 |
# ---------------------------------------------------------------------- |
|
100 |
# - F77 / C interface -------------------------------------------------- |
|
101 |
# ---------------------------------------------------------------------- |
|
102 |
# You can skip this section if and only if you are not planning to use |
|
103 |
# a BLAS library featuring a Fortran 77 interface. Otherwise, it is |
|
104 |
# necessary to fill out the F2CDEFS variable with the appropriate |
|
105 |
# options. **One and only one** option should be chosen in **each** of |
|
106 |
# the 3 following categories: |
|
107 |
# |
|
108 |
# 1) name space (How C calls a Fortran 77 routine) |
|
109 |
# |
|
110 |
# -DAdd_ : all lower case and a suffixed underscore (Suns, |
|
111 |
# Intel, ...), [default] |
|
112 |
# -DNoChange : all lower case (IBM RS6000), |
|
113 |
# -DUpCase : all upper case (Cray), |
|
114 |
# -DAdd__ : the FORTRAN compiler in use is f2c. |
|
115 |
# |
|
116 |
# 2) C and Fortran 77 integer mapping |
|
117 |
# |
|
118 |
# -DF77_INTEGER=int : Fortran 77 INTEGER is a C int, [default] |
|
119 |
# -DF77_INTEGER=long : Fortran 77 INTEGER is a C long, |
|
120 |
# -DF77_INTEGER=short : Fortran 77 INTEGER is a C short. |
|
121 |
# |
|
122 |
# 3) Fortran 77 string handling |
|
123 |
# |
|
124 |
# -DStringSunStyle : The string address is passed at the string loca- |
|
125 |
# tion on the stack, and the string length is then |
|
126 |
# passed as an F77_INTEGER after all explicit |
|
127 |
# stack arguments, [default] |
|
128 |
# -DStringStructPtr : The address of a structure is passed by a |
|
129 |
# Fortran 77 string, and the structure is of the |
|
130 |
# form: struct {char *cp; F77_INTEGER len;}, |
|
131 |
# -DStringStructVal : A structure is passed by value for each Fortran |
|
132 |
# 77 string, and the structure is of the form: |
|
133 |
# struct {char *cp; F77_INTEGER len;}, |
|
134 |
# -DStringCrayStyle : Special option for Cray machines, which uses |
|
135 |
# Cray fcd (fortran character descriptor) for |
|
136 |
# interoperation. |
|
137 |
# |
|
138 |
F2CDEFS = |
|
139 |
# |
|
140 |
# ---------------------------------------------------------------------- |
|
141 |
# - HPL includes / libraries / specifics ------------------------------- |
|
142 |
# ---------------------------------------------------------------------- |
|
143 |
# |
|
144 |
HPL_INCLUDES = -I$(INCdir) -I$(INCdir)/$(ARCH) $(LAinc) $(MPinc) |
|
145 |
HPL_LIBS = $(HPLlib) $(LAlib) $(MPlib) |
|
146 |
# |
|
147 |
# - Compile time options ----------------------------------------------- |
|
148 |
# |
|
149 |
# -DHPL_COPY_L force the copy of the panel L before bcast; |
|
150 |
# -DHPL_CALL_CBLAS call the cblas interface; |
|
151 |
# -DHPL_CALL_VSIPL call the vsip library; |
|
152 |
# -DHPL_CALL_CUBLAS call the cuBLAS library from NVIDIA CUDA; |
|
153 |
|
|
154 |
# -DHPL_DETAILED_TIMING enable detailed timers; |
|
155 |
# |
|
156 |
# By default HPL will: |
|
157 |
# *) not copy L before broadcast, |
|
158 |
# *) call the BLAS Fortran 77 interface, |
|
159 |
# *) not display detailed timing information. |
|
160 |
# |
|
161 |
HPL_OPTS = -DHPL_CALL_CUBLAS |
|
162 |
# |
|
163 |
# ---------------------------------------------------------------------- |
|
164 |
# |
|
165 |
HPL_DEFS = $(F2CDEFS) $(HPL_OPTS) $(HPL_INCLUDES) |
|
166 |
# |
|
167 |
# ---------------------------------------------------------------------- |
|
168 |
# - Compilers / linkers - Optimization flags --------------------------- |
|
169 |
# ---------------------------------------------------------------------- |
|
170 |
# |
|
171 |
CC = /usr/bin/mpicc.openmpi |
|
172 |
CCNOOPT = $(HPL_DEFS) |
|
173 |
CCFLAGS = $(HPL_DEFS) -fomit-frame-pointer -O3 -funroll-loops -W -Wall |
|
174 |
# |
|
175 |
# On some platforms, it is necessary to use the Fortran linker to find |
|
176 |
# the Fortran internals used in the BLAS library. |
|
177 |
# |
|
178 |
LINKER = /usr/bin/mpicc.openmpi |
|
179 |
# The object fortran_thunking.o has to be compiled in the HPL root folder |
|
180 |
LINKFLAGS = $(CCFLAGS) $(TOPdir)/fortran_thunking.o |
|
181 |
# |
|
182 |
ARCHIVER = ar |
|
183 |
ARFLAGS = r |
|
184 |
RANLIB = echo |
|
185 |
# |
|
186 |
# ---------------------------------------------------------------------- |
addons/Build.Debian_CUBLAS.sh (revision 9) | ||
---|---|---|
1 |
# CUBLAS script to build HPL using CUBLAS Thunking |
|
2 |
# |
|
3 |
# 2010-11-26 : Initial release |
|
4 |
# Emmanuel Quemener <emmanuel.quemener@ens-lyon.fr> |
|
5 |
# |
|
6 |
# Copy this script to the HPL root folder and execute it |
|
7 |
|
|
8 |
# Define the root folder of the CUDA toolkit (the default dir is /usr/local/cuda) |
|
9 |
CUDASRC=/opt/cuda |
|
10 |
# Clean all objects files |
|
11 |
find . -name "*.o" -exec rm {} \; |
|
12 |
# Copy the sources for the CUBLAS thunking approach |
|
13 |
cp $CUDASRC/src/fortran_thunking.c $CUDASRC/src/fortran_thunking.h $CUDASRC/src/fortran_common.h . |
|
14 |
# Patch Thunking prototypes to compile on Debian Lenny |
|
15 |
patch fortran_common.h patch_thunking.h |
|
16 |
# patching file fortran_common.h |
|
17 |
gcc -funroll-loops -W -I$CUDASRC/include -Wall -O3 -c fortran_thunking.c |
|
18 |
# Clean old builds |
|
19 |
make arch=Debian_CUBLAS_OpenMPI clean_all_arch |
|
20 |
# Compile HPL |
|
21 |
make arch=Debian_CUBLAS_OpenMPI |
|
0 | 22 |
addons/patch_thunking.h (revision 9) | ||
---|---|---|
1 |
41c41 |
|
2 |
< #define CUBLAS_FORTRAN_COMPILER CUBLAS_G95 |
|
3 |
--- |
|
4 |
> #define CUBLAS_FORTRAN_COMPILER CUBLAS_INTEL_FORTRAN |
testing/ptest/HPL.dat (revision 9) | ||
---|---|---|
2 | 2 |
Innovative Computing Laboratory, University of Tennessee |
3 | 3 |
HPL.out output file name (if any) |
4 | 4 |
6 device out (6=stdout,7=stderr,file) |
5 |
2 # of problems sizes (N)
|
|
6 |
1024 2048 Ns
|
|
7 |
2 # of NBs
|
|
8 |
256 512 NBs
|
|
5 |
1 # of problems sizes (N)
|
|
6 |
2048 Ns |
|
7 |
1 # of NBs
|
|
8 |
512 NBs |
|
9 | 9 |
0 PMAP process mapping (0=Row-,1=Column-major) |
10 |
2 # of process grids (P x Q)
|
|
11 |
1 2 Ps
|
|
12 |
2 1 Qs
|
|
10 |
1 # of process grids (P x Q)
|
|
11 |
1 Ps |
|
12 |
1 Qs |
|
13 | 13 |
16.0 threshold |
14 | 14 |
3 # of panel fact |
15 | 15 |
0 1 2 PFACTs (0=left, 1=Crout, 2=Right) |
Formats disponibles : Unified diff