root / src / blas / HPL_dgemm.c
Historique | Voir | Annoter | Télécharger (21,2 ko)
1 |
/*
|
---|---|
2 |
* -- High Performance Computing Linpack Benchmark (HPL)
|
3 |
* HPL - 2.0 - September 10, 2008
|
4 |
* Antoine P. Petitet
|
5 |
* University of Tennessee, Knoxville
|
6 |
* Innovative Computing Laboratory
|
7 |
* (C) Copyright 2000-2008 All Rights Reserved
|
8 |
*
|
9 |
* -- Copyright notice and Licensing terms:
|
10 |
*
|
11 |
* Redistribution and use in source and binary forms, with or without
|
12 |
* modification, are permitted provided that the following conditions
|
13 |
* are met:
|
14 |
*
|
15 |
* 1. Redistributions of source code must retain the above copyright
|
16 |
* notice, this list of conditions and the following disclaimer.
|
17 |
*
|
18 |
* 2. Redistributions in binary form must reproduce the above copyright
|
19 |
* notice, this list of conditions, and the following disclaimer in the
|
20 |
* documentation and/or other materials provided with the distribution.
|
21 |
*
|
22 |
* 3. All advertising materials mentioning features or use of this
|
23 |
* software must display the following acknowledgement:
|
24 |
* This product includes software developed at the University of
|
25 |
* Tennessee, Knoxville, Innovative Computing Laboratory.
|
26 |
*
|
27 |
* 4. The name of the University, the name of the Laboratory, or the
|
28 |
* names of its contributors may not be used to endorse or promote
|
29 |
* products derived from this software without specific written
|
30 |
* permission.
|
31 |
*
|
32 |
* -- Disclaimer:
|
33 |
*
|
34 |
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
35 |
* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
36 |
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
37 |
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
|
38 |
* OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
39 |
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
40 |
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
41 |
* DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
42 |
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
43 |
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
44 |
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
45 |
* ---------------------------------------------------------------------
|
46 |
*/
|
47 |
/*
|
48 |
* Include files
|
49 |
*/
|
50 |
#include "hpl.h" |
51 |
|
52 |
#ifndef HPL_dgemm
|
53 |
|
54 |
#ifdef HPL_CALL_VSIPL
|
55 |
|
56 |
#ifdef STDC_HEADERS
|
57 |
static void HPL_dgemmNN |
58 |
( |
59 |
const int M, |
60 |
const int N, |
61 |
const int K, |
62 |
const double ALPHA, |
63 |
const double * A, |
64 |
const int LDA, |
65 |
const double * B, |
66 |
const int LDB, |
67 |
const double BETA, |
68 |
double * C,
|
69 |
const int LDC |
70 |
) |
71 |
#else
|
72 |
static void HPL_dgemmNN( M, N, K, ALPHA, A, LDA, B, LDB, BETA, C, LDC ) |
73 |
const int K, LDA, LDB, LDC, M, N; |
74 |
const double ALPHA, BETA; |
75 |
const double * A, * B; |
76 |
double * C;
|
77 |
#endif
|
78 |
{ |
79 |
register double t0; |
80 |
int i, iail, iblj, icij, j, jal, jbj, jcj, l;
|
81 |
|
82 |
for( j = 0, jbj = 0, jcj = 0; j < N; j++, jbj += LDB, jcj += LDC ) |
83 |
{ |
84 |
HPL_dscal( M, BETA, C+jcj, 1 );
|
85 |
for( l = 0, jal = 0, iblj = jbj; l < K; l++, jal += LDA, iblj += 1 ) |
86 |
{ |
87 |
t0 = ALPHA * B[iblj]; |
88 |
for( i = 0, iail = jal, icij = jcj; i < M; i++, iail += 1, icij += 1 ) |
89 |
{ C[icij] += A[iail] * t0; } |
90 |
} |
91 |
} |
92 |
} |
93 |
|
94 |
#ifdef STDC_HEADERS
|
95 |
static void HPL_dgemmNT |
96 |
( |
97 |
const int M, |
98 |
const int N, |
99 |
const int K, |
100 |
const double ALPHA, |
101 |
const double * A, |
102 |
const int LDA, |
103 |
const double * B, |
104 |
const int LDB, |
105 |
const double BETA, |
106 |
double * C,
|
107 |
const int LDC |
108 |
) |
109 |
#else
|
110 |
static void HPL_dgemmNT( M, N, K, ALPHA, A, LDA, B, LDB, BETA, C, LDC ) |
111 |
const int K, LDA, LDB, LDC, M, N; |
112 |
const double ALPHA, BETA; |
113 |
const double * A, * B; |
114 |
double * C;
|
115 |
#endif
|
116 |
{ |
117 |
register double t0; |
118 |
int i, iail, ibj, ibjl, icij, j, jal, jcj, l;
|
119 |
|
120 |
for( j = 0, ibj = 0, jcj = 0; j < N; j++, ibj += 1, jcj += LDC ) |
121 |
{ |
122 |
HPL_dscal( M, BETA, C+jcj, 1 );
|
123 |
for( l = 0, jal = 0, ibjl = ibj; l < K; l++, jal += LDA, ibjl += LDB ) |
124 |
{ |
125 |
t0 = ALPHA * B[ibjl]; |
126 |
for( i = 0, iail = jal, icij = jcj; i < M; i++, iail += 1, icij += 1 ) |
127 |
{ C[icij] += A[iail] * t0; } |
128 |
} |
129 |
} |
130 |
} |
131 |
|
132 |
#ifdef STDC_HEADERS
|
133 |
static void HPL_dgemmTN |
134 |
( |
135 |
const int M, |
136 |
const int N, |
137 |
const int K, |
138 |
const double ALPHA, |
139 |
const double * A, |
140 |
const int LDA, |
141 |
const double * B, |
142 |
const int LDB, |
143 |
const double BETA, |
144 |
double * C,
|
145 |
const int LDC |
146 |
) |
147 |
#else
|
148 |
static void HPL_dgemmTN( M, N, K, ALPHA, A, LDA, B, LDB, BETA, C, LDC ) |
149 |
const int K, LDA, LDB, LDC, M, N; |
150 |
const double ALPHA, BETA; |
151 |
const double * A, * B; |
152 |
double * C;
|
153 |
#endif
|
154 |
{ |
155 |
register double t0; |
156 |
int i, iai, iail, iblj, icij, j, jbj, jcj, l;
|
157 |
|
158 |
for( j = 0, jbj = 0, jcj = 0; j < N; j++, jbj += LDB, jcj += LDC ) |
159 |
{ |
160 |
for( i = 0, icij = jcj, iai = 0; i < M; i++, icij += 1, iai += LDA ) |
161 |
{ |
162 |
t0 = HPL_rzero; |
163 |
for( l = 0, iail = iai, iblj = jbj; l < K; l++, iail += 1, iblj += 1 ) |
164 |
{ t0 += A[iail] * B[iblj]; } |
165 |
if( BETA == HPL_rzero ) C[icij] = HPL_rzero;
|
166 |
else C[icij] *= BETA;
|
167 |
C[icij] += ALPHA * t0; |
168 |
} |
169 |
} |
170 |
} |
171 |
|
172 |
#ifdef STDC_HEADERS
|
173 |
static void HPL_dgemmTT |
174 |
( |
175 |
const int M, |
176 |
const int N, |
177 |
const int K, |
178 |
const double ALPHA, |
179 |
const double * A, |
180 |
const int LDA, |
181 |
const double * B, |
182 |
const int LDB, |
183 |
const double BETA, |
184 |
double * C,
|
185 |
const int LDC |
186 |
) |
187 |
#else
|
188 |
static void HPL_dgemmTT( M, N, K, ALPHA, A, LDA, B, LDB, BETA, C, LDC ) |
189 |
const int K, LDA, LDB, LDC, M, N; |
190 |
const double ALPHA, BETA; |
191 |
const double * A, * B; |
192 |
double * C;
|
193 |
#endif
|
194 |
{ |
195 |
register double t0; |
196 |
int i, iali, ibj, ibjl, icij, j, jai, jcj, l;
|
197 |
|
198 |
for( j = 0, ibj = 0, jcj = 0; j < N; j++, ibj += 1, jcj += LDC ) |
199 |
{ |
200 |
for( i = 0, icij = jcj, jai = 0; i < M; i++, icij += 1, jai += LDA ) |
201 |
{ |
202 |
t0 = HPL_rzero; |
203 |
for( l = 0, iali = jai, ibjl = ibj; |
204 |
l < K; l++, iali += 1, ibjl += LDB ) t0 += A[iali] * B[ibjl];
|
205 |
if( BETA == HPL_rzero ) C[icij] = HPL_rzero;
|
206 |
else C[icij] *= BETA;
|
207 |
C[icij] += ALPHA * t0; |
208 |
} |
209 |
} |
210 |
} |
211 |
|
212 |
#ifdef STDC_HEADERS
|
213 |
static void HPL_dgemm0 |
214 |
( |
215 |
const enum HPL_TRANS TRANSA, |
216 |
const enum HPL_TRANS TRANSB, |
217 |
const int M, |
218 |
const int N, |
219 |
const int K, |
220 |
const double ALPHA, |
221 |
const double * A, |
222 |
const int LDA, |
223 |
const double * B, |
224 |
const int LDB, |
225 |
const double BETA, |
226 |
double * C,
|
227 |
const int LDC |
228 |
) |
229 |
#else
|
230 |
static void HPL_dgemm0( TRANSA, TRANSB, M, N, K, ALPHA, A, LDA, B, LDB, |
231 |
BETA, C, LDC ) |
232 |
const enum HPL_TRANS TRANSA, TRANSB; |
233 |
const int K, LDA, LDB, LDC, M, N; |
234 |
const double ALPHA, BETA; |
235 |
const double * A, * B; |
236 |
double * C;
|
237 |
#endif
|
238 |
{ |
239 |
int i, j;
|
240 |
|
241 |
if( ( M == 0 ) || ( N == 0 ) || |
242 |
( ( ( ALPHA == HPL_rzero ) || ( K == 0 ) ) &&
|
243 |
( BETA == HPL_rone ) ) ) return;
|
244 |
|
245 |
if( ALPHA == HPL_rzero )
|
246 |
{ |
247 |
for( j = 0; j < N; j++ ) |
248 |
{ for( i = 0; i < M; i++ ) *(C+i+j*LDC) = HPL_rzero; } |
249 |
return;
|
250 |
} |
251 |
|
252 |
if( TRANSB == HplNoTrans )
|
253 |
{ |
254 |
if( TRANSA == HplNoTrans )
|
255 |
{ HPL_dgemmNN( M, N, K, ALPHA, A, LDA, B, LDB, BETA, C, LDC ); } |
256 |
else
|
257 |
{ HPL_dgemmTN( M, N, K, ALPHA, A, LDA, B, LDB, BETA, C, LDC ); } |
258 |
} |
259 |
else
|
260 |
{ |
261 |
if( TRANSA == HplNoTrans )
|
262 |
{ HPL_dgemmNT( M, N, K, ALPHA, A, LDA, B, LDB, BETA, C, LDC ); } |
263 |
else
|
264 |
{ HPL_dgemmTT( M, N, K, ALPHA, A, LDA, B, LDB, BETA, C, LDC ); } |
265 |
} |
266 |
} |
267 |
|
268 |
#endif
|
269 |
|
270 |
#ifdef STDC_HEADERS
|
271 |
void HPL_dgemm
|
272 |
( |
273 |
const enum HPL_ORDER ORDER, |
274 |
const enum HPL_TRANS TRANSA, |
275 |
const enum HPL_TRANS TRANSB, |
276 |
const int M, |
277 |
const int N, |
278 |
const int K, |
279 |
const double ALPHA, |
280 |
const double * A, |
281 |
const int LDA, |
282 |
const double * B, |
283 |
const int LDB, |
284 |
const double BETA, |
285 |
double * C,
|
286 |
const int LDC |
287 |
) |
288 |
#else
|
289 |
void HPL_dgemm
|
290 |
( ORDER, TRANSA, TRANSB, M, N, K, ALPHA, A, LDA, B, LDB, BETA, C, LDC ) |
291 |
const enum HPL_ORDER ORDER; |
292 |
const enum HPL_TRANS TRANSA; |
293 |
const enum HPL_TRANS TRANSB; |
294 |
const int M; |
295 |
const int N; |
296 |
const int K; |
297 |
const double ALPHA; |
298 |
const double * A; |
299 |
const int LDA; |
300 |
const double * B; |
301 |
const int LDB; |
302 |
const double BETA; |
303 |
double * C;
|
304 |
const int LDC; |
305 |
#endif
|
306 |
{ |
307 |
/*
|
308 |
* Purpose
|
309 |
* =======
|
310 |
*
|
311 |
* HPL_dgemm performs one of the matrix-matrix operations
|
312 |
*
|
313 |
* C := alpha * op( A ) * op( B ) + beta * C
|
314 |
*
|
315 |
* where op( X ) is one of
|
316 |
*
|
317 |
* op( X ) = X or op( X ) = X^T.
|
318 |
*
|
319 |
* Alpha and beta are scalars, and A, B and C are matrices, with op(A)
|
320 |
* an m by k matrix, op(B) a k by n matrix and C an m by n matrix.
|
321 |
*
|
322 |
* Arguments
|
323 |
* =========
|
324 |
*
|
325 |
* ORDER (local input) const enum HPL_ORDER
|
326 |
* On entry, ORDER specifies the storage format of the operands
|
327 |
* as follows:
|
328 |
* ORDER = HplRowMajor,
|
329 |
* ORDER = HplColumnMajor.
|
330 |
*
|
331 |
* TRANSA (local input) const enum HPL_TRANS
|
332 |
* On entry, TRANSA specifies the form of op(A) to be used in
|
333 |
* the matrix-matrix operation follows:
|
334 |
* TRANSA==HplNoTrans : op( A ) = A,
|
335 |
* TRANSA==HplTrans : op( A ) = A^T,
|
336 |
* TRANSA==HplConjTrans : op( A ) = A^T.
|
337 |
*
|
338 |
* TRANSB (local input) const enum HPL_TRANS
|
339 |
* On entry, TRANSB specifies the form of op(B) to be used in
|
340 |
* the matrix-matrix operation follows:
|
341 |
* TRANSB==HplNoTrans : op( B ) = B,
|
342 |
* TRANSB==HplTrans : op( B ) = B^T,
|
343 |
* TRANSB==HplConjTrans : op( B ) = B^T.
|
344 |
*
|
345 |
* M (local input) const int
|
346 |
* On entry, M specifies the number of rows of the matrix
|
347 |
* op(A) and of the matrix C. M must be at least zero.
|
348 |
*
|
349 |
* N (local input) const int
|
350 |
* On entry, N specifies the number of columns of the matrix
|
351 |
* op(B) and the number of columns of the matrix C. N must be
|
352 |
* at least zero.
|
353 |
*
|
354 |
* K (local input) const int
|
355 |
* On entry, K specifies the number of columns of the matrix
|
356 |
* op(A) and the number of rows of the matrix op(B). K must be
|
357 |
* be at least zero.
|
358 |
*
|
359 |
* ALPHA (local input) const double
|
360 |
* On entry, ALPHA specifies the scalar alpha. When ALPHA is
|
361 |
* supplied as zero then the elements of the matrices A and B
|
362 |
* need not be set on input.
|
363 |
*
|
364 |
* A (local input) const double *
|
365 |
* On entry, A is an array of dimension (LDA,ka), where ka is
|
366 |
* k when TRANSA==HplNoTrans, and is m otherwise. Before
|
367 |
* entry with TRANSA==HplNoTrans, the leading m by k part of
|
368 |
* the array A must contain the matrix A, otherwise the leading
|
369 |
* k by m part of the array A must contain the matrix A.
|
370 |
*
|
371 |
* LDA (local input) const int
|
372 |
* On entry, LDA specifies the first dimension of A as declared
|
373 |
* in the calling (sub) program. When TRANSA==HplNoTrans then
|
374 |
* LDA must be at least max(1,m), otherwise LDA must be at least
|
375 |
* max(1,k).
|
376 |
*
|
377 |
* B (local input) const double *
|
378 |
* On entry, B is an array of dimension (LDB,kb), where kb is
|
379 |
* n when TRANSB==HplNoTrans, and is k otherwise. Before
|
380 |
* entry with TRANSB==HplNoTrans, the leading k by n part of
|
381 |
* the array B must contain the matrix B, otherwise the leading
|
382 |
* n by k part of the array B must contain the matrix B.
|
383 |
*
|
384 |
* LDB (local input) const int
|
385 |
* On entry, LDB specifies the first dimension of B as declared
|
386 |
* in the calling (sub) program. When TRANSB==HplNoTrans then
|
387 |
* LDB must be at least max(1,k), otherwise LDB must be at least
|
388 |
* max(1,n).
|
389 |
*
|
390 |
* BETA (local input) const double
|
391 |
* On entry, BETA specifies the scalar beta. When BETA is
|
392 |
* supplied as zero then the elements of the matrix C need
|
393 |
* not be set on input.
|
394 |
*
|
395 |
* C (local input/output) double *
|
396 |
* On entry, C is an array of dimension (LDC,n). Before entry,
|
397 |
* the leading m by n part of the array C must contain the
|
398 |
* matrix C, except when beta is zero, in which case C need not
|
399 |
* be set on entry. On exit, the array C is overwritten by the
|
400 |
* m by n matrix ( alpha*op( A )*op( B ) + beta*C ).
|
401 |
*
|
402 |
* LDC (local input) const int
|
403 |
* On entry, LDC specifies the first dimension of C as declared
|
404 |
* in the calling (sub) program. LDC must be at least
|
405 |
* max(1,m).
|
406 |
*
|
407 |
* ---------------------------------------------------------------------
|
408 |
*/
|
409 |
#ifdef HPL_CALL_CBLAS
|
410 |
cblas_dgemm( ORDER, TRANSA, TRANSB, M, N, K, ALPHA, A, LDA, B, LDB, |
411 |
BETA, C, LDC ); |
412 |
#endif
|
413 |
#ifdef HPL_CALL_GSLCBLAS
|
414 |
cblas_dgemm( ORDER, TRANSA, TRANSB, M, N, K, ALPHA, A, LDA, B, LDB, |
415 |
BETA, C, LDC ); |
416 |
#endif
|
417 |
#ifdef HPL_CALL_VSIPL
|
418 |
if( ORDER == HplColumnMajor )
|
419 |
{ |
420 |
HPL_dgemm0( TRANSA, TRANSB, M, N, K, ALPHA, A, LDA, B, LDB, BETA, |
421 |
C, LDC ); |
422 |
} |
423 |
else
|
424 |
{ |
425 |
HPL_dgemm0( TRANSB, TRANSA, N, M, K, ALPHA, B, LDB, A, LDA, BETA, |
426 |
C, LDC ); |
427 |
} |
428 |
#endif
|
429 |
|
430 |
#ifdef HPL_CALL_FBLAS
|
431 |
double alpha = ALPHA, beta = BETA;
|
432 |
#ifdef StringSunStyle
|
433 |
#ifdef HPL_USE_F77_INTEGER_DEF
|
434 |
F77_INTEGER IONE = 1;
|
435 |
#else
|
436 |
int IONE = 1; |
437 |
#endif
|
438 |
#endif
|
439 |
#ifdef StringStructVal
|
440 |
F77_CHAR ftransa; |
441 |
F77_CHAR ftransb; |
442 |
#endif
|
443 |
#ifdef StringStructPtr
|
444 |
F77_CHAR ftransa; |
445 |
F77_CHAR ftransb; |
446 |
#endif
|
447 |
#ifdef StringCrayStyle
|
448 |
F77_CHAR ftransa; |
449 |
F77_CHAR ftransb; |
450 |
#endif
|
451 |
#ifdef HPL_USE_F77_INTEGER_DEF
|
452 |
const F77_INTEGER F77M = M, F77N = N, F77K = K,
|
453 |
F77lda = LDA, F77ldb = LDB, F77ldc = LDC; |
454 |
#else
|
455 |
#define F77M M
|
456 |
#define F77N N
|
457 |
#define F77K K
|
458 |
#define F77lda LDA
|
459 |
#define F77ldb LDB
|
460 |
#define F77ldc LDC
|
461 |
#endif
|
462 |
char ctransa, ctransb;
|
463 |
|
464 |
if( TRANSA == HplNoTrans ) ctransa = 'N'; |
465 |
else if( TRANSA == HplTrans ) ctransa = 'T'; |
466 |
else ctransa = 'C'; |
467 |
|
468 |
if( TRANSB == HplNoTrans ) ctransb = 'N'; |
469 |
else if( TRANSB == HplTrans ) ctransb = 'T'; |
470 |
else ctransb = 'C'; |
471 |
|
472 |
if( ORDER == HplColumnMajor )
|
473 |
{ |
474 |
#ifdef StringSunStyle
|
475 |
F77dgemm( &ctransa, &ctransb, &F77M, &F77N, &F77K, &alpha, A, &F77lda, |
476 |
B, &F77ldb, &beta, C, &F77ldc, IONE, IONE ); |
477 |
#endif
|
478 |
#ifdef StringCrayStyle
|
479 |
ftransa = HPL_C2F_CHAR( ctransa ); ftransb = HPL_C2F_CHAR( ctransb ); |
480 |
F77dgemm( ftransa, ftransb, &F77M, &F77N, &F77K, &alpha, A, &F77lda, |
481 |
B, &F77ldb, &beta, C, &F77ldc ); |
482 |
#endif
|
483 |
#ifdef StringStructVal
|
484 |
ftransa.len = 1; ftransa.cp = &ctransa;
|
485 |
ftransb.len = 1; ftransb.cp = &ctransb;
|
486 |
F77dgemm( ftransa, ftransb, &F77M, &F77N, &F77K, &alpha, A, &F77lda, |
487 |
B, &F77ldb, &beta, C, &F77ldc ); |
488 |
#endif
|
489 |
#ifdef StringStructPtr
|
490 |
ftransa.len = 1; ftransa.cp = &ctransa;
|
491 |
ftransb.len = 1; ftransb.cp = &ctransb;
|
492 |
F77dgemm( &ftransa, &ftransb, &F77M, &F77N, &F77K, &alpha, A, &F77lda, |
493 |
B, &F77ldb, &beta, C, &F77ldc ); |
494 |
#endif
|
495 |
} |
496 |
else
|
497 |
{ |
498 |
#ifdef StringSunStyle
|
499 |
F77dgemm( &ctransb, &ctransa, &F77N, &F77M, &F77K, &alpha, B, &F77ldb, |
500 |
A, &F77lda, &beta, C, &F77ldc, IONE, IONE ); |
501 |
#endif
|
502 |
#ifdef StringCrayStyle
|
503 |
ftransa = HPL_C2F_CHAR( ctransa ); ftransb = HPL_C2F_CHAR( ctransb ); |
504 |
F77dgemm( ftransb, ftransa, &F77N, &F77M, &F77K, &alpha, B, &F77ldb, |
505 |
A, &F77lda, &beta, C, &F77ldc ); |
506 |
#endif
|
507 |
#ifdef StringStructVal
|
508 |
ftransa.len = 1; ftransa.cp = &ctransa;
|
509 |
ftransb.len = 1; ftransb.cp = &ctransb;
|
510 |
F77dgemm( ftransb, ftransa, &F77N, &F77M, &F77K, &alpha, B, &F77ldb, |
511 |
A, &F77lda, &beta, C, &F77ldc ); |
512 |
#endif
|
513 |
#ifdef StringStructPtr
|
514 |
ftransa.len = 1; ftransa.cp = &ctransa;
|
515 |
ftransb.len = 1; ftransb.cp = &ctransb;
|
516 |
F77dgemm( &ftransb, &ftransa, &F77N, &F77M, &F77K, &alpha, B, &F77ldb, |
517 |
A, &F77lda, &beta, C, &F77ldc ); |
518 |
#endif
|
519 |
} |
520 |
#endif
|
521 |
|
522 |
#ifdef HPL_CALL_CUBLAS
|
523 |
double alpha = ALPHA, beta = BETA;
|
524 |
|
525 |
int IONE = 1; |
526 |
|
527 |
#define CUBLASM M
|
528 |
#define CUBLASN N
|
529 |
#define CUBLASK K
|
530 |
#define CUBLASlda LDA
|
531 |
#define CUBLASldb LDB
|
532 |
#define CUBLASldc LDC
|
533 |
|
534 |
char ctransa, ctransb;
|
535 |
|
536 |
if( TRANSA == HplNoTrans ) ctransa = 'N'; |
537 |
else if( TRANSA == HplTrans ) ctransa = 'T'; |
538 |
else ctransa = 'C'; |
539 |
|
540 |
if( TRANSB == HplNoTrans ) ctransb = 'N'; |
541 |
else if( TRANSB == HplTrans ) ctransb = 'T'; |
542 |
else ctransb = 'C'; |
543 |
|
544 |
if( ORDER == HplColumnMajor )
|
545 |
{ |
546 |
CUBLAS_DGEMM( &ctransa, &ctransb, &CUBLASM, &CUBLASN, &CUBLASK, |
547 |
&alpha, A, &CUBLASlda, B, &CUBLASldb, &beta, C, &CUBLASldc, |
548 |
&IONE, &IONE ); |
549 |
} |
550 |
else
|
551 |
{ |
552 |
CUBLAS_DGEMM( &ctransb, &ctransa, &CUBLASN, &CUBLASM, &CUBLASK, |
553 |
&alpha, B, &CUBLASldb, A, &CUBLASlda, &beta, C, &CUBLASldc, |
554 |
&IONE, &IONE ); |
555 |
} |
556 |
#endif
|
557 |
|
558 |
#ifdef HPL_CALL_ACML
|
559 |
double alpha = ALPHA, beta = BETA;
|
560 |
|
561 |
int IONE = 1; |
562 |
|
563 |
#define ACMLM M
|
564 |
#define ACMLN N
|
565 |
#define ACMLK K
|
566 |
#define ACMLlda LDA
|
567 |
#define ACMLldb LDB
|
568 |
#define ACMLldc LDC
|
569 |
|
570 |
char ctransa, ctransb;
|
571 |
|
572 |
if( TRANSA == HplNoTrans ) ctransa = 'N'; |
573 |
else if( TRANSA == HplTrans ) ctransa = 'T'; |
574 |
else ctransa = 'C'; |
575 |
|
576 |
if( TRANSB == HplNoTrans ) ctransb = 'N'; |
577 |
else if( TRANSB == HplTrans ) ctransb = 'T'; |
578 |
else ctransb = 'C'; |
579 |
|
580 |
if( ORDER == HplColumnMajor )
|
581 |
{ |
582 |
dgemm_( &ctransa, &ctransb, &ACMLM, &ACMLN, &ACMLK, |
583 |
&alpha, A, &ACMLlda, B, &ACMLldb, &beta, C, &ACMLldc, |
584 |
&IONE, &IONE ); |
585 |
} |
586 |
else
|
587 |
{ |
588 |
dgemm_( &ctransb, &ctransa, &ACMLN, &ACMLM, &ACMLK, |
589 |
&alpha, B, &ACMLldb, A, &ACMLlda, &beta, C, &ACMLldc, |
590 |
&IONE, &IONE ); |
591 |
} |
592 |
#endif
|
593 |
/*
|
594 |
* End of HPL_dgemm
|
595 |
*/
|
596 |
} |
597 |
|
598 |
#endif
|