Statistiques
| Révision :

root / src / blas / HPL_dgemm.c

Historique | Voir | Annoter | Télécharger (21,2 ko)

1 1 equemene
/*
2 1 equemene
 * -- High Performance Computing Linpack Benchmark (HPL)
3 1 equemene
 *    HPL - 2.0 - September 10, 2008
4 1 equemene
 *    Antoine P. Petitet
5 1 equemene
 *    University of Tennessee, Knoxville
6 1 equemene
 *    Innovative Computing Laboratory
7 1 equemene
 *    (C) Copyright 2000-2008 All Rights Reserved
8 1 equemene
 *
9 1 equemene
 * -- Copyright notice and Licensing terms:
10 1 equemene
 *
11 1 equemene
 * Redistribution  and  use in  source and binary forms, with or without
12 1 equemene
 * modification, are  permitted provided  that the following  conditions
13 1 equemene
 * are met:
14 1 equemene
 *
15 1 equemene
 * 1. Redistributions  of  source  code  must retain the above copyright
16 1 equemene
 * notice, this list of conditions and the following disclaimer.
17 1 equemene
 *
18 1 equemene
 * 2. Redistributions in binary form must reproduce  the above copyright
19 1 equemene
 * notice, this list of conditions,  and the following disclaimer in the
20 1 equemene
 * documentation and/or other materials provided with the distribution.
21 1 equemene
 *
22 1 equemene
 * 3. All  advertising  materials  mentioning  features  or  use of this
23 1 equemene
 * software must display the following acknowledgement:
24 1 equemene
 * This  product  includes  software  developed  at  the  University  of
25 1 equemene
 * Tennessee, Knoxville, Innovative Computing Laboratory.
26 1 equemene
 *
27 1 equemene
 * 4. The name of the  University,  the name of the  Laboratory,  or the
28 1 equemene
 * names  of  its  contributors  may  not  be used to endorse or promote
29 1 equemene
 * products  derived   from   this  software  without  specific  written
30 1 equemene
 * permission.
31 1 equemene
 *
32 1 equemene
 * -- Disclaimer:
33 1 equemene
 *
34 1 equemene
 * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
35 1 equemene
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
36 1 equemene
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
37 1 equemene
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
38 1 equemene
 * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
39 1 equemene
 * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
40 1 equemene
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
41 1 equemene
 * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
42 1 equemene
 * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
43 1 equemene
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
44 1 equemene
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
45 1 equemene
 * ---------------------------------------------------------------------
46 1 equemene
 */
47 1 equemene
/*
48 1 equemene
 * Include files
49 1 equemene
 */
50 1 equemene
#include "hpl.h"
51 1 equemene
52 1 equemene
#ifndef HPL_dgemm
53 1 equemene
54 1 equemene
#ifdef HPL_CALL_VSIPL
55 1 equemene
56 1 equemene
#ifdef STDC_HEADERS
57 1 equemene
static void HPL_dgemmNN
58 1 equemene
(
59 1 equemene
   const int                  M,
60 1 equemene
   const int                  N,
61 1 equemene
   const int                  K,
62 1 equemene
   const double               ALPHA,
63 1 equemene
   const double               * A,
64 1 equemene
   const int                  LDA,
65 1 equemene
   const double               * B,
66 1 equemene
   const int                  LDB,
67 1 equemene
   const double               BETA,
68 1 equemene
   double                     * C,
69 1 equemene
   const int                  LDC
70 1 equemene
)
71 1 equemene
#else
72 1 equemene
static void HPL_dgemmNN( M, N, K, ALPHA, A, LDA, B, LDB, BETA, C, LDC )
73 1 equemene
   const int                  K, LDA, LDB, LDC, M, N;
74 1 equemene
   const double               ALPHA, BETA;
75 1 equemene
   const double               * A, * B;
76 1 equemene
   double                     * C;
77 1 equemene
#endif
78 1 equemene
{
79 1 equemene
   register double            t0;
80 1 equemene
   int                        i, iail, iblj, icij, j, jal, jbj, jcj, l;
81 1 equemene
82 1 equemene
   for( j = 0, jbj = 0, jcj  = 0; j < N; j++, jbj += LDB, jcj += LDC )
83 1 equemene
   {
84 1 equemene
      HPL_dscal( M, BETA, C+jcj, 1 );
85 1 equemene
      for( l = 0, jal = 0, iblj = jbj; l < K; l++, jal += LDA, iblj += 1 )
86 1 equemene
      {
87 1 equemene
         t0 = ALPHA * B[iblj];
88 1 equemene
         for( i = 0, iail = jal, icij = jcj; i < M; i++, iail += 1, icij += 1 )
89 1 equemene
         { C[icij] += A[iail] * t0; }
90 1 equemene
      }
91 1 equemene
   }
92 1 equemene
}
93 1 equemene
94 1 equemene
#ifdef STDC_HEADERS
95 1 equemene
static void HPL_dgemmNT
96 1 equemene
(
97 1 equemene
   const int                  M,
98 1 equemene
   const int                  N,
99 1 equemene
   const int                  K,
100 1 equemene
   const double               ALPHA,
101 1 equemene
   const double               * A,
102 1 equemene
   const int                  LDA,
103 1 equemene
   const double               * B,
104 1 equemene
   const int                  LDB,
105 1 equemene
   const double               BETA,
106 1 equemene
   double                     * C,
107 1 equemene
   const int                  LDC
108 1 equemene
)
109 1 equemene
#else
110 1 equemene
static void HPL_dgemmNT( M, N, K, ALPHA, A, LDA, B, LDB, BETA, C, LDC )
111 1 equemene
   const int                  K, LDA, LDB, LDC, M, N;
112 1 equemene
   const double               ALPHA, BETA;
113 1 equemene
   const double               * A, * B;
114 1 equemene
   double                     * C;
115 1 equemene
#endif
116 1 equemene
{
117 1 equemene
   register double            t0;
118 1 equemene
   int                        i, iail, ibj, ibjl, icij, j, jal, jcj, l;
119 1 equemene
120 1 equemene
   for( j = 0, ibj  = 0, jcj  = 0; j < N; j++, ibj += 1, jcj += LDC )
121 1 equemene
   {
122 1 equemene
      HPL_dscal( M, BETA, C+jcj, 1 );
123 1 equemene
      for( l = 0, jal = 0, ibjl = ibj; l < K; l++, jal += LDA, ibjl += LDB )
124 1 equemene
      {
125 1 equemene
         t0 = ALPHA * B[ibjl];
126 1 equemene
         for( i = 0, iail = jal, icij = jcj; i < M; i++, iail += 1, icij += 1 )
127 1 equemene
         { C[icij] += A[iail] * t0; }
128 1 equemene
      }
129 1 equemene
   }
130 1 equemene
}
131 1 equemene
132 1 equemene
#ifdef STDC_HEADERS
133 1 equemene
static void HPL_dgemmTN
134 1 equemene
(
135 1 equemene
   const int                  M,
136 1 equemene
   const int                  N,
137 1 equemene
   const int                  K,
138 1 equemene
   const double               ALPHA,
139 1 equemene
   const double               * A,
140 1 equemene
   const int                  LDA,
141 1 equemene
   const double               * B,
142 1 equemene
   const int                  LDB,
143 1 equemene
   const double               BETA,
144 1 equemene
   double                     * C,
145 1 equemene
   const int                  LDC
146 1 equemene
)
147 1 equemene
#else
148 1 equemene
static void HPL_dgemmTN( M, N, K, ALPHA, A, LDA, B, LDB, BETA, C, LDC )
149 1 equemene
   const int                  K, LDA, LDB, LDC, M, N;
150 1 equemene
   const double               ALPHA, BETA;
151 1 equemene
   const double               * A, * B;
152 1 equemene
   double                     * C;
153 1 equemene
#endif
154 1 equemene
{
155 1 equemene
   register double            t0;
156 1 equemene
   int                        i, iai, iail, iblj, icij, j, jbj, jcj, l;
157 1 equemene
158 1 equemene
   for( j = 0, jbj = 0, jcj = 0; j < N; j++, jbj += LDB, jcj += LDC )
159 1 equemene
   {
160 1 equemene
      for( i = 0, icij = jcj, iai = 0; i < M; i++, icij += 1, iai += LDA )
161 1 equemene
      {
162 1 equemene
         t0 = HPL_rzero;
163 1 equemene
         for( l = 0, iail = iai, iblj = jbj; l < K; l++, iail += 1, iblj += 1 )
164 1 equemene
         { t0 += A[iail] * B[iblj]; }
165 1 equemene
         if( BETA == HPL_rzero ) C[icij]  = HPL_rzero;
166 1 equemene
         else                    C[icij] *= BETA;
167 1 equemene
         C[icij] += ALPHA * t0;
168 1 equemene
      }
169 1 equemene
   }
170 1 equemene
}
171 1 equemene
172 1 equemene
#ifdef STDC_HEADERS
173 1 equemene
static void HPL_dgemmTT
174 1 equemene
(
175 1 equemene
   const int                  M,
176 1 equemene
   const int                  N,
177 1 equemene
   const int                  K,
178 1 equemene
   const double               ALPHA,
179 1 equemene
   const double               * A,
180 1 equemene
   const int                  LDA,
181 1 equemene
   const double               * B,
182 1 equemene
   const int                  LDB,
183 1 equemene
   const double               BETA,
184 1 equemene
   double                     * C,
185 1 equemene
   const int                  LDC
186 1 equemene
)
187 1 equemene
#else
188 1 equemene
static void HPL_dgemmTT( M, N, K, ALPHA, A, LDA, B, LDB, BETA, C, LDC )
189 1 equemene
   const int                  K, LDA, LDB, LDC, M, N;
190 1 equemene
   const double               ALPHA, BETA;
191 1 equemene
   const double               * A, * B;
192 1 equemene
   double                     * C;
193 1 equemene
#endif
194 1 equemene
{
195 1 equemene
   register double            t0;
196 1 equemene
   int                        i, iali, ibj, ibjl, icij, j, jai, jcj, l;
197 1 equemene
198 1 equemene
   for( j = 0, ibj = 0, jcj  = 0; j < N; j++, ibj += 1, jcj += LDC )
199 1 equemene
   {
200 1 equemene
      for( i = 0, icij = jcj, jai = 0; i < M; i++, icij += 1, jai += LDA )
201 1 equemene
      {
202 1 equemene
         t0 = HPL_rzero;
203 1 equemene
         for( l = 0,      iali  = jai, ibjl  = ibj;
204 1 equemene
              l < K; l++, iali += 1,   ibjl += LDB ) t0 += A[iali] * B[ibjl];
205 1 equemene
         if( BETA == HPL_rzero ) C[icij]  = HPL_rzero;
206 1 equemene
         else                    C[icij] *= BETA;
207 1 equemene
         C[icij] += ALPHA * t0;
208 1 equemene
      }
209 1 equemene
   }
210 1 equemene
}
211 1 equemene
212 1 equemene
#ifdef STDC_HEADERS
213 1 equemene
static void HPL_dgemm0
214 1 equemene
(
215 1 equemene
   const enum HPL_TRANS       TRANSA,
216 1 equemene
   const enum HPL_TRANS       TRANSB,
217 1 equemene
   const int                  M,
218 1 equemene
   const int                  N,
219 1 equemene
   const int                  K,
220 1 equemene
   const double               ALPHA,
221 1 equemene
   const double               * A,
222 1 equemene
   const int                  LDA,
223 1 equemene
   const double               * B,
224 1 equemene
   const int                  LDB,
225 1 equemene
   const double               BETA,
226 1 equemene
   double                     * C,
227 1 equemene
   const int                  LDC
228 1 equemene
)
229 1 equemene
#else
230 1 equemene
static void HPL_dgemm0( TRANSA, TRANSB, M, N, K, ALPHA, A, LDA, B, LDB,
231 1 equemene
                        BETA, C, LDC )
232 1 equemene
   const enum HPL_TRANS       TRANSA, TRANSB;
233 1 equemene
   const int                  K, LDA, LDB, LDC, M, N;
234 1 equemene
   const double               ALPHA, BETA;
235 1 equemene
   const double               * A, * B;
236 1 equemene
   double                     * C;
237 1 equemene
#endif
238 1 equemene
{
239 1 equemene
   int                        i, j;
240 1 equemene
241 1 equemene
   if( ( M == 0 ) || ( N == 0 ) ||
242 1 equemene
       ( ( ( ALPHA == HPL_rzero ) || ( K == 0 ) ) &&
243 1 equemene
         ( BETA == HPL_rone ) ) ) return;
244 1 equemene
245 1 equemene
   if( ALPHA == HPL_rzero )
246 1 equemene
   {
247 1 equemene
      for( j = 0; j < N; j++ )
248 1 equemene
      {  for( i = 0; i < M; i++ ) *(C+i+j*LDC) = HPL_rzero; }
249 1 equemene
      return;
250 1 equemene
   }
251 1 equemene
252 1 equemene
   if( TRANSB == HplNoTrans )
253 1 equemene
   {
254 1 equemene
      if( TRANSA == HplNoTrans )
255 1 equemene
      { HPL_dgemmNN( M, N, K, ALPHA, A, LDA, B, LDB, BETA, C, LDC ); }
256 1 equemene
      else
257 1 equemene
      { HPL_dgemmTN( M, N, K, ALPHA, A, LDA, B, LDB, BETA, C, LDC ); }
258 1 equemene
   }
259 1 equemene
   else
260 1 equemene
   {
261 1 equemene
      if( TRANSA == HplNoTrans )
262 1 equemene
      { HPL_dgemmNT( M, N, K, ALPHA, A, LDA, B, LDB, BETA, C, LDC ); }
263 1 equemene
      else
264 1 equemene
      { HPL_dgemmTT( M, N, K, ALPHA, A, LDA, B, LDB, BETA, C, LDC ); }
265 1 equemene
   }
266 1 equemene
}
267 1 equemene
268 1 equemene
#endif
269 1 equemene
270 1 equemene
#ifdef STDC_HEADERS
271 1 equemene
void HPL_dgemm
272 1 equemene
(
273 1 equemene
   const enum HPL_ORDER             ORDER,
274 1 equemene
   const enum HPL_TRANS             TRANSA,
275 1 equemene
   const enum HPL_TRANS             TRANSB,
276 1 equemene
   const int                        M,
277 1 equemene
   const int                        N,
278 1 equemene
   const int                        K,
279 1 equemene
   const double                     ALPHA,
280 1 equemene
   const double *                   A,
281 1 equemene
   const int                        LDA,
282 1 equemene
   const double *                   B,
283 1 equemene
   const int                        LDB,
284 1 equemene
   const double                     BETA,
285 1 equemene
   double *                         C,
286 1 equemene
   const int                        LDC
287 1 equemene
)
288 1 equemene
#else
289 1 equemene
void HPL_dgemm
290 1 equemene
( ORDER, TRANSA, TRANSB, M, N, K, ALPHA, A, LDA, B, LDB, BETA, C, LDC )
291 1 equemene
   const enum HPL_ORDER             ORDER;
292 1 equemene
   const enum HPL_TRANS             TRANSA;
293 1 equemene
   const enum HPL_TRANS             TRANSB;
294 1 equemene
   const int                        M;
295 1 equemene
   const int                        N;
296 1 equemene
   const int                        K;
297 1 equemene
   const double                     ALPHA;
298 1 equemene
   const double *                   A;
299 1 equemene
   const int                        LDA;
300 1 equemene
   const double *                   B;
301 1 equemene
   const int                        LDB;
302 1 equemene
   const double                     BETA;
303 1 equemene
   double *                         C;
304 1 equemene
   const int                        LDC;
305 1 equemene
#endif
306 1 equemene
{
307 1 equemene
/*
308 1 equemene
 * Purpose
309 1 equemene
 * =======
310 1 equemene
 *
311 1 equemene
 * HPL_dgemm performs one of the matrix-matrix operations
312 1 equemene
 *
313 1 equemene
 *     C := alpha * op( A ) * op( B ) + beta * C
314 1 equemene
 *
315 1 equemene
 *  where op( X ) is one of
316 1 equemene
 *
317 1 equemene
 *     op( X ) = X   or   op( X ) = X^T.
318 1 equemene
 *
319 1 equemene
 * Alpha and beta are scalars,  and A,  B and C are matrices, with op(A)
320 1 equemene
 * an m by k matrix, op(B) a k by n matrix and  C an m by n matrix.
321 1 equemene
 *
322 1 equemene
 * Arguments
323 1 equemene
 * =========
324 1 equemene
 *
325 1 equemene
 * ORDER   (local input)                 const enum HPL_ORDER
326 1 equemene
 *         On entry, ORDER  specifies the storage format of the operands
327 1 equemene
 *         as follows:
328 1 equemene
 *            ORDER = HplRowMajor,
329 1 equemene
 *            ORDER = HplColumnMajor.
330 1 equemene
 *
331 1 equemene
 * TRANSA  (local input)                 const enum HPL_TRANS
332 1 equemene
 *         On entry, TRANSA  specifies the form of  op(A)  to be used in
333 1 equemene
 *         the matrix-matrix operation follows:
334 1 equemene
 *            TRANSA==HplNoTrans    : op( A ) = A,
335 1 equemene
 *            TRANSA==HplTrans      : op( A ) = A^T,
336 1 equemene
 *            TRANSA==HplConjTrans  : op( A ) = A^T.
337 1 equemene
 *
338 1 equemene
 * TRANSB  (local input)                 const enum HPL_TRANS
339 1 equemene
 *         On entry, TRANSB  specifies the form of  op(B)  to be used in
340 1 equemene
 *         the matrix-matrix operation follows:
341 1 equemene
 *            TRANSB==HplNoTrans    : op( B ) = B,
342 1 equemene
 *            TRANSB==HplTrans      : op( B ) = B^T,
343 1 equemene
 *            TRANSB==HplConjTrans  : op( B ) = B^T.
344 1 equemene
 *
345 1 equemene
 * M       (local input)                 const int
346 1 equemene
 *         On entry,  M  specifies  the  number  of rows  of the  matrix
347 1 equemene
 *         op(A)  and  of  the  matrix  C.  M  must  be  at least  zero.
348 1 equemene
 *
349 1 equemene
 * N       (local input)                 const int
350 1 equemene
 *         On entry,  N  specifies  the number  of columns of the matrix
351 1 equemene
 *         op(B)  and  the number of columns of the matrix  C. N must be
352 1 equemene
 *         at least zero.
353 1 equemene
 *
354 1 equemene
 * K       (local input)                 const int
355 1 equemene
 *         On entry,  K  specifies  the  number of columns of the matrix
356 1 equemene
 *         op(A) and the number of rows of the matrix op(B).  K  must be
357 1 equemene
 *         be at least  zero.
358 1 equemene
 *
359 1 equemene
 * ALPHA   (local input)                 const double
360 1 equemene
 *         On entry, ALPHA specifies the scalar alpha.   When  ALPHA  is
361 1 equemene
 *         supplied  as  zero  then the elements of the matrices A and B
362 1 equemene
 *         need not be set on input.
363 1 equemene
 *
364 1 equemene
 * A       (local input)                 const double *
365 1 equemene
 *         On entry,  A  is an array of dimension (LDA,ka),  where ka is
366 1 equemene
 *         k  when   TRANSA==HplNoTrans,  and  is  m  otherwise.  Before
367 1 equemene
 *         entry  with  TRANSA==HplNoTrans, the  leading  m by k part of
368 1 equemene
 *         the array  A must contain the matrix A, otherwise the leading
369 1 equemene
 *         k  by  m  part of the array  A  must  contain the  matrix  A.
370 1 equemene
 *
371 1 equemene
 * LDA     (local input)                 const int
372 1 equemene
 *         On entry, LDA  specifies the first dimension of A as declared
373 1 equemene
 *         in the  calling (sub) program. When  TRANSA==HplNoTrans  then
374 1 equemene
 *         LDA must be at least max(1,m), otherwise LDA must be at least
375 1 equemene
 *         max(1,k).
376 1 equemene
 *
377 1 equemene
 * B       (local input)                 const double *
378 1 equemene
 *         On entry, B is an array of dimension (LDB,kb),  where  kb  is
379 1 equemene
 *         n   when  TRANSB==HplNoTrans, and  is  k  otherwise.   Before
380 1 equemene
 *         entry with TRANSB==HplNoTrans,  the  leading  k by n  part of
381 1 equemene
 *         the array  B must contain the matrix B, otherwise the leading
382 1 equemene
 *         n  by  k  part of the array  B  must  contain  the matrix  B.
383 1 equemene
 *
384 1 equemene
 * LDB     (local input)                 const int
385 1 equemene
 *         On entry, LDB  specifies the first dimension of B as declared
386 1 equemene
 *         in the  calling (sub) program. When  TRANSB==HplNoTrans  then
387 1 equemene
 *         LDB must be at least max(1,k), otherwise LDB must be at least
388 1 equemene
 *         max(1,n).
389 1 equemene
 *
390 1 equemene
 * BETA    (local input)                 const double
391 1 equemene
 *         On entry,  BETA  specifies the scalar  beta.   When  BETA  is
392 1 equemene
 *         supplied  as  zero  then  the  elements of the matrix C  need
393 1 equemene
 *         not be set on input.
394 1 equemene
 *
395 1 equemene
 * C       (local input/output)          double *
396 1 equemene
 *         On entry,  C  is an array of dimension (LDC,n). Before entry,
397 1 equemene
 *         the  leading m by n part  of  the  array  C  must contain the
398 1 equemene
 *         matrix C,  except when beta is zero, in which case C need not
399 1 equemene
 *         be set on entry. On exit, the array  C  is overwritten by the
400 1 equemene
 *         m by n  matrix ( alpha*op( A )*op( B ) + beta*C ).
401 1 equemene
 *
402 1 equemene
 * LDC     (local input)                 const int
403 1 equemene
 *         On entry, LDC  specifies the first dimension of C as declared
404 1 equemene
 *         in  the   calling  (sub)  program.   LDC  must  be  at  least
405 1 equemene
 *         max(1,m).
406 1 equemene
 *
407 1 equemene
 * ---------------------------------------------------------------------
408 1 equemene
 */
409 1 equemene
#ifdef HPL_CALL_CBLAS
410 1 equemene
   cblas_dgemm( ORDER, TRANSA, TRANSB, M, N, K, ALPHA, A, LDA, B, LDB,
411 1 equemene
                BETA, C, LDC );
412 1 equemene
#endif
413 12 equemene
#ifdef HPL_CALL_GSLCBLAS
414 12 equemene
   cblas_dgemm( ORDER, TRANSA, TRANSB, M, N, K, ALPHA, A, LDA, B, LDB,
415 12 equemene
                BETA, C, LDC );
416 12 equemene
#endif
417 1 equemene
#ifdef HPL_CALL_VSIPL
418 1 equemene
   if( ORDER == HplColumnMajor )
419 1 equemene
   {
420 1 equemene
      HPL_dgemm0( TRANSA, TRANSB, M, N, K, ALPHA, A, LDA, B, LDB, BETA,
421 1 equemene
                  C, LDC );
422 1 equemene
   }
423 1 equemene
   else
424 1 equemene
   {
425 1 equemene
      HPL_dgemm0( TRANSB, TRANSA, N, M, K, ALPHA, B, LDB, A, LDA, BETA,
426 1 equemene
                  C, LDC );
427 1 equemene
   }
428 1 equemene
#endif
429 12 equemene
430 1 equemene
#ifdef HPL_CALL_FBLAS
431 1 equemene
   double                    alpha = ALPHA, beta = BETA;
432 1 equemene
#ifdef StringSunStyle
433 1 equemene
#ifdef HPL_USE_F77_INTEGER_DEF
434 1 equemene
   F77_INTEGER               IONE = 1;
435 1 equemene
#else
436 1 equemene
   int                       IONE = 1;
437 1 equemene
#endif
438 1 equemene
#endif
439 1 equemene
#ifdef StringStructVal
440 1 equemene
   F77_CHAR                  ftransa;
441 1 equemene
   F77_CHAR                  ftransb;
442 1 equemene
#endif
443 1 equemene
#ifdef StringStructPtr
444 1 equemene
   F77_CHAR                  ftransa;
445 1 equemene
   F77_CHAR                  ftransb;
446 1 equemene
#endif
447 1 equemene
#ifdef StringCrayStyle
448 1 equemene
   F77_CHAR                  ftransa;
449 1 equemene
   F77_CHAR                  ftransb;
450 1 equemene
#endif
451 1 equemene
#ifdef HPL_USE_F77_INTEGER_DEF
452 1 equemene
   const F77_INTEGER         F77M   = M,   F77N   = N,   F77K = K,
453 1 equemene
                             F77lda = LDA, F77ldb = LDB, F77ldc = LDC;
454 1 equemene
#else
455 1 equemene
#define F77M                 M
456 1 equemene
#define F77N                 N
457 1 equemene
#define F77K                 K
458 1 equemene
#define F77lda               LDA
459 1 equemene
#define F77ldb               LDB
460 1 equemene
#define F77ldc               LDC
461 1 equemene
#endif
462 1 equemene
   char                      ctransa, ctransb;
463 1 equemene
464 1 equemene
   if(      TRANSA == HplNoTrans ) ctransa = 'N';
465 1 equemene
   else if( TRANSA == HplTrans   ) ctransa = 'T';
466 1 equemene
   else                            ctransa = 'C';
467 1 equemene
468 1 equemene
   if(      TRANSB == HplNoTrans ) ctransb = 'N';
469 1 equemene
   else if( TRANSB == HplTrans   ) ctransb = 'T';
470 1 equemene
   else                            ctransb = 'C';
471 1 equemene
472 1 equemene
   if( ORDER == HplColumnMajor )
473 1 equemene
   {
474 1 equemene
#ifdef StringSunStyle
475 1 equemene
      F77dgemm( &ctransa, &ctransb, &F77M, &F77N, &F77K, &alpha, A, &F77lda,
476 1 equemene
                B, &F77ldb, &beta, C, &F77ldc, IONE, IONE );
477 1 equemene
#endif
478 1 equemene
#ifdef StringCrayStyle
479 1 equemene
      ftransa = HPL_C2F_CHAR( ctransa ); ftransb = HPL_C2F_CHAR( ctransb );
480 1 equemene
      F77dgemm( ftransa,  ftransb,  &F77M, &F77N, &F77K, &alpha, A, &F77lda,
481 1 equemene
                B, &F77ldb, &beta, C, &F77ldc );
482 1 equemene
#endif
483 1 equemene
#ifdef StringStructVal
484 1 equemene
      ftransa.len = 1; ftransa.cp = &ctransa;
485 1 equemene
      ftransb.len = 1; ftransb.cp = &ctransb;
486 1 equemene
      F77dgemm( ftransa,  ftransb,  &F77M, &F77N, &F77K, &alpha, A, &F77lda,
487 1 equemene
                B, &F77ldb, &beta, C, &F77ldc );
488 1 equemene
#endif
489 1 equemene
#ifdef StringStructPtr
490 1 equemene
      ftransa.len = 1; ftransa.cp = &ctransa;
491 1 equemene
      ftransb.len = 1; ftransb.cp = &ctransb;
492 1 equemene
      F77dgemm( &ftransa, &ftransb, &F77M, &F77N, &F77K, &alpha, A, &F77lda,
493 1 equemene
                B, &F77ldb, &beta, C, &F77ldc );
494 1 equemene
#endif
495 1 equemene
   }
496 1 equemene
   else
497 1 equemene
   {
498 1 equemene
#ifdef StringSunStyle
499 1 equemene
      F77dgemm( &ctransb, &ctransa, &F77N, &F77M, &F77K, &alpha, B, &F77ldb,
500 1 equemene
                A, &F77lda, &beta, C, &F77ldc, IONE, IONE );
501 1 equemene
#endif
502 1 equemene
#ifdef StringCrayStyle
503 1 equemene
      ftransa = HPL_C2F_CHAR( ctransa ); ftransb = HPL_C2F_CHAR( ctransb );
504 1 equemene
      F77dgemm( ftransb,  ftransa,  &F77N, &F77M, &F77K, &alpha, B, &F77ldb,
505 1 equemene
                A, &F77lda, &beta, C, &F77ldc );
506 1 equemene
#endif
507 1 equemene
#ifdef StringStructVal
508 1 equemene
      ftransa.len = 1; ftransa.cp = &ctransa;
509 1 equemene
      ftransb.len = 1; ftransb.cp = &ctransb;
510 1 equemene
      F77dgemm( ftransb,  ftransa,  &F77N, &F77M, &F77K, &alpha, B, &F77ldb,
511 1 equemene
                A, &F77lda, &beta, C, &F77ldc );
512 1 equemene
#endif
513 1 equemene
#ifdef StringStructPtr
514 1 equemene
      ftransa.len = 1; ftransa.cp = &ctransa;
515 1 equemene
      ftransb.len = 1; ftransb.cp = &ctransb;
516 1 equemene
      F77dgemm( &ftransb, &ftransa, &F77N, &F77M, &F77K, &alpha, B, &F77ldb,
517 1 equemene
                A, &F77lda, &beta, C, &F77ldc );
518 1 equemene
#endif
519 1 equemene
   }
520 1 equemene
#endif
521 9 equemene
522 9 equemene
#ifdef HPL_CALL_CUBLAS
523 9 equemene
   double                    alpha = ALPHA, beta = BETA;
524 9 equemene
525 9 equemene
   int                       IONE = 1;
526 9 equemene
527 9 equemene
#define CUBLASM                 M
528 9 equemene
#define CUBLASN                 N
529 9 equemene
#define CUBLASK                 K
530 9 equemene
#define CUBLASlda               LDA
531 9 equemene
#define CUBLASldb               LDB
532 9 equemene
#define CUBLASldc               LDC
533 9 equemene
534 9 equemene
   char                      ctransa, ctransb;
535 9 equemene
536 9 equemene
   if(      TRANSA == HplNoTrans ) ctransa = 'N';
537 9 equemene
   else if( TRANSA == HplTrans   ) ctransa = 'T';
538 9 equemene
   else                            ctransa = 'C';
539 9 equemene
540 9 equemene
   if(      TRANSB == HplNoTrans ) ctransb = 'N';
541 9 equemene
   else if( TRANSB == HplTrans   ) ctransb = 'T';
542 9 equemene
   else                            ctransb = 'C';
543 9 equemene
544 9 equemene
   if( ORDER == HplColumnMajor )
545 9 equemene
   {
546 9 equemene
      CUBLAS_DGEMM( &ctransa, &ctransb, &CUBLASM, &CUBLASN, &CUBLASK,
547 9 equemene
                    &alpha, A, &CUBLASlda, B, &CUBLASldb, &beta, C, &CUBLASldc,
548 9 equemene
                    &IONE, &IONE );
549 9 equemene
   }
550 9 equemene
   else
551 9 equemene
   {
552 9 equemene
      CUBLAS_DGEMM( &ctransb, &ctransa, &CUBLASN, &CUBLASM, &CUBLASK,
553 9 equemene
                    &alpha, B, &CUBLASldb, A, &CUBLASlda, &beta, C, &CUBLASldc,
554 9 equemene
                    &IONE, &IONE );
555 9 equemene
   }
556 9 equemene
#endif
557 10 equemene
558 10 equemene
#ifdef HPL_CALL_ACML
559 10 equemene
   double                    alpha = ALPHA, beta = BETA;
560 10 equemene
561 10 equemene
   int                       IONE = 1;
562 10 equemene
563 10 equemene
#define ACMLM                 M
564 10 equemene
#define ACMLN                 N
565 10 equemene
#define ACMLK                 K
566 10 equemene
#define ACMLlda               LDA
567 10 equemene
#define ACMLldb               LDB
568 10 equemene
#define ACMLldc               LDC
569 10 equemene
570 10 equemene
   char                      ctransa, ctransb;
571 10 equemene
572 10 equemene
   if(      TRANSA == HplNoTrans ) ctransa = 'N';
573 10 equemene
   else if( TRANSA == HplTrans   ) ctransa = 'T';
574 10 equemene
   else                            ctransa = 'C';
575 10 equemene
576 10 equemene
   if(      TRANSB == HplNoTrans ) ctransb = 'N';
577 10 equemene
   else if( TRANSB == HplTrans   ) ctransb = 'T';
578 10 equemene
   else                            ctransb = 'C';
579 10 equemene
580 10 equemene
   if( ORDER == HplColumnMajor )
581 10 equemene
   {
582 10 equemene
      dgemm_( &ctransa, &ctransb, &ACMLM, &ACMLN, &ACMLK,
583 10 equemene
              &alpha, A, &ACMLlda, B, &ACMLldb, &beta, C, &ACMLldc,
584 10 equemene
              &IONE, &IONE );
585 10 equemene
   }
586 10 equemene
   else
587 10 equemene
   {
588 10 equemene
      dgemm_( &ctransb, &ctransa, &ACMLN, &ACMLM, &ACMLK,
589 10 equemene
              &alpha, B, &ACMLldb, A, &ACMLlda, &beta, C, &ACMLldc,
590 10 equemene
              &IONE, &IONE );
591 10 equemene
   }
592 10 equemene
#endif
593 1 equemene
/*
594 1 equemene
 * End of HPL_dgemm
595 1 equemene
 */
596 1 equemene
}
597 1 equemene
598 1 equemene
#endif