Statistiques
| Révision :

root / src / blas / HPL_dgemm.c

Historique | Voir | Annoter | Télécharger (21,2 ko)

1
/* 
2
 * -- High Performance Computing Linpack Benchmark (HPL)                
3
 *    HPL - 2.0 - September 10, 2008                          
4
 *    Antoine P. Petitet                                                
5
 *    University of Tennessee, Knoxville                                
6
 *    Innovative Computing Laboratory                                 
7
 *    (C) Copyright 2000-2008 All Rights Reserved                       
8
 *                                                                      
9
 * -- Copyright notice and Licensing terms:                             
10
 *                                                                      
11
 * Redistribution  and  use in  source and binary forms, with or without
12
 * modification, are  permitted provided  that the following  conditions
13
 * are met:                                                             
14
 *                                                                      
15
 * 1. Redistributions  of  source  code  must retain the above copyright
16
 * notice, this list of conditions and the following disclaimer.        
17
 *                                                                      
18
 * 2. Redistributions in binary form must reproduce  the above copyright
19
 * notice, this list of conditions,  and the following disclaimer in the
20
 * documentation and/or other materials provided with the distribution. 
21
 *                                                                      
22
 * 3. All  advertising  materials  mentioning  features  or  use of this
23
 * software must display the following acknowledgement:                 
24
 * This  product  includes  software  developed  at  the  University  of
25
 * Tennessee, Knoxville, Innovative Computing Laboratory.             
26
 *                                                                      
27
 * 4. The name of the  University,  the name of the  Laboratory,  or the
28
 * names  of  its  contributors  may  not  be used to endorse or promote
29
 * products  derived   from   this  software  without  specific  written
30
 * permission.                                                          
31
 *                                                                      
32
 * -- Disclaimer:                                                       
33
 *                                                                      
34
 * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
35
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
36
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
37
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
38
 * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
39
 * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
40
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
41
 * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
42
 * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
43
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
44
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
45
 * ---------------------------------------------------------------------
46
 */ 
47
/*
48
 * Include files
49
 */
50
#include "hpl.h"
51

    
52
#ifndef HPL_dgemm
53

    
54
#ifdef HPL_CALL_VSIPL
55

    
56
#ifdef STDC_HEADERS
57
static void HPL_dgemmNN
58
(
59
   const int                  M,
60
   const int                  N,
61
   const int                  K,
62
   const double               ALPHA,
63
   const double               * A,
64
   const int                  LDA,
65
   const double               * B,
66
   const int                  LDB,
67
   const double               BETA,
68
   double                     * C,
69
   const int                  LDC
70
)
71
#else
72
static void HPL_dgemmNN( M, N, K, ALPHA, A, LDA, B, LDB, BETA, C, LDC )
73
   const int                  K, LDA, LDB, LDC, M, N;
74
   const double               ALPHA, BETA;
75
   const double               * A, * B;
76
   double                     * C;
77
#endif
78
{
79
   register double            t0;
80
   int                        i, iail, iblj, icij, j, jal, jbj, jcj, l;
81

    
82
   for( j = 0, jbj = 0, jcj  = 0; j < N; j++, jbj += LDB, jcj += LDC )
83
   {
84
      HPL_dscal( M, BETA, C+jcj, 1 );
85
      for( l = 0, jal = 0, iblj = jbj; l < K; l++, jal += LDA, iblj += 1 )
86
      {
87
         t0 = ALPHA * B[iblj];
88
         for( i = 0, iail = jal, icij = jcj; i < M; i++, iail += 1, icij += 1 )
89
         { C[icij] += A[iail] * t0; }
90
      }
91
   }
92
}
93

    
94
#ifdef STDC_HEADERS
95
static void HPL_dgemmNT
96
(
97
   const int                  M,
98
   const int                  N,
99
   const int                  K,
100
   const double               ALPHA,
101
   const double               * A,
102
   const int                  LDA,
103
   const double               * B,
104
   const int                  LDB,
105
   const double               BETA,
106
   double                     * C,
107
   const int                  LDC
108
)
109
#else
110
static void HPL_dgemmNT( M, N, K, ALPHA, A, LDA, B, LDB, BETA, C, LDC )
111
   const int                  K, LDA, LDB, LDC, M, N;
112
   const double               ALPHA, BETA;
113
   const double               * A, * B;
114
   double                     * C;
115
#endif
116
{
117
   register double            t0;
118
   int                        i, iail, ibj, ibjl, icij, j, jal, jcj, l;
119

    
120
   for( j = 0, ibj  = 0, jcj  = 0; j < N; j++, ibj += 1, jcj += LDC )
121
   {
122
      HPL_dscal( M, BETA, C+jcj, 1 );
123
      for( l = 0, jal = 0, ibjl = ibj; l < K; l++, jal += LDA, ibjl += LDB )
124
      {
125
         t0 = ALPHA * B[ibjl];
126
         for( i = 0, iail = jal, icij = jcj; i < M; i++, iail += 1, icij += 1 )
127
         { C[icij] += A[iail] * t0; }
128
      }
129
   }
130
}
131

    
132
#ifdef STDC_HEADERS
133
static void HPL_dgemmTN
134
(
135
   const int                  M,
136
   const int                  N,
137
   const int                  K,
138
   const double               ALPHA,
139
   const double               * A,
140
   const int                  LDA,
141
   const double               * B,
142
   const int                  LDB,
143
   const double               BETA,
144
   double                     * C,
145
   const int                  LDC
146
)
147
#else
148
static void HPL_dgemmTN( M, N, K, ALPHA, A, LDA, B, LDB, BETA, C, LDC )
149
   const int                  K, LDA, LDB, LDC, M, N;
150
   const double               ALPHA, BETA;
151
   const double               * A, * B;
152
   double                     * C;
153
#endif
154
{
155
   register double            t0;
156
   int                        i, iai, iail, iblj, icij, j, jbj, jcj, l;
157

    
158
   for( j = 0, jbj = 0, jcj = 0; j < N; j++, jbj += LDB, jcj += LDC )
159
   {
160
      for( i = 0, icij = jcj, iai = 0; i < M; i++, icij += 1, iai += LDA )
161
      {
162
         t0 = HPL_rzero;
163
         for( l = 0, iail = iai, iblj = jbj; l < K; l++, iail += 1, iblj += 1 )
164
         { t0 += A[iail] * B[iblj]; }
165
         if( BETA == HPL_rzero ) C[icij]  = HPL_rzero;
166
         else                    C[icij] *= BETA;
167
         C[icij] += ALPHA * t0;
168
      }
169
   }
170
}
171

    
172
#ifdef STDC_HEADERS
173
static void HPL_dgemmTT
174
(
175
   const int                  M,
176
   const int                  N,
177
   const int                  K,
178
   const double               ALPHA,
179
   const double               * A,
180
   const int                  LDA,
181
   const double               * B,
182
   const int                  LDB,
183
   const double               BETA,
184
   double                     * C,
185
   const int                  LDC
186
)
187
#else
188
static void HPL_dgemmTT( M, N, K, ALPHA, A, LDA, B, LDB, BETA, C, LDC )
189
   const int                  K, LDA, LDB, LDC, M, N;
190
   const double               ALPHA, BETA;
191
   const double               * A, * B;
192
   double                     * C;
193
#endif
194
{
195
   register double            t0;
196
   int                        i, iali, ibj, ibjl, icij, j, jai, jcj, l;
197

    
198
   for( j = 0, ibj = 0, jcj  = 0; j < N; j++, ibj += 1, jcj += LDC )
199
   {
200
      for( i = 0, icij = jcj, jai = 0; i < M; i++, icij += 1, jai += LDA )
201
      {
202
         t0 = HPL_rzero;
203
         for( l = 0,      iali  = jai, ibjl  = ibj;
204
              l < K; l++, iali += 1,   ibjl += LDB ) t0 += A[iali] * B[ibjl];
205
         if( BETA == HPL_rzero ) C[icij]  = HPL_rzero;
206
         else                    C[icij] *= BETA;
207
         C[icij] += ALPHA * t0;
208
      }
209
   }
210
}
211

    
212
#ifdef STDC_HEADERS
213
static void HPL_dgemm0
214
(
215
   const enum HPL_TRANS       TRANSA,
216
   const enum HPL_TRANS       TRANSB,
217
   const int                  M,
218
   const int                  N,
219
   const int                  K,
220
   const double               ALPHA,
221
   const double               * A,
222
   const int                  LDA,
223
   const double               * B,
224
   const int                  LDB,
225
   const double               BETA,
226
   double                     * C,
227
   const int                  LDC
228
)
229
#else
230
static void HPL_dgemm0( TRANSA, TRANSB, M, N, K, ALPHA, A, LDA, B, LDB,
231
                        BETA, C, LDC )
232
   const enum HPL_TRANS       TRANSA, TRANSB;
233
   const int                  K, LDA, LDB, LDC, M, N;
234
   const double               ALPHA, BETA;
235
   const double               * A, * B;
236
   double                     * C;
237
#endif
238
{
239
   int                        i, j;
240

    
241
   if( ( M == 0 ) || ( N == 0 ) ||
242
       ( ( ( ALPHA == HPL_rzero ) || ( K == 0 ) ) &&
243
         ( BETA == HPL_rone ) ) ) return;
244

    
245
   if( ALPHA == HPL_rzero )
246
   {
247
      for( j = 0; j < N; j++ )
248
      {  for( i = 0; i < M; i++ ) *(C+i+j*LDC) = HPL_rzero; }
249
      return;
250
   }
251

    
252
   if( TRANSB == HplNoTrans )
253
   {
254
      if( TRANSA == HplNoTrans )
255
      { HPL_dgemmNN( M, N, K, ALPHA, A, LDA, B, LDB, BETA, C, LDC ); }
256
      else
257
      { HPL_dgemmTN( M, N, K, ALPHA, A, LDA, B, LDB, BETA, C, LDC ); }
258
   }
259
   else
260
   {
261
      if( TRANSA == HplNoTrans )
262
      { HPL_dgemmNT( M, N, K, ALPHA, A, LDA, B, LDB, BETA, C, LDC ); }
263
      else
264
      { HPL_dgemmTT( M, N, K, ALPHA, A, LDA, B, LDB, BETA, C, LDC ); }
265
   }
266
}
267

    
268
#endif
269

    
270
#ifdef STDC_HEADERS
271
void HPL_dgemm
272
(
273
   const enum HPL_ORDER             ORDER,
274
   const enum HPL_TRANS             TRANSA,
275
   const enum HPL_TRANS             TRANSB,
276
   const int                        M,
277
   const int                        N,
278
   const int                        K,
279
   const double                     ALPHA,
280
   const double *                   A,
281
   const int                        LDA,
282
   const double *                   B,
283
   const int                        LDB,
284
   const double                     BETA,
285
   double *                         C,
286
   const int                        LDC
287
)
288
#else
289
void HPL_dgemm
290
( ORDER, TRANSA, TRANSB, M, N, K, ALPHA, A, LDA, B, LDB, BETA, C, LDC )
291
   const enum HPL_ORDER             ORDER;
292
   const enum HPL_TRANS             TRANSA;
293
   const enum HPL_TRANS             TRANSB;
294
   const int                        M;
295
   const int                        N;
296
   const int                        K;
297
   const double                     ALPHA;
298
   const double *                   A;
299
   const int                        LDA;
300
   const double *                   B;
301
   const int                        LDB;
302
   const double                     BETA;
303
   double *                         C;
304
   const int                        LDC;
305
#endif
306
{
307
/* 
308
 * Purpose
309
 * =======
310
 *
311
 * HPL_dgemm performs one of the matrix-matrix operations
312
 *  
313
 *     C := alpha * op( A ) * op( B ) + beta * C
314
 *  
315
 *  where op( X ) is one of
316
 *  
317
 *     op( X ) = X   or   op( X ) = X^T.
318
 *  
319
 * Alpha and beta are scalars,  and A,  B and C are matrices, with op(A)
320
 * an m by k matrix, op(B) a k by n matrix and  C an m by n matrix.
321
 *
322
 * Arguments
323
 * =========
324
 *
325
 * ORDER   (local input)                 const enum HPL_ORDER
326
 *         On entry, ORDER  specifies the storage format of the operands
327
 *         as follows:                                                  
328
 *            ORDER = HplRowMajor,                                      
329
 *            ORDER = HplColumnMajor.                                   
330
 *
331
 * TRANSA  (local input)                 const enum HPL_TRANS
332
 *         On entry, TRANSA  specifies the form of  op(A)  to be used in
333
 *         the matrix-matrix operation follows:                         
334
 *            TRANSA==HplNoTrans    : op( A ) = A,                     
335
 *            TRANSA==HplTrans      : op( A ) = A^T,                   
336
 *            TRANSA==HplConjTrans  : op( A ) = A^T.                   
337
 *
338
 * TRANSB  (local input)                 const enum HPL_TRANS
339
 *         On entry, TRANSB  specifies the form of  op(B)  to be used in
340
 *         the matrix-matrix operation follows:                         
341
 *            TRANSB==HplNoTrans    : op( B ) = B,                     
342
 *            TRANSB==HplTrans      : op( B ) = B^T,                   
343
 *            TRANSB==HplConjTrans  : op( B ) = B^T.                   
344
 *
345
 * M       (local input)                 const int
346
 *         On entry,  M  specifies  the  number  of rows  of the  matrix
347
 *         op(A)  and  of  the  matrix  C.  M  must  be  at least  zero.
348
 *
349
 * N       (local input)                 const int
350
 *         On entry,  N  specifies  the number  of columns of the matrix
351
 *         op(B)  and  the number of columns of the matrix  C. N must be
352
 *         at least zero.
353
 *
354
 * K       (local input)                 const int
355
 *         On entry,  K  specifies  the  number of columns of the matrix
356
 *         op(A) and the number of rows of the matrix op(B).  K  must be
357
 *         be at least  zero.
358
 *
359
 * ALPHA   (local input)                 const double
360
 *         On entry, ALPHA specifies the scalar alpha.   When  ALPHA  is
361
 *         supplied  as  zero  then the elements of the matrices A and B
362
 *         need not be set on input.
363
 *
364
 * A       (local input)                 const double *
365
 *         On entry,  A  is an array of dimension (LDA,ka),  where ka is
366
 *         k  when   TRANSA==HplNoTrans,  and  is  m  otherwise.  Before
367
 *         entry  with  TRANSA==HplNoTrans, the  leading  m by k part of
368
 *         the array  A must contain the matrix A, otherwise the leading
369
 *         k  by  m  part of the array  A  must  contain the  matrix  A.
370
 *
371
 * LDA     (local input)                 const int
372
 *         On entry, LDA  specifies the first dimension of A as declared
373
 *         in the  calling (sub) program. When  TRANSA==HplNoTrans  then
374
 *         LDA must be at least max(1,m), otherwise LDA must be at least
375
 *         max(1,k).
376
 *
377
 * B       (local input)                 const double *
378
 *         On entry, B is an array of dimension (LDB,kb),  where  kb  is
379
 *         n   when  TRANSB==HplNoTrans, and  is  k  otherwise.   Before
380
 *         entry with TRANSB==HplNoTrans,  the  leading  k by n  part of
381
 *         the array  B must contain the matrix B, otherwise the leading
382
 *         n  by  k  part of the array  B  must  contain  the matrix  B.
383
 *
384
 * LDB     (local input)                 const int
385
 *         On entry, LDB  specifies the first dimension of B as declared
386
 *         in the  calling (sub) program. When  TRANSB==HplNoTrans  then
387
 *         LDB must be at least max(1,k), otherwise LDB must be at least
388
 *         max(1,n).
389
 *
390
 * BETA    (local input)                 const double
391
 *         On entry,  BETA  specifies the scalar  beta.   When  BETA  is
392
 *         supplied  as  zero  then  the  elements of the matrix C  need
393
 *         not be set on input.
394
 *
395
 * C       (local input/output)          double *
396
 *         On entry,  C  is an array of dimension (LDC,n). Before entry,
397
 *         the  leading m by n part  of  the  array  C  must contain the
398
 *         matrix C,  except when beta is zero, in which case C need not
399
 *         be set on entry. On exit, the array  C  is overwritten by the
400
 *         m by n  matrix ( alpha*op( A )*op( B ) + beta*C ).
401
 *
402
 * LDC     (local input)                 const int
403
 *         On entry, LDC  specifies the first dimension of C as declared
404
 *         in  the   calling  (sub)  program.   LDC  must  be  at  least
405
 *         max(1,m).
406
 *
407
 * ---------------------------------------------------------------------
408
 */ 
409
#ifdef HPL_CALL_CBLAS
410
   cblas_dgemm( ORDER, TRANSA, TRANSB, M, N, K, ALPHA, A, LDA, B, LDB,
411
                BETA, C, LDC );
412
#endif
413
#ifdef HPL_CALL_GSLCBLAS
414
   cblas_dgemm( ORDER, TRANSA, TRANSB, M, N, K, ALPHA, A, LDA, B, LDB,
415
                BETA, C, LDC );
416
#endif
417
#ifdef HPL_CALL_VSIPL
418
   if( ORDER == HplColumnMajor )
419
   {
420
      HPL_dgemm0( TRANSA, TRANSB, M, N, K, ALPHA, A, LDA, B, LDB, BETA,
421
                  C, LDC );
422
   }
423
   else
424
   {
425
      HPL_dgemm0( TRANSB, TRANSA, N, M, K, ALPHA, B, LDB, A, LDA, BETA,
426
                  C, LDC );
427
   }
428
#endif
429

    
430
#ifdef HPL_CALL_FBLAS
431
   double                    alpha = ALPHA, beta = BETA;
432
#ifdef StringSunStyle
433
#ifdef HPL_USE_F77_INTEGER_DEF
434
   F77_INTEGER               IONE = 1;
435
#else
436
   int                       IONE = 1;
437
#endif
438
#endif
439
#ifdef StringStructVal
440
   F77_CHAR                  ftransa;
441
   F77_CHAR                  ftransb;
442
#endif
443
#ifdef StringStructPtr
444
   F77_CHAR                  ftransa;
445
   F77_CHAR                  ftransb;
446
#endif
447
#ifdef StringCrayStyle
448
   F77_CHAR                  ftransa;
449
   F77_CHAR                  ftransb;
450
#endif
451
#ifdef HPL_USE_F77_INTEGER_DEF
452
   const F77_INTEGER         F77M   = M,   F77N   = N,   F77K = K,
453
                             F77lda = LDA, F77ldb = LDB, F77ldc = LDC;
454
#else
455
#define F77M                 M
456
#define F77N                 N
457
#define F77K                 K
458
#define F77lda               LDA
459
#define F77ldb               LDB
460
#define F77ldc               LDC
461
#endif
462
   char                      ctransa, ctransb;
463

    
464
   if(      TRANSA == HplNoTrans ) ctransa = 'N';
465
   else if( TRANSA == HplTrans   ) ctransa = 'T';
466
   else                            ctransa = 'C';
467
 
468
   if(      TRANSB == HplNoTrans ) ctransb = 'N';
469
   else if( TRANSB == HplTrans   ) ctransb = 'T';
470
   else                            ctransb = 'C';
471

    
472
   if( ORDER == HplColumnMajor )
473
   {
474
#ifdef StringSunStyle
475
      F77dgemm( &ctransa, &ctransb, &F77M, &F77N, &F77K, &alpha, A, &F77lda,
476
                B, &F77ldb, &beta, C, &F77ldc, IONE, IONE );
477
#endif
478
#ifdef StringCrayStyle
479
      ftransa = HPL_C2F_CHAR( ctransa ); ftransb = HPL_C2F_CHAR( ctransb );
480
      F77dgemm( ftransa,  ftransb,  &F77M, &F77N, &F77K, &alpha, A, &F77lda,
481
                B, &F77ldb, &beta, C, &F77ldc );
482
#endif
483
#ifdef StringStructVal
484
      ftransa.len = 1; ftransa.cp = &ctransa;
485
      ftransb.len = 1; ftransb.cp = &ctransb;
486
      F77dgemm( ftransa,  ftransb,  &F77M, &F77N, &F77K, &alpha, A, &F77lda,
487
                B, &F77ldb, &beta, C, &F77ldc );
488
#endif
489
#ifdef StringStructPtr
490
      ftransa.len = 1; ftransa.cp = &ctransa;
491
      ftransb.len = 1; ftransb.cp = &ctransb;
492
      F77dgemm( &ftransa, &ftransb, &F77M, &F77N, &F77K, &alpha, A, &F77lda,
493
                B, &F77ldb, &beta, C, &F77ldc );
494
#endif
495
   }
496
   else
497
   {
498
#ifdef StringSunStyle
499
      F77dgemm( &ctransb, &ctransa, &F77N, &F77M, &F77K, &alpha, B, &F77ldb,
500
                A, &F77lda, &beta, C, &F77ldc, IONE, IONE );
501
#endif
502
#ifdef StringCrayStyle
503
      ftransa = HPL_C2F_CHAR( ctransa ); ftransb = HPL_C2F_CHAR( ctransb );
504
      F77dgemm( ftransb,  ftransa,  &F77N, &F77M, &F77K, &alpha, B, &F77ldb,
505
                A, &F77lda, &beta, C, &F77ldc );
506
#endif
507
#ifdef StringStructVal
508
      ftransa.len = 1; ftransa.cp = &ctransa;
509
      ftransb.len = 1; ftransb.cp = &ctransb;
510
      F77dgemm( ftransb,  ftransa,  &F77N, &F77M, &F77K, &alpha, B, &F77ldb,
511
                A, &F77lda, &beta, C, &F77ldc );
512
#endif
513
#ifdef StringStructPtr
514
      ftransa.len = 1; ftransa.cp = &ctransa;
515
      ftransb.len = 1; ftransb.cp = &ctransb;
516
      F77dgemm( &ftransb, &ftransa, &F77N, &F77M, &F77K, &alpha, B, &F77ldb,
517
                A, &F77lda, &beta, C, &F77ldc );
518
#endif
519
   }
520
#endif
521

    
522
#ifdef HPL_CALL_CUBLAS
523
   double                    alpha = ALPHA, beta = BETA;
524

    
525
   int                       IONE = 1;
526

    
527
#define CUBLASM                 M
528
#define CUBLASN                 N
529
#define CUBLASK                 K
530
#define CUBLASlda               LDA
531
#define CUBLASldb               LDB
532
#define CUBLASldc               LDC
533

    
534
   char                      ctransa, ctransb;
535

    
536
   if(      TRANSA == HplNoTrans ) ctransa = 'N';
537
   else if( TRANSA == HplTrans   ) ctransa = 'T';
538
   else                            ctransa = 'C';
539
 
540
   if(      TRANSB == HplNoTrans ) ctransb = 'N';
541
   else if( TRANSB == HplTrans   ) ctransb = 'T';
542
   else                            ctransb = 'C';
543

    
544
   if( ORDER == HplColumnMajor )
545
   {
546
      CUBLAS_DGEMM( &ctransa, &ctransb, &CUBLASM, &CUBLASN, &CUBLASK, 
547
                    &alpha, A, &CUBLASlda, B, &CUBLASldb, &beta, C, &CUBLASldc,
548
                    &IONE, &IONE );
549
   }
550
   else
551
   {
552
      CUBLAS_DGEMM( &ctransb, &ctransa, &CUBLASN, &CUBLASM, &CUBLASK, 
553
                    &alpha, B, &CUBLASldb, A, &CUBLASlda, &beta, C, &CUBLASldc,
554
                    &IONE, &IONE );
555
   }
556
#endif
557

    
558
#ifdef HPL_CALL_ACML
559
   double                    alpha = ALPHA, beta = BETA;
560

    
561
   int                       IONE = 1;
562

    
563
#define ACMLM                 M
564
#define ACMLN                 N
565
#define ACMLK                 K
566
#define ACMLlda               LDA
567
#define ACMLldb               LDB
568
#define ACMLldc               LDC
569

    
570
   char                      ctransa, ctransb;
571

    
572
   if(      TRANSA == HplNoTrans ) ctransa = 'N';
573
   else if( TRANSA == HplTrans   ) ctransa = 'T';
574
   else                            ctransa = 'C';
575
 
576
   if(      TRANSB == HplNoTrans ) ctransb = 'N';
577
   else if( TRANSB == HplTrans   ) ctransb = 'T';
578
   else                            ctransb = 'C';
579

    
580
   if( ORDER == HplColumnMajor )
581
   {
582
      dgemm_( &ctransa, &ctransb, &ACMLM, &ACMLN, &ACMLK, 
583
              &alpha, A, &ACMLlda, B, &ACMLldb, &beta, C, &ACMLldc,
584
              &IONE, &IONE );
585
   }
586
   else
587
   {
588
      dgemm_( &ctransb, &ctransa, &ACMLN, &ACMLM, &ACMLK, 
589
              &alpha, B, &ACMLldb, A, &ACMLlda, &beta, C, &ACMLldc,
590
              &IONE, &IONE );
591
   }
592
#endif
593
/*
594
 * End of HPL_dgemm
595
 */
596
}
597

    
598
#endif