Statistiques
| Révision :

root / src / blas / HPL_dgemv.c @ 9

Historique | Voir | Annoter | Télécharger (12,58 ko)

1
/* 
2
 * -- High Performance Computing Linpack Benchmark (HPL)                
3
 *    HPL - 2.0 - September 10, 2008                          
4
 *    Antoine P. Petitet                                                
5
 *    University of Tennessee, Knoxville                                
6
 *    Innovative Computing Laboratory                                 
7
 *    (C) Copyright 2000-2008 All Rights Reserved                       
8
 *                                                                      
9
 * -- Copyright notice and Licensing terms:                             
10
 *                                                                      
11
 * Redistribution  and  use in  source and binary forms, with or without
12
 * modification, are  permitted provided  that the following  conditions
13
 * are met:                                                             
14
 *                                                                      
15
 * 1. Redistributions  of  source  code  must retain the above copyright
16
 * notice, this list of conditions and the following disclaimer.        
17
 *                                                                      
18
 * 2. Redistributions in binary form must reproduce  the above copyright
19
 * notice, this list of conditions,  and the following disclaimer in the
20
 * documentation and/or other materials provided with the distribution. 
21
 *                                                                      
22
 * 3. All  advertising  materials  mentioning  features  or  use of this
23
 * software must display the following acknowledgement:                 
24
 * This  product  includes  software  developed  at  the  University  of
25
 * Tennessee, Knoxville, Innovative Computing Laboratory.             
26
 *                                                                      
27
 * 4. The name of the  University,  the name of the  Laboratory,  or the
28
 * names  of  its  contributors  may  not  be used to endorse or promote
29
 * products  derived   from   this  software  without  specific  written
30
 * permission.                                                          
31
 *                                                                      
32
 * -- Disclaimer:                                                       
33
 *                                                                      
34
 * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
35
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
36
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
37
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
38
 * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
39
 * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
40
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
41
 * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
42
 * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
43
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
44
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
45
 * ---------------------------------------------------------------------
46
 */ 
47
/*
48
 * Include files
49
 */
50
#include "hpl.h"
51

    
52
#ifndef HPL_dgemv
53

    
54
#ifdef HPL_CALL_VSIPL
55

    
56
#ifdef STDC_HEADERS
57
static void HPL_dgemv0
58
(
59
   const enum HPL_TRANS       TRANS,
60
   const int                  M,
61
   const int                  N,
62
   const double               ALPHA,
63
   const double               * A,
64
   const int                  LDA,
65
   const double               * X,
66
   const int                  INCX,
67
   const double               BETA,
68
   double                     * Y,
69
   const int                  INCY
70
)
71
#else
72
static void HPL_dgemv0( TRANS, M, N, ALPHA, A, LDA, X, INCX, BETA, Y, INCY )
73
   const enum HPL_TRANS       TRANS;
74
   const int                  INCX, INCY, LDA, M, N;
75
   const double               ALPHA, BETA;
76
   const double               * A, * X;
77
   double                     * Y;
78
#endif
79
{
80
/*
81
 * .. Local Variables ..
82
 */
83
   int                        i, iaij, ix, iy, j, jaj, jx, jy;
84
   register double            t0;
85
/* ..
86
 * .. Executable Statements ..
87
 */
88
   if( ( M == 0 ) || ( N == 0 ) ||
89
       ( ( ALPHA == HPL_rzero ) && ( BETA == HPL_rone  ) ) ) return;
90
 
91
   if( ALPHA == HPL_rzero ) { HPL_dscal( M, BETA, Y, INCY ); return; }
92
 
93
   if( TRANS == HplNoTrans )
94
   {
95
      HPL_dscal( M, BETA, Y, INCY );
96
      for( j = 0, jaj  = 0, jx = 0; j < N; j++, jaj += LDA, jx += INCX )
97
      {
98
         t0 = ALPHA * X[jx];
99
         for( i = 0, iaij = jaj, iy = 0; i < M; i++, iaij += 1, iy += INCY )
100
         { Y[iy] += A[iaij] * t0; }
101
      }
102
   }
103
   else
104
   {
105
      for( j = 0, jaj  = 0, jy  = 0; j < N; j++, jaj += LDA, jy += INCY )
106
      {
107
         t0 = HPL_rzero;
108
         for( i = 0, iaij = jaj, ix = 0; i < M; i++, iaij += 1, ix += INCX )
109
         { t0 += A[iaij] * X[ix]; }
110
         if( BETA == HPL_rzero ) Y[jy] = ALPHA * t0;
111
         else                    Y[jy] = BETA * Y[jy] + ALPHA * t0;
112
      }
113
   }
114
}
115
#endif
116

    
117
#ifdef STDC_HEADERS
118
void HPL_dgemv
119
(
120
   const enum HPL_ORDER             ORDER,
121
   const enum HPL_TRANS             TRANS,
122
   const int                        M,
123
   const int                        N,
124
   const double                     ALPHA,
125
   const double *                   A,
126
   const int                        LDA,
127
   const double *                   X,
128
   const int                        INCX,
129
   const double                     BETA,
130
   double *                         Y,
131
   const int                        INCY
132
)
133
#else
134
void HPL_dgemv
135
( ORDER, TRANS, M, N, ALPHA, A, LDA, X, INCX, BETA, Y, INCY )
136
   const enum HPL_ORDER             ORDER;
137
   const enum HPL_TRANS             TRANS;
138
   const int                        M;
139
   const int                        N;
140
   const double                     ALPHA;
141
   const double *                   A;
142
   const int                        LDA;
143
   const double *                   X;
144
   const int                        INCX;
145
   const double                     BETA;
146
   double *                         Y;
147
   const int                        INCY;
148
#endif
149
{
150
/* 
151
 * Purpose
152
 * =======
153
 *
154
 * HPL_dgemv performs one of the matrix-vector operations
155
 *  
156
 *     y := alpha * op( A ) * x + beta * y,
157
 *  
158
 *  where op( X ) is one of
159
 *  
160
 *     op( X ) = X   or   op( X ) = X^T.
161
 *  
162
 * where alpha and beta are scalars, x and y are vectors and  A  is an m
163
 * by n matrix.
164
 *
165
 * Arguments
166
 * =========
167
 *
168
 * ORDER   (local input)                 const enum HPL_ORDER
169
 *         On entry, ORDER  specifies the storage format of the operands
170
 *         as follows:                                                  
171
 *            ORDER = HplRowMajor,                                      
172
 *            ORDER = HplColumnMajor.                                   
173
 *
174
 * TRANS   (local input)                 const enum HPL_TRANS
175
 *         On entry,  TRANS  specifies the  operation to be performed as
176
 *         follows:   
177
 *            TRANS = HplNoTrans y := alpha*A  *x + beta*y,
178
 *            TRANS = HplTrans   y := alpha*A^T*x + beta*y.
179
 *
180
 * M       (local input)                 const int
181
 *         On entry,  M  specifies  the number of rows of  the matrix A.
182
 *         M must be at least zero.
183
 *
184
 * N       (local input)                 const int
185
 *         On entry, N  specifies the number of columns of the matrix A.
186
 *         N must be at least zero.
187
 *
188
 * ALPHA   (local input)                 const double
189
 *         On entry, ALPHA specifies the scalar alpha.   When  ALPHA  is
190
 *         supplied as zero then  A and X  need not be set on input.
191
 *
192
 * A       (local input)                 const double *
193
 *         On entry,  A  points  to an array of size equal to or greater
194
 *         than LDA * n.  Before  entry, the leading m by n part  of the
195
 *         array  A  must contain the matrix coefficients.
196
 *
197
 * LDA     (local input)                 const int
198
 *         On entry,  LDA  specifies  the  leading  dimension  of  A  as
199
 *         declared  in  the  calling  (sub) program.  LDA  must  be  at
200
 *         least MAX(1,m).
201
 *
202
 * X       (local input)                 const double *
203
 *         On entry,  X  is an incremented array of dimension  at  least
204
 *         ( 1 + ( n - 1 ) * abs( INCX ) )  that  contains the vector x.
205
 *
206
 * INCX    (local input)                 const int
207
 *         On entry, INCX specifies the increment for the elements of X.
208
 *         INCX must not be zero.
209
 *
210
 * BETA    (local input)                 const double
211
 *         On entry, BETA  specifies the scalar beta.    When  ALPHA  is
212
 *         supplied as zero then  Y  need not be set on input.
213
 *
214
 * Y       (local input/output)          double *
215
 *         On entry,  Y  is an incremented array of dimension  at  least
216
 *         ( 1 + ( n - 1 ) * abs( INCY ) )  that  contains the vector y.
217
 *         Before entry with BETA non-zero, the incremented array Y must
218
 *         contain the vector  y.  On exit,  Y  is  overwritten  by  the
219
 *         updated vector y.
220
 *
221
 * INCY    (local input)                 const int
222
 *         On entry, INCY specifies the increment for the elements of Y.
223
 *         INCY must not be zero.
224
 *
225
 * ---------------------------------------------------------------------
226
 */ 
227
#ifdef HPL_CALL_CBLAS
228
   cblas_dgemv( ORDER, TRANS, M, N, ALPHA, A, LDA, X, INCX, BETA, Y, INCY );
229
#endif
230
#ifdef HPL_CALL_VSIPL
231
   if( ORDER == HplColumnMajor )
232
   {
233
      HPL_dgemv0( TRANS, M, N, ALPHA, A, LDA, X, INCX, BETA, Y, INCY );
234
   }
235
   else
236
   {
237
      HPL_dgemv0( ( TRANS == HplNoTrans ? HplTrans : HplNoTrans ),
238
                  N, M, ALPHA, A, LDA, X, INCX, BETA, Y, INCY );
239
   }
240
#endif
241

    
242

    
243
#ifdef HPL_CALL_FBLAS
244
   double                    alpha = ALPHA, beta = BETA;
245
#ifdef StringSunStyle
246
#ifdef HPL_USE_F77_INTEGER_DEF
247
   F77_INTEGER               IONE = 1;
248
#else
249
   int                       IONE = 1;
250
#endif
251
#endif
252
#ifdef StringStructVal
253
   F77_CHAR                  ftran;
254
#endif
255
#ifdef StringStructPtr
256
   F77_CHAR                  ftran;
257
#endif
258
#ifdef StringCrayStyle
259
   F77_CHAR                  ftran;
260
#endif
261
 
262
#ifdef HPL_USE_F77_INTEGER_DEF
263
   const F77_INTEGER         F77M    = M,   F77N    = N,
264
                             F77lda  = LDA, F77incx = INCX, F77incy = INCY;
265
#else
266
#define F77M                 M
267
#define F77N                 N
268
#define F77lda               LDA
269
#define F77incx              INCX
270
#define F77incy              INCY
271
#endif
272
   char                      ctran;
273

    
274
   if( ORDER == HplColumnMajor )
275
   {
276
      ctran = ( TRANS == HplNoTrans ? 'N' : 'T' );
277

    
278
#ifdef StringSunStyle
279
      F77dgemv( &ctran, &F77M, &F77N, &alpha, A, &F77lda, X, &F77incx,
280
                &beta, Y, &F77incy, IONE );
281
#endif
282
#ifdef StringCrayStyle
283
      ftran = HPL_C2F_CHAR( ctran );
284
      F77dgemv( ftran,  &F77M, &F77N, &alpha, A, &F77lda, X, &F77incx,
285
                &beta, Y, &F77incy );
286
#endif
287
#ifdef StringStructVal
288
      ftran.len = 1; ftran.cp = &ctran;
289
      F77dgemv( ftran,  &F77M, &F77N, &alpha, A, &F77lda, X, &F77incx,
290
                &beta, Y, &F77incy );
291
#endif
292
#ifdef StringStructPtr
293
      ftran.len = 1; ftran.cp = &ctran;
294
      F77dgemv( &ftran, &F77M, &F77N, &alpha, A, &F77lda, X, &F77incx,
295
                &beta, Y, &F77incy );
296
#endif
297
   }
298
   else
299
   {
300
      ctran = ( TRANS == HplNoTrans ? 'T' : 'N' );
301
#ifdef StringSunStyle
302
      F77dgemv( &ctran, &F77N, &F77M, &alpha, A, &F77lda, X, &F77incx,
303
                &beta, Y, &F77incy, IONE );
304
#endif
305
#ifdef StringCrayStyle
306
      ftran = HPL_C2F_CHAR( ctran );
307
      F77dgemv( ftran,  &F77N, &F77M, &alpha, A, &F77lda, X, &F77incx,
308
                &beta, Y, &F77incy );
309
#endif
310
#ifdef StringStructVal
311
      ftran.len = 1; ftran.cp = &ctran;
312
      F77dgemv( ftran,  &F77N, &F77M, &alpha, A, &F77lda, X, &F77incx,
313
                &beta, Y, &F77incy );
314
#endif
315
#ifdef StringStructPtr
316
      ftran.len = 1; ftran.cp = &ctran;
317
      F77dgemv( &ftran, &F77N, &F77M, &alpha, A, &F77lda, X, &F77incx,
318
                &beta, Y, &F77incy );
319
#endif
320
   }
321

    
322
#endif
323

    
324
#ifdef HPL_CALL_CUBLAS
325
   double                    alpha = ALPHA, beta = BETA;
326

    
327
   int                       IONE = 1;
328

    
329
#define CUBLASM                 M
330
#define CUBLASN                 N
331
#define CUBLASlda               LDA
332
#define CUBLASincx              INCX
333
#define CUBLASincy              INCY
334

    
335
   char                      ctran;
336

    
337
   if( ORDER == HplColumnMajor )
338
   {
339
      ctran = ( TRANS == HplNoTrans ? 'N' : 'T' );
340

    
341
      CUBLAS_DGEMV( &ctran, &CUBLASM, &CUBLASN, 
342
                    &alpha, A, &CUBLASlda, X, &CUBLASincx,
343
                    &beta, Y, &CUBLASincy, IONE );
344
   }
345
   else
346
   {
347
      ctran = ( TRANS == HplNoTrans ? 'T' : 'N' );
348

    
349
      CUBLAS_DGEMV( &ctran, &CUBLASN, &CUBLASM, 
350
                    &alpha, A, &CUBLASlda, X, &CUBLASincx,
351
                    &beta, Y, &CUBLASincy, IONE );
352
   }
353

    
354
#endif
355
/*
356
 * End of HPL_dgemv
357
 */
358
}
359

    
360
#endif