Statistiques
| Révision :

root / src / blas / HPL_dgemv.c

Historique | Voir | Annoter | Télécharger (13,4 ko)

1 1 equemene
/*
2 1 equemene
 * -- High Performance Computing Linpack Benchmark (HPL)
3 1 equemene
 *    HPL - 2.0 - September 10, 2008
4 1 equemene
 *    Antoine P. Petitet
5 1 equemene
 *    University of Tennessee, Knoxville
6 1 equemene
 *    Innovative Computing Laboratory
7 1 equemene
 *    (C) Copyright 2000-2008 All Rights Reserved
8 1 equemene
 *
9 1 equemene
 * -- Copyright notice and Licensing terms:
10 1 equemene
 *
11 1 equemene
 * Redistribution  and  use in  source and binary forms, with or without
12 1 equemene
 * modification, are  permitted provided  that the following  conditions
13 1 equemene
 * are met:
14 1 equemene
 *
15 1 equemene
 * 1. Redistributions  of  source  code  must retain the above copyright
16 1 equemene
 * notice, this list of conditions and the following disclaimer.
17 1 equemene
 *
18 1 equemene
 * 2. Redistributions in binary form must reproduce  the above copyright
19 1 equemene
 * notice, this list of conditions,  and the following disclaimer in the
20 1 equemene
 * documentation and/or other materials provided with the distribution.
21 1 equemene
 *
22 1 equemene
 * 3. All  advertising  materials  mentioning  features  or  use of this
23 1 equemene
 * software must display the following acknowledgement:
24 1 equemene
 * This  product  includes  software  developed  at  the  University  of
25 1 equemene
 * Tennessee, Knoxville, Innovative Computing Laboratory.
26 1 equemene
 *
27 1 equemene
 * 4. The name of the  University,  the name of the  Laboratory,  or the
28 1 equemene
 * names  of  its  contributors  may  not  be used to endorse or promote
29 1 equemene
 * products  derived   from   this  software  without  specific  written
30 1 equemene
 * permission.
31 1 equemene
 *
32 1 equemene
 * -- Disclaimer:
33 1 equemene
 *
34 1 equemene
 * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
35 1 equemene
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
36 1 equemene
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
37 1 equemene
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
38 1 equemene
 * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
39 1 equemene
 * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
40 1 equemene
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
41 1 equemene
 * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
42 1 equemene
 * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
43 1 equemene
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
44 1 equemene
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
45 1 equemene
 * ---------------------------------------------------------------------
46 1 equemene
 */
47 1 equemene
/*
48 1 equemene
 * Include files
49 1 equemene
 */
50 1 equemene
#include "hpl.h"
51 1 equemene
52 1 equemene
#ifndef HPL_dgemv
53 1 equemene
54 1 equemene
#ifdef HPL_CALL_VSIPL
55 1 equemene
56 1 equemene
#ifdef STDC_HEADERS
57 1 equemene
static void HPL_dgemv0
58 1 equemene
(
59 1 equemene
   const enum HPL_TRANS       TRANS,
60 1 equemene
   const int                  M,
61 1 equemene
   const int                  N,
62 1 equemene
   const double               ALPHA,
63 1 equemene
   const double               * A,
64 1 equemene
   const int                  LDA,
65 1 equemene
   const double               * X,
66 1 equemene
   const int                  INCX,
67 1 equemene
   const double               BETA,
68 1 equemene
   double                     * Y,
69 1 equemene
   const int                  INCY
70 1 equemene
)
71 1 equemene
#else
72 1 equemene
static void HPL_dgemv0( TRANS, M, N, ALPHA, A, LDA, X, INCX, BETA, Y, INCY )
73 1 equemene
   const enum HPL_TRANS       TRANS;
74 1 equemene
   const int                  INCX, INCY, LDA, M, N;
75 1 equemene
   const double               ALPHA, BETA;
76 1 equemene
   const double               * A, * X;
77 1 equemene
   double                     * Y;
78 1 equemene
#endif
79 1 equemene
{
80 1 equemene
/*
81 1 equemene
 * .. Local Variables ..
82 1 equemene
 */
83 1 equemene
   int                        i, iaij, ix, iy, j, jaj, jx, jy;
84 1 equemene
   register double            t0;
85 1 equemene
/* ..
86 1 equemene
 * .. Executable Statements ..
87 1 equemene
 */
88 1 equemene
   if( ( M == 0 ) || ( N == 0 ) ||
89 1 equemene
       ( ( ALPHA == HPL_rzero ) && ( BETA == HPL_rone  ) ) ) return;
90 1 equemene
91 1 equemene
   if( ALPHA == HPL_rzero ) { HPL_dscal( M, BETA, Y, INCY ); return; }
92 1 equemene
93 1 equemene
   if( TRANS == HplNoTrans )
94 1 equemene
   {
95 1 equemene
      HPL_dscal( M, BETA, Y, INCY );
96 1 equemene
      for( j = 0, jaj  = 0, jx = 0; j < N; j++, jaj += LDA, jx += INCX )
97 1 equemene
      {
98 1 equemene
         t0 = ALPHA * X[jx];
99 1 equemene
         for( i = 0, iaij = jaj, iy = 0; i < M; i++, iaij += 1, iy += INCY )
100 1 equemene
         { Y[iy] += A[iaij] * t0; }
101 1 equemene
      }
102 1 equemene
   }
103 1 equemene
   else
104 1 equemene
   {
105 1 equemene
      for( j = 0, jaj  = 0, jy  = 0; j < N; j++, jaj += LDA, jy += INCY )
106 1 equemene
      {
107 1 equemene
         t0 = HPL_rzero;
108 1 equemene
         for( i = 0, iaij = jaj, ix = 0; i < M; i++, iaij += 1, ix += INCX )
109 1 equemene
         { t0 += A[iaij] * X[ix]; }
110 1 equemene
         if( BETA == HPL_rzero ) Y[jy] = ALPHA * t0;
111 1 equemene
         else                    Y[jy] = BETA * Y[jy] + ALPHA * t0;
112 1 equemene
      }
113 1 equemene
   }
114 1 equemene
}
115 1 equemene
#endif
116 1 equemene
117 1 equemene
#ifdef STDC_HEADERS
118 1 equemene
void HPL_dgemv
119 1 equemene
(
120 1 equemene
   const enum HPL_ORDER             ORDER,
121 1 equemene
   const enum HPL_TRANS             TRANS,
122 1 equemene
   const int                        M,
123 1 equemene
   const int                        N,
124 1 equemene
   const double                     ALPHA,
125 1 equemene
   const double *                   A,
126 1 equemene
   const int                        LDA,
127 1 equemene
   const double *                   X,
128 1 equemene
   const int                        INCX,
129 1 equemene
   const double                     BETA,
130 1 equemene
   double *                         Y,
131 1 equemene
   const int                        INCY
132 1 equemene
)
133 1 equemene
#else
134 1 equemene
void HPL_dgemv
135 1 equemene
( ORDER, TRANS, M, N, ALPHA, A, LDA, X, INCX, BETA, Y, INCY )
136 1 equemene
   const enum HPL_ORDER             ORDER;
137 1 equemene
   const enum HPL_TRANS             TRANS;
138 1 equemene
   const int                        M;
139 1 equemene
   const int                        N;
140 1 equemene
   const double                     ALPHA;
141 1 equemene
   const double *                   A;
142 1 equemene
   const int                        LDA;
143 1 equemene
   const double *                   X;
144 1 equemene
   const int                        INCX;
145 1 equemene
   const double                     BETA;
146 1 equemene
   double *                         Y;
147 1 equemene
   const int                        INCY;
148 1 equemene
#endif
149 1 equemene
{
150 1 equemene
/*
151 1 equemene
 * Purpose
152 1 equemene
 * =======
153 1 equemene
 *
154 1 equemene
 * HPL_dgemv performs one of the matrix-vector operations
155 1 equemene
 *
156 1 equemene
 *     y := alpha * op( A ) * x + beta * y,
157 1 equemene
 *
158 1 equemene
 *  where op( X ) is one of
159 1 equemene
 *
160 1 equemene
 *     op( X ) = X   or   op( X ) = X^T.
161 1 equemene
 *
162 1 equemene
 * where alpha and beta are scalars, x and y are vectors and  A  is an m
163 1 equemene
 * by n matrix.
164 1 equemene
 *
165 1 equemene
 * Arguments
166 1 equemene
 * =========
167 1 equemene
 *
168 1 equemene
 * ORDER   (local input)                 const enum HPL_ORDER
169 1 equemene
 *         On entry, ORDER  specifies the storage format of the operands
170 1 equemene
 *         as follows:
171 1 equemene
 *            ORDER = HplRowMajor,
172 1 equemene
 *            ORDER = HplColumnMajor.
173 1 equemene
 *
174 1 equemene
 * TRANS   (local input)                 const enum HPL_TRANS
175 1 equemene
 *         On entry,  TRANS  specifies the  operation to be performed as
176 1 equemene
 *         follows:
177 1 equemene
 *            TRANS = HplNoTrans y := alpha*A  *x + beta*y,
178 1 equemene
 *            TRANS = HplTrans   y := alpha*A^T*x + beta*y.
179 1 equemene
 *
180 1 equemene
 * M       (local input)                 const int
181 1 equemene
 *         On entry,  M  specifies  the number of rows of  the matrix A.
182 1 equemene
 *         M must be at least zero.
183 1 equemene
 *
184 1 equemene
 * N       (local input)                 const int
185 1 equemene
 *         On entry, N  specifies the number of columns of the matrix A.
186 1 equemene
 *         N must be at least zero.
187 1 equemene
 *
188 1 equemene
 * ALPHA   (local input)                 const double
189 1 equemene
 *         On entry, ALPHA specifies the scalar alpha.   When  ALPHA  is
190 1 equemene
 *         supplied as zero then  A and X  need not be set on input.
191 1 equemene
 *
192 1 equemene
 * A       (local input)                 const double *
193 1 equemene
 *         On entry,  A  points  to an array of size equal to or greater
194 1 equemene
 *         than LDA * n.  Before  entry, the leading m by n part  of the
195 1 equemene
 *         array  A  must contain the matrix coefficients.
196 1 equemene
 *
197 1 equemene
 * LDA     (local input)                 const int
198 1 equemene
 *         On entry,  LDA  specifies  the  leading  dimension  of  A  as
199 1 equemene
 *         declared  in  the  calling  (sub) program.  LDA  must  be  at
200 1 equemene
 *         least MAX(1,m).
201 1 equemene
 *
202 1 equemene
 * X       (local input)                 const double *
203 1 equemene
 *         On entry,  X  is an incremented array of dimension  at  least
204 1 equemene
 *         ( 1 + ( n - 1 ) * abs( INCX ) )  that  contains the vector x.
205 1 equemene
 *
206 1 equemene
 * INCX    (local input)                 const int
207 1 equemene
 *         On entry, INCX specifies the increment for the elements of X.
208 1 equemene
 *         INCX must not be zero.
209 1 equemene
 *
210 1 equemene
 * BETA    (local input)                 const double
211 1 equemene
 *         On entry, BETA  specifies the scalar beta.    When  ALPHA  is
212 1 equemene
 *         supplied as zero then  Y  need not be set on input.
213 1 equemene
 *
214 1 equemene
 * Y       (local input/output)          double *
215 1 equemene
 *         On entry,  Y  is an incremented array of dimension  at  least
216 1 equemene
 *         ( 1 + ( n - 1 ) * abs( INCY ) )  that  contains the vector y.
217 1 equemene
 *         Before entry with BETA non-zero, the incremented array Y must
218 1 equemene
 *         contain the vector  y.  On exit,  Y  is  overwritten  by  the
219 1 equemene
 *         updated vector y.
220 1 equemene
 *
221 1 equemene
 * INCY    (local input)                 const int
222 1 equemene
 *         On entry, INCY specifies the increment for the elements of Y.
223 1 equemene
 *         INCY must not be zero.
224 1 equemene
 *
225 1 equemene
 * ---------------------------------------------------------------------
226 1 equemene
 */
227 1 equemene
#ifdef HPL_CALL_CBLAS
228 1 equemene
   cblas_dgemv( ORDER, TRANS, M, N, ALPHA, A, LDA, X, INCX, BETA, Y, INCY );
229 1 equemene
#endif
230 12 equemene
#ifdef HPL_CALL_GSLCBLAS
231 12 equemene
   cblas_dgemv( ORDER, TRANS, M, N, ALPHA, A, LDA, X, INCX, BETA, Y, INCY );
232 12 equemene
#endif
233 1 equemene
#ifdef HPL_CALL_VSIPL
234 1 equemene
   if( ORDER == HplColumnMajor )
235 1 equemene
   {
236 1 equemene
      HPL_dgemv0( TRANS, M, N, ALPHA, A, LDA, X, INCX, BETA, Y, INCY );
237 1 equemene
   }
238 1 equemene
   else
239 1 equemene
   {
240 1 equemene
      HPL_dgemv0( ( TRANS == HplNoTrans ? HplTrans : HplNoTrans ),
241 1 equemene
                  N, M, ALPHA, A, LDA, X, INCX, BETA, Y, INCY );
242 1 equemene
   }
243 1 equemene
#endif
244 9 equemene
245 9 equemene
246 1 equemene
#ifdef HPL_CALL_FBLAS
247 1 equemene
   double                    alpha = ALPHA, beta = BETA;
248 1 equemene
#ifdef StringSunStyle
249 1 equemene
#ifdef HPL_USE_F77_INTEGER_DEF
250 1 equemene
   F77_INTEGER               IONE = 1;
251 1 equemene
#else
252 1 equemene
   int                       IONE = 1;
253 1 equemene
#endif
254 1 equemene
#endif
255 1 equemene
#ifdef StringStructVal
256 1 equemene
   F77_CHAR                  ftran;
257 1 equemene
#endif
258 1 equemene
#ifdef StringStructPtr
259 1 equemene
   F77_CHAR                  ftran;
260 1 equemene
#endif
261 1 equemene
#ifdef StringCrayStyle
262 1 equemene
   F77_CHAR                  ftran;
263 1 equemene
#endif
264 1 equemene
265 1 equemene
#ifdef HPL_USE_F77_INTEGER_DEF
266 1 equemene
   const F77_INTEGER         F77M    = M,   F77N    = N,
267 1 equemene
                             F77lda  = LDA, F77incx = INCX, F77incy = INCY;
268 1 equemene
#else
269 1 equemene
#define F77M                 M
270 1 equemene
#define F77N                 N
271 1 equemene
#define F77lda               LDA
272 1 equemene
#define F77incx              INCX
273 1 equemene
#define F77incy              INCY
274 1 equemene
#endif
275 1 equemene
   char                      ctran;
276 1 equemene
277 1 equemene
   if( ORDER == HplColumnMajor )
278 1 equemene
   {
279 1 equemene
      ctran = ( TRANS == HplNoTrans ? 'N' : 'T' );
280 1 equemene
281 1 equemene
#ifdef StringSunStyle
282 1 equemene
      F77dgemv( &ctran, &F77M, &F77N, &alpha, A, &F77lda, X, &F77incx,
283 1 equemene
                &beta, Y, &F77incy, IONE );
284 1 equemene
#endif
285 1 equemene
#ifdef StringCrayStyle
286 1 equemene
      ftran = HPL_C2F_CHAR( ctran );
287 1 equemene
      F77dgemv( ftran,  &F77M, &F77N, &alpha, A, &F77lda, X, &F77incx,
288 1 equemene
                &beta, Y, &F77incy );
289 1 equemene
#endif
290 1 equemene
#ifdef StringStructVal
291 1 equemene
      ftran.len = 1; ftran.cp = &ctran;
292 1 equemene
      F77dgemv( ftran,  &F77M, &F77N, &alpha, A, &F77lda, X, &F77incx,
293 1 equemene
                &beta, Y, &F77incy );
294 1 equemene
#endif
295 1 equemene
#ifdef StringStructPtr
296 1 equemene
      ftran.len = 1; ftran.cp = &ctran;
297 1 equemene
      F77dgemv( &ftran, &F77M, &F77N, &alpha, A, &F77lda, X, &F77incx,
298 1 equemene
                &beta, Y, &F77incy );
299 1 equemene
#endif
300 1 equemene
   }
301 1 equemene
   else
302 1 equemene
   {
303 1 equemene
      ctran = ( TRANS == HplNoTrans ? 'T' : 'N' );
304 1 equemene
#ifdef StringSunStyle
305 1 equemene
      F77dgemv( &ctran, &F77N, &F77M, &alpha, A, &F77lda, X, &F77incx,
306 1 equemene
                &beta, Y, &F77incy, IONE );
307 1 equemene
#endif
308 1 equemene
#ifdef StringCrayStyle
309 1 equemene
      ftran = HPL_C2F_CHAR( ctran );
310 1 equemene
      F77dgemv( ftran,  &F77N, &F77M, &alpha, A, &F77lda, X, &F77incx,
311 1 equemene
                &beta, Y, &F77incy );
312 1 equemene
#endif
313 1 equemene
#ifdef StringStructVal
314 1 equemene
      ftran.len = 1; ftran.cp = &ctran;
315 1 equemene
      F77dgemv( ftran,  &F77N, &F77M, &alpha, A, &F77lda, X, &F77incx,
316 1 equemene
                &beta, Y, &F77incy );
317 1 equemene
#endif
318 1 equemene
#ifdef StringStructPtr
319 1 equemene
      ftran.len = 1; ftran.cp = &ctran;
320 1 equemene
      F77dgemv( &ftran, &F77N, &F77M, &alpha, A, &F77lda, X, &F77incx,
321 1 equemene
                &beta, Y, &F77incy );
322 1 equemene
#endif
323 1 equemene
   }
324 1 equemene
325 1 equemene
#endif
326 9 equemene
327 9 equemene
#ifdef HPL_CALL_CUBLAS
328 9 equemene
   double                    alpha = ALPHA, beta = BETA;
329 9 equemene
330 9 equemene
   int                       IONE = 1;
331 9 equemene
332 9 equemene
#define CUBLASM                 M
333 9 equemene
#define CUBLASN                 N
334 9 equemene
#define CUBLASlda               LDA
335 9 equemene
#define CUBLASincx              INCX
336 9 equemene
#define CUBLASincy              INCY
337 9 equemene
338 9 equemene
   char                      ctran;
339 9 equemene
340 9 equemene
   if( ORDER == HplColumnMajor )
341 9 equemene
   {
342 9 equemene
      ctran = ( TRANS == HplNoTrans ? 'N' : 'T' );
343 9 equemene
344 9 equemene
      CUBLAS_DGEMV( &ctran, &CUBLASM, &CUBLASN,
345 9 equemene
                    &alpha, A, &CUBLASlda, X, &CUBLASincx,
346 9 equemene
                    &beta, Y, &CUBLASincy, IONE );
347 9 equemene
   }
348 9 equemene
   else
349 9 equemene
   {
350 9 equemene
      ctran = ( TRANS == HplNoTrans ? 'T' : 'N' );
351 9 equemene
352 9 equemene
      CUBLAS_DGEMV( &ctran, &CUBLASN, &CUBLASM,
353 9 equemene
                    &alpha, A, &CUBLASlda, X, &CUBLASincx,
354 9 equemene
                    &beta, Y, &CUBLASincy, IONE );
355 9 equemene
   }
356 9 equemene
357 9 equemene
#endif
358 10 equemene
359 10 equemene
#ifdef HPL_CALL_ACML
360 10 equemene
   double                    alpha = ALPHA, beta = BETA;
361 10 equemene
362 10 equemene
   int                       IONE = 1;
363 10 equemene
364 10 equemene
#define ACMLM                 M
365 10 equemene
#define ACMLN                 N
366 10 equemene
#define ACMLlda               LDA
367 10 equemene
#define ACMLincx              INCX
368 10 equemene
#define ACMLincy              INCY
369 10 equemene
370 10 equemene
   char                      ctran;
371 10 equemene
372 10 equemene
   if( ORDER == HplColumnMajor )
373 10 equemene
   {
374 10 equemene
      ctran = ( TRANS == HplNoTrans ? 'N' : 'T' );
375 10 equemene
376 10 equemene
      dgemv_( &ctran, &ACMLM, &ACMLN,
377 10 equemene
              &alpha, A, &ACMLlda, X, &ACMLincx,
378 10 equemene
              &beta, Y, &ACMLincy, IONE );
379 10 equemene
   }
380 10 equemene
   else
381 10 equemene
   {
382 10 equemene
      ctran = ( TRANS == HplNoTrans ? 'T' : 'N' );
383 10 equemene
384 10 equemene
      dgemv_( &ctran, &ACMLN, &ACMLM,
385 10 equemene
              &alpha, A, &ACMLlda, X, &ACMLincx,
386 10 equemene
              &beta, Y, &ACMLincy, IONE );
387 10 equemene
   }
388 10 equemene
389 10 equemene
#endif
390 1 equemene
/*
391 1 equemene
 * End of HPL_dgemv
392 1 equemene
 */
393 1 equemene
}
394 1 equemene
395 1 equemene
#endif