root / src / blas / HPL_dgemv.c @ 9
Historique | Voir | Annoter | Télécharger (12,58 ko)
1 |
/*
|
---|---|
2 |
* -- High Performance Computing Linpack Benchmark (HPL)
|
3 |
* HPL - 2.0 - September 10, 2008
|
4 |
* Antoine P. Petitet
|
5 |
* University of Tennessee, Knoxville
|
6 |
* Innovative Computing Laboratory
|
7 |
* (C) Copyright 2000-2008 All Rights Reserved
|
8 |
*
|
9 |
* -- Copyright notice and Licensing terms:
|
10 |
*
|
11 |
* Redistribution and use in source and binary forms, with or without
|
12 |
* modification, are permitted provided that the following conditions
|
13 |
* are met:
|
14 |
*
|
15 |
* 1. Redistributions of source code must retain the above copyright
|
16 |
* notice, this list of conditions and the following disclaimer.
|
17 |
*
|
18 |
* 2. Redistributions in binary form must reproduce the above copyright
|
19 |
* notice, this list of conditions, and the following disclaimer in the
|
20 |
* documentation and/or other materials provided with the distribution.
|
21 |
*
|
22 |
* 3. All advertising materials mentioning features or use of this
|
23 |
* software must display the following acknowledgement:
|
24 |
* This product includes software developed at the University of
|
25 |
* Tennessee, Knoxville, Innovative Computing Laboratory.
|
26 |
*
|
27 |
* 4. The name of the University, the name of the Laboratory, or the
|
28 |
* names of its contributors may not be used to endorse or promote
|
29 |
* products derived from this software without specific written
|
30 |
* permission.
|
31 |
*
|
32 |
* -- Disclaimer:
|
33 |
*
|
34 |
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
35 |
* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
36 |
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
37 |
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
|
38 |
* OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
39 |
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
40 |
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
41 |
* DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
42 |
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
43 |
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
44 |
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
45 |
* ---------------------------------------------------------------------
|
46 |
*/
|
47 |
/*
|
48 |
* Include files
|
49 |
*/
|
50 |
#include "hpl.h" |
51 |
|
52 |
#ifndef HPL_dgemv
|
53 |
|
54 |
#ifdef HPL_CALL_VSIPL
|
55 |
|
56 |
#ifdef STDC_HEADERS
|
57 |
static void HPL_dgemv0 |
58 |
( |
59 |
const enum HPL_TRANS TRANS, |
60 |
const int M, |
61 |
const int N, |
62 |
const double ALPHA, |
63 |
const double * A, |
64 |
const int LDA, |
65 |
const double * X, |
66 |
const int INCX, |
67 |
const double BETA, |
68 |
double * Y,
|
69 |
const int INCY |
70 |
) |
71 |
#else
|
72 |
static void HPL_dgemv0( TRANS, M, N, ALPHA, A, LDA, X, INCX, BETA, Y, INCY ) |
73 |
const enum HPL_TRANS TRANS; |
74 |
const int INCX, INCY, LDA, M, N; |
75 |
const double ALPHA, BETA; |
76 |
const double * A, * X; |
77 |
double * Y;
|
78 |
#endif
|
79 |
{ |
80 |
/*
|
81 |
* .. Local Variables ..
|
82 |
*/
|
83 |
int i, iaij, ix, iy, j, jaj, jx, jy;
|
84 |
register double t0; |
85 |
/* ..
|
86 |
* .. Executable Statements ..
|
87 |
*/
|
88 |
if( ( M == 0 ) || ( N == 0 ) || |
89 |
( ( ALPHA == HPL_rzero ) && ( BETA == HPL_rone ) ) ) return;
|
90 |
|
91 |
if( ALPHA == HPL_rzero ) { HPL_dscal( M, BETA, Y, INCY ); return; } |
92 |
|
93 |
if( TRANS == HplNoTrans )
|
94 |
{ |
95 |
HPL_dscal( M, BETA, Y, INCY ); |
96 |
for( j = 0, jaj = 0, jx = 0; j < N; j++, jaj += LDA, jx += INCX ) |
97 |
{ |
98 |
t0 = ALPHA * X[jx]; |
99 |
for( i = 0, iaij = jaj, iy = 0; i < M; i++, iaij += 1, iy += INCY ) |
100 |
{ Y[iy] += A[iaij] * t0; } |
101 |
} |
102 |
} |
103 |
else
|
104 |
{ |
105 |
for( j = 0, jaj = 0, jy = 0; j < N; j++, jaj += LDA, jy += INCY ) |
106 |
{ |
107 |
t0 = HPL_rzero; |
108 |
for( i = 0, iaij = jaj, ix = 0; i < M; i++, iaij += 1, ix += INCX ) |
109 |
{ t0 += A[iaij] * X[ix]; } |
110 |
if( BETA == HPL_rzero ) Y[jy] = ALPHA * t0;
|
111 |
else Y[jy] = BETA * Y[jy] + ALPHA * t0;
|
112 |
} |
113 |
} |
114 |
} |
115 |
#endif
|
116 |
|
117 |
#ifdef STDC_HEADERS
|
118 |
void HPL_dgemv
|
119 |
( |
120 |
const enum HPL_ORDER ORDER, |
121 |
const enum HPL_TRANS TRANS, |
122 |
const int M, |
123 |
const int N, |
124 |
const double ALPHA, |
125 |
const double * A, |
126 |
const int LDA, |
127 |
const double * X, |
128 |
const int INCX, |
129 |
const double BETA, |
130 |
double * Y,
|
131 |
const int INCY |
132 |
) |
133 |
#else
|
134 |
void HPL_dgemv
|
135 |
( ORDER, TRANS, M, N, ALPHA, A, LDA, X, INCX, BETA, Y, INCY ) |
136 |
const enum HPL_ORDER ORDER; |
137 |
const enum HPL_TRANS TRANS; |
138 |
const int M; |
139 |
const int N; |
140 |
const double ALPHA; |
141 |
const double * A; |
142 |
const int LDA; |
143 |
const double * X; |
144 |
const int INCX; |
145 |
const double BETA; |
146 |
double * Y;
|
147 |
const int INCY; |
148 |
#endif
|
149 |
{ |
150 |
/*
|
151 |
* Purpose
|
152 |
* =======
|
153 |
*
|
154 |
* HPL_dgemv performs one of the matrix-vector operations
|
155 |
*
|
156 |
* y := alpha * op( A ) * x + beta * y,
|
157 |
*
|
158 |
* where op( X ) is one of
|
159 |
*
|
160 |
* op( X ) = X or op( X ) = X^T.
|
161 |
*
|
162 |
* where alpha and beta are scalars, x and y are vectors and A is an m
|
163 |
* by n matrix.
|
164 |
*
|
165 |
* Arguments
|
166 |
* =========
|
167 |
*
|
168 |
* ORDER (local input) const enum HPL_ORDER
|
169 |
* On entry, ORDER specifies the storage format of the operands
|
170 |
* as follows:
|
171 |
* ORDER = HplRowMajor,
|
172 |
* ORDER = HplColumnMajor.
|
173 |
*
|
174 |
* TRANS (local input) const enum HPL_TRANS
|
175 |
* On entry, TRANS specifies the operation to be performed as
|
176 |
* follows:
|
177 |
* TRANS = HplNoTrans y := alpha*A *x + beta*y,
|
178 |
* TRANS = HplTrans y := alpha*A^T*x + beta*y.
|
179 |
*
|
180 |
* M (local input) const int
|
181 |
* On entry, M specifies the number of rows of the matrix A.
|
182 |
* M must be at least zero.
|
183 |
*
|
184 |
* N (local input) const int
|
185 |
* On entry, N specifies the number of columns of the matrix A.
|
186 |
* N must be at least zero.
|
187 |
*
|
188 |
* ALPHA (local input) const double
|
189 |
* On entry, ALPHA specifies the scalar alpha. When ALPHA is
|
190 |
* supplied as zero then A and X need not be set on input.
|
191 |
*
|
192 |
* A (local input) const double *
|
193 |
* On entry, A points to an array of size equal to or greater
|
194 |
* than LDA * n. Before entry, the leading m by n part of the
|
195 |
* array A must contain the matrix coefficients.
|
196 |
*
|
197 |
* LDA (local input) const int
|
198 |
* On entry, LDA specifies the leading dimension of A as
|
199 |
* declared in the calling (sub) program. LDA must be at
|
200 |
* least MAX(1,m).
|
201 |
*
|
202 |
* X (local input) const double *
|
203 |
* On entry, X is an incremented array of dimension at least
|
204 |
* ( 1 + ( n - 1 ) * abs( INCX ) ) that contains the vector x.
|
205 |
*
|
206 |
* INCX (local input) const int
|
207 |
* On entry, INCX specifies the increment for the elements of X.
|
208 |
* INCX must not be zero.
|
209 |
*
|
210 |
* BETA (local input) const double
|
211 |
* On entry, BETA specifies the scalar beta. When ALPHA is
|
212 |
* supplied as zero then Y need not be set on input.
|
213 |
*
|
214 |
* Y (local input/output) double *
|
215 |
* On entry, Y is an incremented array of dimension at least
|
216 |
* ( 1 + ( n - 1 ) * abs( INCY ) ) that contains the vector y.
|
217 |
* Before entry with BETA non-zero, the incremented array Y must
|
218 |
* contain the vector y. On exit, Y is overwritten by the
|
219 |
* updated vector y.
|
220 |
*
|
221 |
* INCY (local input) const int
|
222 |
* On entry, INCY specifies the increment for the elements of Y.
|
223 |
* INCY must not be zero.
|
224 |
*
|
225 |
* ---------------------------------------------------------------------
|
226 |
*/
|
227 |
#ifdef HPL_CALL_CBLAS
|
228 |
cblas_dgemv( ORDER, TRANS, M, N, ALPHA, A, LDA, X, INCX, BETA, Y, INCY ); |
229 |
#endif
|
230 |
#ifdef HPL_CALL_VSIPL
|
231 |
if( ORDER == HplColumnMajor )
|
232 |
{ |
233 |
HPL_dgemv0( TRANS, M, N, ALPHA, A, LDA, X, INCX, BETA, Y, INCY ); |
234 |
} |
235 |
else
|
236 |
{ |
237 |
HPL_dgemv0( ( TRANS == HplNoTrans ? HplTrans : HplNoTrans ), |
238 |
N, M, ALPHA, A, LDA, X, INCX, BETA, Y, INCY ); |
239 |
} |
240 |
#endif
|
241 |
|
242 |
|
243 |
#ifdef HPL_CALL_FBLAS
|
244 |
double alpha = ALPHA, beta = BETA;
|
245 |
#ifdef StringSunStyle
|
246 |
#ifdef HPL_USE_F77_INTEGER_DEF
|
247 |
F77_INTEGER IONE = 1;
|
248 |
#else
|
249 |
int IONE = 1; |
250 |
#endif
|
251 |
#endif
|
252 |
#ifdef StringStructVal
|
253 |
F77_CHAR ftran; |
254 |
#endif
|
255 |
#ifdef StringStructPtr
|
256 |
F77_CHAR ftran; |
257 |
#endif
|
258 |
#ifdef StringCrayStyle
|
259 |
F77_CHAR ftran; |
260 |
#endif
|
261 |
|
262 |
#ifdef HPL_USE_F77_INTEGER_DEF
|
263 |
const F77_INTEGER F77M = M, F77N = N,
|
264 |
F77lda = LDA, F77incx = INCX, F77incy = INCY; |
265 |
#else
|
266 |
#define F77M M
|
267 |
#define F77N N
|
268 |
#define F77lda LDA
|
269 |
#define F77incx INCX
|
270 |
#define F77incy INCY
|
271 |
#endif
|
272 |
char ctran;
|
273 |
|
274 |
if( ORDER == HplColumnMajor )
|
275 |
{ |
276 |
ctran = ( TRANS == HplNoTrans ? 'N' : 'T' ); |
277 |
|
278 |
#ifdef StringSunStyle
|
279 |
F77dgemv( &ctran, &F77M, &F77N, &alpha, A, &F77lda, X, &F77incx, |
280 |
&beta, Y, &F77incy, IONE ); |
281 |
#endif
|
282 |
#ifdef StringCrayStyle
|
283 |
ftran = HPL_C2F_CHAR( ctran ); |
284 |
F77dgemv( ftran, &F77M, &F77N, &alpha, A, &F77lda, X, &F77incx, |
285 |
&beta, Y, &F77incy ); |
286 |
#endif
|
287 |
#ifdef StringStructVal
|
288 |
ftran.len = 1; ftran.cp = &ctran;
|
289 |
F77dgemv( ftran, &F77M, &F77N, &alpha, A, &F77lda, X, &F77incx, |
290 |
&beta, Y, &F77incy ); |
291 |
#endif
|
292 |
#ifdef StringStructPtr
|
293 |
ftran.len = 1; ftran.cp = &ctran;
|
294 |
F77dgemv( &ftran, &F77M, &F77N, &alpha, A, &F77lda, X, &F77incx, |
295 |
&beta, Y, &F77incy ); |
296 |
#endif
|
297 |
} |
298 |
else
|
299 |
{ |
300 |
ctran = ( TRANS == HplNoTrans ? 'T' : 'N' ); |
301 |
#ifdef StringSunStyle
|
302 |
F77dgemv( &ctran, &F77N, &F77M, &alpha, A, &F77lda, X, &F77incx, |
303 |
&beta, Y, &F77incy, IONE ); |
304 |
#endif
|
305 |
#ifdef StringCrayStyle
|
306 |
ftran = HPL_C2F_CHAR( ctran ); |
307 |
F77dgemv( ftran, &F77N, &F77M, &alpha, A, &F77lda, X, &F77incx, |
308 |
&beta, Y, &F77incy ); |
309 |
#endif
|
310 |
#ifdef StringStructVal
|
311 |
ftran.len = 1; ftran.cp = &ctran;
|
312 |
F77dgemv( ftran, &F77N, &F77M, &alpha, A, &F77lda, X, &F77incx, |
313 |
&beta, Y, &F77incy ); |
314 |
#endif
|
315 |
#ifdef StringStructPtr
|
316 |
ftran.len = 1; ftran.cp = &ctran;
|
317 |
F77dgemv( &ftran, &F77N, &F77M, &alpha, A, &F77lda, X, &F77incx, |
318 |
&beta, Y, &F77incy ); |
319 |
#endif
|
320 |
} |
321 |
|
322 |
#endif
|
323 |
|
324 |
#ifdef HPL_CALL_CUBLAS
|
325 |
double alpha = ALPHA, beta = BETA;
|
326 |
|
327 |
int IONE = 1; |
328 |
|
329 |
#define CUBLASM M
|
330 |
#define CUBLASN N
|
331 |
#define CUBLASlda LDA
|
332 |
#define CUBLASincx INCX
|
333 |
#define CUBLASincy INCY
|
334 |
|
335 |
char ctran;
|
336 |
|
337 |
if( ORDER == HplColumnMajor )
|
338 |
{ |
339 |
ctran = ( TRANS == HplNoTrans ? 'N' : 'T' ); |
340 |
|
341 |
CUBLAS_DGEMV( &ctran, &CUBLASM, &CUBLASN, |
342 |
&alpha, A, &CUBLASlda, X, &CUBLASincx, |
343 |
&beta, Y, &CUBLASincy, IONE ); |
344 |
} |
345 |
else
|
346 |
{ |
347 |
ctran = ( TRANS == HplNoTrans ? 'T' : 'N' ); |
348 |
|
349 |
CUBLAS_DGEMV( &ctran, &CUBLASN, &CUBLASM, |
350 |
&alpha, A, &CUBLASlda, X, &CUBLASincx, |
351 |
&beta, Y, &CUBLASincy, IONE ); |
352 |
} |
353 |
|
354 |
#endif
|
355 |
/*
|
356 |
* End of HPL_dgemv
|
357 |
*/
|
358 |
} |
359 |
|
360 |
#endif
|