root / src / blas / HPL_dgemv.c @ 9
Historique | Voir | Annoter | Télécharger (12,58 ko)
1 | 1 | equemene | /*
|
---|---|---|---|
2 | 1 | equemene | * -- High Performance Computing Linpack Benchmark (HPL)
|
3 | 1 | equemene | * HPL - 2.0 - September 10, 2008
|
4 | 1 | equemene | * Antoine P. Petitet
|
5 | 1 | equemene | * University of Tennessee, Knoxville
|
6 | 1 | equemene | * Innovative Computing Laboratory
|
7 | 1 | equemene | * (C) Copyright 2000-2008 All Rights Reserved
|
8 | 1 | equemene | *
|
9 | 1 | equemene | * -- Copyright notice and Licensing terms:
|
10 | 1 | equemene | *
|
11 | 1 | equemene | * Redistribution and use in source and binary forms, with or without
|
12 | 1 | equemene | * modification, are permitted provided that the following conditions
|
13 | 1 | equemene | * are met:
|
14 | 1 | equemene | *
|
15 | 1 | equemene | * 1. Redistributions of source code must retain the above copyright
|
16 | 1 | equemene | * notice, this list of conditions and the following disclaimer.
|
17 | 1 | equemene | *
|
18 | 1 | equemene | * 2. Redistributions in binary form must reproduce the above copyright
|
19 | 1 | equemene | * notice, this list of conditions, and the following disclaimer in the
|
20 | 1 | equemene | * documentation and/or other materials provided with the distribution.
|
21 | 1 | equemene | *
|
22 | 1 | equemene | * 3. All advertising materials mentioning features or use of this
|
23 | 1 | equemene | * software must display the following acknowledgement:
|
24 | 1 | equemene | * This product includes software developed at the University of
|
25 | 1 | equemene | * Tennessee, Knoxville, Innovative Computing Laboratory.
|
26 | 1 | equemene | *
|
27 | 1 | equemene | * 4. The name of the University, the name of the Laboratory, or the
|
28 | 1 | equemene | * names of its contributors may not be used to endorse or promote
|
29 | 1 | equemene | * products derived from this software without specific written
|
30 | 1 | equemene | * permission.
|
31 | 1 | equemene | *
|
32 | 1 | equemene | * -- Disclaimer:
|
33 | 1 | equemene | *
|
34 | 1 | equemene | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
35 | 1 | equemene | * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
36 | 1 | equemene | * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
37 | 1 | equemene | * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
|
38 | 1 | equemene | * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
39 | 1 | equemene | * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
40 | 1 | equemene | * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
41 | 1 | equemene | * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
42 | 1 | equemene | * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
43 | 1 | equemene | * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
44 | 1 | equemene | * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
45 | 1 | equemene | * ---------------------------------------------------------------------
|
46 | 1 | equemene | */
|
47 | 1 | equemene | /*
|
48 | 1 | equemene | * Include files
|
49 | 1 | equemene | */
|
50 | 1 | equemene | #include "hpl.h" |
51 | 1 | equemene | |
52 | 1 | equemene | #ifndef HPL_dgemv
|
53 | 1 | equemene | |
54 | 1 | equemene | #ifdef HPL_CALL_VSIPL
|
55 | 1 | equemene | |
56 | 1 | equemene | #ifdef STDC_HEADERS
|
57 | 1 | equemene | static void HPL_dgemv0 |
58 | 1 | equemene | ( |
59 | 1 | equemene | const enum HPL_TRANS TRANS, |
60 | 1 | equemene | const int M, |
61 | 1 | equemene | const int N, |
62 | 1 | equemene | const double ALPHA, |
63 | 1 | equemene | const double * A, |
64 | 1 | equemene | const int LDA, |
65 | 1 | equemene | const double * X, |
66 | 1 | equemene | const int INCX, |
67 | 1 | equemene | const double BETA, |
68 | 1 | equemene | double * Y,
|
69 | 1 | equemene | const int INCY |
70 | 1 | equemene | ) |
71 | 1 | equemene | #else
|
72 | 1 | equemene | static void HPL_dgemv0( TRANS, M, N, ALPHA, A, LDA, X, INCX, BETA, Y, INCY ) |
73 | 1 | equemene | const enum HPL_TRANS TRANS; |
74 | 1 | equemene | const int INCX, INCY, LDA, M, N; |
75 | 1 | equemene | const double ALPHA, BETA; |
76 | 1 | equemene | const double * A, * X; |
77 | 1 | equemene | double * Y;
|
78 | 1 | equemene | #endif
|
79 | 1 | equemene | { |
80 | 1 | equemene | /*
|
81 | 1 | equemene | * .. Local Variables ..
|
82 | 1 | equemene | */
|
83 | 1 | equemene | int i, iaij, ix, iy, j, jaj, jx, jy;
|
84 | 1 | equemene | register double t0; |
85 | 1 | equemene | /* ..
|
86 | 1 | equemene | * .. Executable Statements ..
|
87 | 1 | equemene | */
|
88 | 1 | equemene | if( ( M == 0 ) || ( N == 0 ) || |
89 | 1 | equemene | ( ( ALPHA == HPL_rzero ) && ( BETA == HPL_rone ) ) ) return;
|
90 | 1 | equemene | |
91 | 1 | equemene | if( ALPHA == HPL_rzero ) { HPL_dscal( M, BETA, Y, INCY ); return; } |
92 | 1 | equemene | |
93 | 1 | equemene | if( TRANS == HplNoTrans )
|
94 | 1 | equemene | { |
95 | 1 | equemene | HPL_dscal( M, BETA, Y, INCY ); |
96 | 1 | equemene | for( j = 0, jaj = 0, jx = 0; j < N; j++, jaj += LDA, jx += INCX ) |
97 | 1 | equemene | { |
98 | 1 | equemene | t0 = ALPHA * X[jx]; |
99 | 1 | equemene | for( i = 0, iaij = jaj, iy = 0; i < M; i++, iaij += 1, iy += INCY ) |
100 | 1 | equemene | { Y[iy] += A[iaij] * t0; } |
101 | 1 | equemene | } |
102 | 1 | equemene | } |
103 | 1 | equemene | else
|
104 | 1 | equemene | { |
105 | 1 | equemene | for( j = 0, jaj = 0, jy = 0; j < N; j++, jaj += LDA, jy += INCY ) |
106 | 1 | equemene | { |
107 | 1 | equemene | t0 = HPL_rzero; |
108 | 1 | equemene | for( i = 0, iaij = jaj, ix = 0; i < M; i++, iaij += 1, ix += INCX ) |
109 | 1 | equemene | { t0 += A[iaij] * X[ix]; } |
110 | 1 | equemene | if( BETA == HPL_rzero ) Y[jy] = ALPHA * t0;
|
111 | 1 | equemene | else Y[jy] = BETA * Y[jy] + ALPHA * t0;
|
112 | 1 | equemene | } |
113 | 1 | equemene | } |
114 | 1 | equemene | } |
115 | 1 | equemene | #endif
|
116 | 1 | equemene | |
117 | 1 | equemene | #ifdef STDC_HEADERS
|
118 | 1 | equemene | void HPL_dgemv
|
119 | 1 | equemene | ( |
120 | 1 | equemene | const enum HPL_ORDER ORDER, |
121 | 1 | equemene | const enum HPL_TRANS TRANS, |
122 | 1 | equemene | const int M, |
123 | 1 | equemene | const int N, |
124 | 1 | equemene | const double ALPHA, |
125 | 1 | equemene | const double * A, |
126 | 1 | equemene | const int LDA, |
127 | 1 | equemene | const double * X, |
128 | 1 | equemene | const int INCX, |
129 | 1 | equemene | const double BETA, |
130 | 1 | equemene | double * Y,
|
131 | 1 | equemene | const int INCY |
132 | 1 | equemene | ) |
133 | 1 | equemene | #else
|
134 | 1 | equemene | void HPL_dgemv
|
135 | 1 | equemene | ( ORDER, TRANS, M, N, ALPHA, A, LDA, X, INCX, BETA, Y, INCY ) |
136 | 1 | equemene | const enum HPL_ORDER ORDER; |
137 | 1 | equemene | const enum HPL_TRANS TRANS; |
138 | 1 | equemene | const int M; |
139 | 1 | equemene | const int N; |
140 | 1 | equemene | const double ALPHA; |
141 | 1 | equemene | const double * A; |
142 | 1 | equemene | const int LDA; |
143 | 1 | equemene | const double * X; |
144 | 1 | equemene | const int INCX; |
145 | 1 | equemene | const double BETA; |
146 | 1 | equemene | double * Y;
|
147 | 1 | equemene | const int INCY; |
148 | 1 | equemene | #endif
|
149 | 1 | equemene | { |
150 | 1 | equemene | /*
|
151 | 1 | equemene | * Purpose
|
152 | 1 | equemene | * =======
|
153 | 1 | equemene | *
|
154 | 1 | equemene | * HPL_dgemv performs one of the matrix-vector operations
|
155 | 1 | equemene | *
|
156 | 1 | equemene | * y := alpha * op( A ) * x + beta * y,
|
157 | 1 | equemene | *
|
158 | 1 | equemene | * where op( X ) is one of
|
159 | 1 | equemene | *
|
160 | 1 | equemene | * op( X ) = X or op( X ) = X^T.
|
161 | 1 | equemene | *
|
162 | 1 | equemene | * where alpha and beta are scalars, x and y are vectors and A is an m
|
163 | 1 | equemene | * by n matrix.
|
164 | 1 | equemene | *
|
165 | 1 | equemene | * Arguments
|
166 | 1 | equemene | * =========
|
167 | 1 | equemene | *
|
168 | 1 | equemene | * ORDER (local input) const enum HPL_ORDER
|
169 | 1 | equemene | * On entry, ORDER specifies the storage format of the operands
|
170 | 1 | equemene | * as follows:
|
171 | 1 | equemene | * ORDER = HplRowMajor,
|
172 | 1 | equemene | * ORDER = HplColumnMajor.
|
173 | 1 | equemene | *
|
174 | 1 | equemene | * TRANS (local input) const enum HPL_TRANS
|
175 | 1 | equemene | * On entry, TRANS specifies the operation to be performed as
|
176 | 1 | equemene | * follows:
|
177 | 1 | equemene | * TRANS = HplNoTrans y := alpha*A *x + beta*y,
|
178 | 1 | equemene | * TRANS = HplTrans y := alpha*A^T*x + beta*y.
|
179 | 1 | equemene | *
|
180 | 1 | equemene | * M (local input) const int
|
181 | 1 | equemene | * On entry, M specifies the number of rows of the matrix A.
|
182 | 1 | equemene | * M must be at least zero.
|
183 | 1 | equemene | *
|
184 | 1 | equemene | * N (local input) const int
|
185 | 1 | equemene | * On entry, N specifies the number of columns of the matrix A.
|
186 | 1 | equemene | * N must be at least zero.
|
187 | 1 | equemene | *
|
188 | 1 | equemene | * ALPHA (local input) const double
|
189 | 1 | equemene | * On entry, ALPHA specifies the scalar alpha. When ALPHA is
|
190 | 1 | equemene | * supplied as zero then A and X need not be set on input.
|
191 | 1 | equemene | *
|
192 | 1 | equemene | * A (local input) const double *
|
193 | 1 | equemene | * On entry, A points to an array of size equal to or greater
|
194 | 1 | equemene | * than LDA * n. Before entry, the leading m by n part of the
|
195 | 1 | equemene | * array A must contain the matrix coefficients.
|
196 | 1 | equemene | *
|
197 | 1 | equemene | * LDA (local input) const int
|
198 | 1 | equemene | * On entry, LDA specifies the leading dimension of A as
|
199 | 1 | equemene | * declared in the calling (sub) program. LDA must be at
|
200 | 1 | equemene | * least MAX(1,m).
|
201 | 1 | equemene | *
|
202 | 1 | equemene | * X (local input) const double *
|
203 | 1 | equemene | * On entry, X is an incremented array of dimension at least
|
204 | 1 | equemene | * ( 1 + ( n - 1 ) * abs( INCX ) ) that contains the vector x.
|
205 | 1 | equemene | *
|
206 | 1 | equemene | * INCX (local input) const int
|
207 | 1 | equemene | * On entry, INCX specifies the increment for the elements of X.
|
208 | 1 | equemene | * INCX must not be zero.
|
209 | 1 | equemene | *
|
210 | 1 | equemene | * BETA (local input) const double
|
211 | 1 | equemene | * On entry, BETA specifies the scalar beta. When ALPHA is
|
212 | 1 | equemene | * supplied as zero then Y need not be set on input.
|
213 | 1 | equemene | *
|
214 | 1 | equemene | * Y (local input/output) double *
|
215 | 1 | equemene | * On entry, Y is an incremented array of dimension at least
|
216 | 1 | equemene | * ( 1 + ( n - 1 ) * abs( INCY ) ) that contains the vector y.
|
217 | 1 | equemene | * Before entry with BETA non-zero, the incremented array Y must
|
218 | 1 | equemene | * contain the vector y. On exit, Y is overwritten by the
|
219 | 1 | equemene | * updated vector y.
|
220 | 1 | equemene | *
|
221 | 1 | equemene | * INCY (local input) const int
|
222 | 1 | equemene | * On entry, INCY specifies the increment for the elements of Y.
|
223 | 1 | equemene | * INCY must not be zero.
|
224 | 1 | equemene | *
|
225 | 1 | equemene | * ---------------------------------------------------------------------
|
226 | 1 | equemene | */
|
227 | 1 | equemene | #ifdef HPL_CALL_CBLAS
|
228 | 1 | equemene | cblas_dgemv( ORDER, TRANS, M, N, ALPHA, A, LDA, X, INCX, BETA, Y, INCY ); |
229 | 1 | equemene | #endif
|
230 | 1 | equemene | #ifdef HPL_CALL_VSIPL
|
231 | 1 | equemene | if( ORDER == HplColumnMajor )
|
232 | 1 | equemene | { |
233 | 1 | equemene | HPL_dgemv0( TRANS, M, N, ALPHA, A, LDA, X, INCX, BETA, Y, INCY ); |
234 | 1 | equemene | } |
235 | 1 | equemene | else
|
236 | 1 | equemene | { |
237 | 1 | equemene | HPL_dgemv0( ( TRANS == HplNoTrans ? HplTrans : HplNoTrans ), |
238 | 1 | equemene | N, M, ALPHA, A, LDA, X, INCX, BETA, Y, INCY ); |
239 | 1 | equemene | } |
240 | 1 | equemene | #endif
|
241 | 9 | equemene | |
242 | 9 | equemene | |
243 | 1 | equemene | #ifdef HPL_CALL_FBLAS
|
244 | 1 | equemene | double alpha = ALPHA, beta = BETA;
|
245 | 1 | equemene | #ifdef StringSunStyle
|
246 | 1 | equemene | #ifdef HPL_USE_F77_INTEGER_DEF
|
247 | 1 | equemene | F77_INTEGER IONE = 1;
|
248 | 1 | equemene | #else
|
249 | 1 | equemene | int IONE = 1; |
250 | 1 | equemene | #endif
|
251 | 1 | equemene | #endif
|
252 | 1 | equemene | #ifdef StringStructVal
|
253 | 1 | equemene | F77_CHAR ftran; |
254 | 1 | equemene | #endif
|
255 | 1 | equemene | #ifdef StringStructPtr
|
256 | 1 | equemene | F77_CHAR ftran; |
257 | 1 | equemene | #endif
|
258 | 1 | equemene | #ifdef StringCrayStyle
|
259 | 1 | equemene | F77_CHAR ftran; |
260 | 1 | equemene | #endif
|
261 | 1 | equemene | |
262 | 1 | equemene | #ifdef HPL_USE_F77_INTEGER_DEF
|
263 | 1 | equemene | const F77_INTEGER F77M = M, F77N = N,
|
264 | 1 | equemene | F77lda = LDA, F77incx = INCX, F77incy = INCY; |
265 | 1 | equemene | #else
|
266 | 1 | equemene | #define F77M M
|
267 | 1 | equemene | #define F77N N
|
268 | 1 | equemene | #define F77lda LDA
|
269 | 1 | equemene | #define F77incx INCX
|
270 | 1 | equemene | #define F77incy INCY
|
271 | 1 | equemene | #endif
|
272 | 1 | equemene | char ctran;
|
273 | 1 | equemene | |
274 | 1 | equemene | if( ORDER == HplColumnMajor )
|
275 | 1 | equemene | { |
276 | 1 | equemene | ctran = ( TRANS == HplNoTrans ? 'N' : 'T' ); |
277 | 1 | equemene | |
278 | 1 | equemene | #ifdef StringSunStyle
|
279 | 1 | equemene | F77dgemv( &ctran, &F77M, &F77N, &alpha, A, &F77lda, X, &F77incx, |
280 | 1 | equemene | &beta, Y, &F77incy, IONE ); |
281 | 1 | equemene | #endif
|
282 | 1 | equemene | #ifdef StringCrayStyle
|
283 | 1 | equemene | ftran = HPL_C2F_CHAR( ctran ); |
284 | 1 | equemene | F77dgemv( ftran, &F77M, &F77N, &alpha, A, &F77lda, X, &F77incx, |
285 | 1 | equemene | &beta, Y, &F77incy ); |
286 | 1 | equemene | #endif
|
287 | 1 | equemene | #ifdef StringStructVal
|
288 | 1 | equemene | ftran.len = 1; ftran.cp = &ctran;
|
289 | 1 | equemene | F77dgemv( ftran, &F77M, &F77N, &alpha, A, &F77lda, X, &F77incx, |
290 | 1 | equemene | &beta, Y, &F77incy ); |
291 | 1 | equemene | #endif
|
292 | 1 | equemene | #ifdef StringStructPtr
|
293 | 1 | equemene | ftran.len = 1; ftran.cp = &ctran;
|
294 | 1 | equemene | F77dgemv( &ftran, &F77M, &F77N, &alpha, A, &F77lda, X, &F77incx, |
295 | 1 | equemene | &beta, Y, &F77incy ); |
296 | 1 | equemene | #endif
|
297 | 1 | equemene | } |
298 | 1 | equemene | else
|
299 | 1 | equemene | { |
300 | 1 | equemene | ctran = ( TRANS == HplNoTrans ? 'T' : 'N' ); |
301 | 1 | equemene | #ifdef StringSunStyle
|
302 | 1 | equemene | F77dgemv( &ctran, &F77N, &F77M, &alpha, A, &F77lda, X, &F77incx, |
303 | 1 | equemene | &beta, Y, &F77incy, IONE ); |
304 | 1 | equemene | #endif
|
305 | 1 | equemene | #ifdef StringCrayStyle
|
306 | 1 | equemene | ftran = HPL_C2F_CHAR( ctran ); |
307 | 1 | equemene | F77dgemv( ftran, &F77N, &F77M, &alpha, A, &F77lda, X, &F77incx, |
308 | 1 | equemene | &beta, Y, &F77incy ); |
309 | 1 | equemene | #endif
|
310 | 1 | equemene | #ifdef StringStructVal
|
311 | 1 | equemene | ftran.len = 1; ftran.cp = &ctran;
|
312 | 1 | equemene | F77dgemv( ftran, &F77N, &F77M, &alpha, A, &F77lda, X, &F77incx, |
313 | 1 | equemene | &beta, Y, &F77incy ); |
314 | 1 | equemene | #endif
|
315 | 1 | equemene | #ifdef StringStructPtr
|
316 | 1 | equemene | ftran.len = 1; ftran.cp = &ctran;
|
317 | 1 | equemene | F77dgemv( &ftran, &F77N, &F77M, &alpha, A, &F77lda, X, &F77incx, |
318 | 1 | equemene | &beta, Y, &F77incy ); |
319 | 1 | equemene | #endif
|
320 | 1 | equemene | } |
321 | 1 | equemene | |
322 | 1 | equemene | #endif
|
323 | 9 | equemene | |
324 | 9 | equemene | #ifdef HPL_CALL_CUBLAS
|
325 | 9 | equemene | double alpha = ALPHA, beta = BETA;
|
326 | 9 | equemene | |
327 | 9 | equemene | int IONE = 1; |
328 | 9 | equemene | |
329 | 9 | equemene | #define CUBLASM M
|
330 | 9 | equemene | #define CUBLASN N
|
331 | 9 | equemene | #define CUBLASlda LDA
|
332 | 9 | equemene | #define CUBLASincx INCX
|
333 | 9 | equemene | #define CUBLASincy INCY
|
334 | 9 | equemene | |
335 | 9 | equemene | char ctran;
|
336 | 9 | equemene | |
337 | 9 | equemene | if( ORDER == HplColumnMajor )
|
338 | 9 | equemene | { |
339 | 9 | equemene | ctran = ( TRANS == HplNoTrans ? 'N' : 'T' ); |
340 | 9 | equemene | |
341 | 9 | equemene | CUBLAS_DGEMV( &ctran, &CUBLASM, &CUBLASN, |
342 | 9 | equemene | &alpha, A, &CUBLASlda, X, &CUBLASincx, |
343 | 9 | equemene | &beta, Y, &CUBLASincy, IONE ); |
344 | 9 | equemene | } |
345 | 9 | equemene | else
|
346 | 9 | equemene | { |
347 | 9 | equemene | ctran = ( TRANS == HplNoTrans ? 'T' : 'N' ); |
348 | 9 | equemene | |
349 | 9 | equemene | CUBLAS_DGEMV( &ctran, &CUBLASN, &CUBLASM, |
350 | 9 | equemene | &alpha, A, &CUBLASlda, X, &CUBLASincx, |
351 | 9 | equemene | &beta, Y, &CUBLASincy, IONE ); |
352 | 9 | equemene | } |
353 | 9 | equemene | |
354 | 9 | equemene | #endif
|
355 | 1 | equemene | /*
|
356 | 1 | equemene | * End of HPL_dgemv
|
357 | 1 | equemene | */
|
358 | 1 | equemene | } |
359 | 1 | equemene | |
360 | 1 | equemene | #endif |