root / src / blas / HPL_daxpy.c @ 9
Historique | Voir | Annoter | Télécharger (3,87 ko)
1 |
/*
|
---|---|
2 |
* Include files
|
3 |
*/
|
4 |
#include "hpl.h" |
5 |
|
6 |
#ifndef HPL_daxpy
|
7 |
|
8 |
#ifdef STDC_HEADERS
|
9 |
void HPL_daxpy
|
10 |
( |
11 |
const int N, |
12 |
const double ALPHA, |
13 |
const double * X, |
14 |
const int INCX, |
15 |
double * Y,
|
16 |
const int INCY |
17 |
) |
18 |
#else
|
19 |
void HPL_daxpy
|
20 |
( N, ALPHA, X, INCX, Y, INCY ) |
21 |
const int N; |
22 |
const double ALPHA; |
23 |
const double * X; |
24 |
const int INCX; |
25 |
double * Y;
|
26 |
const int INCY; |
27 |
#endif
|
28 |
{ |
29 |
/*
|
30 |
* Purpose
|
31 |
* =======
|
32 |
*
|
33 |
* HPL_daxpy scales the vector x by alpha and adds it to y.
|
34 |
*
|
35 |
*
|
36 |
* Arguments
|
37 |
* =========
|
38 |
*
|
39 |
* N (local input) const int
|
40 |
* On entry, N specifies the length of the vectors x and y. N
|
41 |
* must be at least zero.
|
42 |
*
|
43 |
* ALPHA (local input) const double
|
44 |
* On entry, ALPHA specifies the scalar alpha. When ALPHA is
|
45 |
* supplied as zero, then the entries of the incremented array X
|
46 |
* need not be set on input.
|
47 |
*
|
48 |
* X (local input) const double *
|
49 |
* On entry, X is an incremented array of dimension at least
|
50 |
* ( 1 + ( n - 1 ) * abs( INCX ) ) that contains the vector x.
|
51 |
*
|
52 |
* INCX (local input) const int
|
53 |
* On entry, INCX specifies the increment for the elements of X.
|
54 |
* INCX must not be zero.
|
55 |
*
|
56 |
* Y (local input/output) double *
|
57 |
* On entry, Y is an incremented array of dimension at least
|
58 |
* ( 1 + ( n - 1 ) * abs( INCY ) ) that contains the vector y.
|
59 |
* On exit, the entries of the incremented array Y are updated
|
60 |
* with the scaled entries of the incremented array X.
|
61 |
*
|
62 |
* INCY (local input) const int
|
63 |
* On entry, INCY specifies the increment for the elements of Y.
|
64 |
* INCY must not be zero.
|
65 |
*
|
66 |
* ---------------------------------------------------------------------
|
67 |
*/
|
68 |
#ifdef HPL_CALL_CBLAS
|
69 |
cblas_daxpy( N, ALPHA, X, INCX, Y, INCY ); |
70 |
#endif
|
71 |
#ifdef HPL_CALL_VSIPL
|
72 |
register const double alpha = ALPHA; |
73 |
register double x0, x1, x2, x3, y0, y1, y2, y3; |
74 |
const double * StX; |
75 |
register int i; |
76 |
int nu;
|
77 |
const int incX2 = 2 * INCX, incY2 = 2 * INCY, |
78 |
incX3 = 3 * INCX, incY3 = 3 * INCY, |
79 |
incX4 = 4 * INCX, incY4 = 4 * INCY; |
80 |
|
81 |
if( ( N > 0 ) && ( alpha != HPL_rzero ) ) |
82 |
{ |
83 |
if( ( nu = ( N >> 2 ) << 2 ) != 0 ) |
84 |
{ |
85 |
StX = X + nu * INCX; |
86 |
|
87 |
do
|
88 |
{ |
89 |
x0 = (*X); y0 = (*Y); x1 = X[INCX ]; y1 = Y[INCY ]; |
90 |
x2 = X[incX2]; y2 = Y[incY2]; x3 = X[incX3]; y3 = Y[incY3]; |
91 |
|
92 |
*Y = y0 + alpha * x0; Y[INCY ] = y1 + alpha * x1; |
93 |
Y[incY2] = y2 + alpha * x2; Y[incY3] = y3 + alpha * x3; |
94 |
|
95 |
X += incX4; |
96 |
Y += incY4; |
97 |
|
98 |
} while( X != StX );
|
99 |
} |
100 |
|
101 |
for( i = N - nu; i != 0; i-- ) |
102 |
{ |
103 |
x0 = (*X); |
104 |
y0 = (*Y); |
105 |
|
106 |
*Y = y0 + alpha * x0; |
107 |
|
108 |
X += INCX; |
109 |
Y += INCY; |
110 |
} |
111 |
} |
112 |
#endif
|
113 |
|
114 |
#ifdef HPL_CALL_FBLAS
|
115 |
double alpha = ALPHA;
|
116 |
#ifdef HPL_USE_F77_INTEGER_DEF
|
117 |
const F77_INTEGER F77N = N, F77incx = INCX, F77incy = INCY;
|
118 |
#else
|
119 |
#define F77N N
|
120 |
#define F77incx INCX
|
121 |
#define F77incy INCY
|
122 |
#endif
|
123 |
F77daxpy( &F77N, &alpha, X, &F77incx, Y, &F77incy ); |
124 |
#endif
|
125 |
|
126 |
#ifdef HPL_CALL_CUBLAS
|
127 |
double alpha = ALPHA;
|
128 |
#define CUBLASN N
|
129 |
#define CUBLASincx INCX
|
130 |
#define CUBLASincy INCY
|
131 |
CUBLAS_DAXPY( &CUBLASN, &alpha, X, &CUBLASincx, Y, &CUBLASincy ); |
132 |
#endif
|
133 |
/*
|
134 |
* End of HPL_daxpy
|
135 |
*/
|
136 |
} |
137 |
|
138 |
#endif
|