Statistiques
| Révision :

root / src / blas / HPL_dcopy.c @ 12

Historique | Voir | Annoter | Télécharger (6,96 ko)

1 1 equemene
/*
2 1 equemene
 * -- High Performance Computing Linpack Benchmark (HPL)
3 1 equemene
 *    HPL - 2.0 - September 10, 2008
4 1 equemene
 *    Antoine P. Petitet
5 1 equemene
 *    University of Tennessee, Knoxville
6 1 equemene
 *    Innovative Computing Laboratory
7 1 equemene
 *    (C) Copyright 2000-2008 All Rights Reserved
8 1 equemene
 *
9 1 equemene
 * -- Copyright notice and Licensing terms:
10 1 equemene
 *
11 1 equemene
 * Redistribution  and  use in  source and binary forms, with or without
12 1 equemene
 * modification, are  permitted provided  that the following  conditions
13 1 equemene
 * are met:
14 1 equemene
 *
15 1 equemene
 * 1. Redistributions  of  source  code  must retain the above copyright
16 1 equemene
 * notice, this list of conditions and the following disclaimer.
17 1 equemene
 *
18 1 equemene
 * 2. Redistributions in binary form must reproduce  the above copyright
19 1 equemene
 * notice, this list of conditions,  and the following disclaimer in the
20 1 equemene
 * documentation and/or other materials provided with the distribution.
21 1 equemene
 *
22 1 equemene
 * 3. All  advertising  materials  mentioning  features  or  use of this
23 1 equemene
 * software must display the following acknowledgement:
24 1 equemene
 * This  product  includes  software  developed  at  the  University  of
25 1 equemene
 * Tennessee, Knoxville, Innovative Computing Laboratory.
26 1 equemene
 *
27 1 equemene
 * 4. The name of the  University,  the name of the  Laboratory,  or the
28 1 equemene
 * names  of  its  contributors  may  not  be used to endorse or promote
29 1 equemene
 * products  derived   from   this  software  without  specific  written
30 1 equemene
 * permission.
31 1 equemene
 *
32 1 equemene
 * -- Disclaimer:
33 1 equemene
 *
34 1 equemene
 * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
35 1 equemene
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
36 1 equemene
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
37 1 equemene
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
38 1 equemene
 * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
39 1 equemene
 * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
40 1 equemene
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
41 1 equemene
 * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
42 1 equemene
 * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
43 1 equemene
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
44 1 equemene
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
45 1 equemene
 * ---------------------------------------------------------------------
46 1 equemene
 */
47 1 equemene
/*
48 1 equemene
 * Include files
49 1 equemene
 */
50 1 equemene
#include "hpl.h"
51 1 equemene
52 1 equemene
#ifndef HPL_dcopy
53 1 equemene
54 1 equemene
#ifdef STDC_HEADERS
void HPL_dcopy
(
   const int                        N,
   const double *                   X,
   const int                        INCX,
   double *                         Y,
   const int                        INCY
)
#else
void HPL_dcopy
( N, X, INCX, Y, INCY )
   const int                        N;
   const double *                   X;
   const int                        INCX;
   double *                         Y;
   const int                        INCY;
#endif
{
/*
 * Purpose
 * =======
 *
 * HPL_dcopy copies the vector x into the vector y.
 *
 * The actual work is delegated to the BLAS backend selected at compile
 * time through the HPL_CALL_* macros (CBLAS, GSL CBLAS, Fortran 77 BLAS,
 * CUBLAS, ACML).  A portable C loop, unrolled by 8, is used when
 * HPL_CALL_VSIPL is defined, and also when no backend macro is defined
 * at all, so that the routine never silently degenerates into a no-op.
 *
 * Arguments
 * =========
 *
 * N       (local input)                 const int
 *         On entry, N specifies the length of the vectors  x  and  y. N
 *         must be at least zero. Nothing is copied by the portable loop
 *         when N is not positive.
 *
 * X       (local input)                 const double *
 *         On entry,  X  is an incremented array of dimension  at  least
 *         ( 1 + ( n - 1 ) * abs( INCX ) )  that  contains the vector x.
 *
 * INCX    (local input)                 const int
 *         On entry, INCX specifies the increment for the elements of X.
 *         INCX must not be zero.
 *
 * Y       (local input/output)          double *
 *         On entry,  Y  is an incremented array of dimension  at  least
 *         ( 1 + ( n - 1 ) * abs( INCY ) )  that  contains the vector y.
 *         On exit, the entries of the incremented array  Y  are updated
 *         with the entries of the incremented array X.
 *
 * INCY    (local input)                 const int
 *         On entry, INCY specifies the increment for the elements of Y.
 *         INCY must not be zero.
 *
 * Notes
 * =====
 *
 * In the portable loop, a negative increment makes the traversal start
 * at the far end of the corresponding array and move towards its first
 * entry, so that every access stays inside the dimension stated above.
 * This is intended to match the reference BLAS convention; the library
 * backends are expected to behave the same way (not verifiable here).
 *
 * ---------------------------------------------------------------------
 */
#ifdef HPL_CALL_CBLAS
   cblas_dcopy( N, X, INCX, Y, INCY );
#endif
#ifdef HPL_CALL_GSLCBLAS
   cblas_dcopy( N, X, INCX, Y, INCY );
#endif
/*
 * Portable implementation: explicitly requested through HPL_CALL_VSIPL,
 * or used as a fallback when no backend at all has been selected.
 */
#if defined( HPL_CALL_VSIPL ) || \
    !( defined( HPL_CALL_CBLAS  ) || defined( HPL_CALL_GSLCBLAS ) || \
       defined( HPL_CALL_FBLAS  ) || defined( HPL_CALL_CUBLAS   ) || \
       defined( HPL_CALL_ACML   ) )
   {
/*
 * Work on local cursors so that X and Y keep their entry values for any
 * code following this section. The enclosing block keeps declarations
 * legal even when a backend call precedes it.
 */
      const double              * src = X;
      double                    * dst = Y;
      double                    x0, x1, x2, x3, x4, x5, x6, x7;
      int                       i, nu;

      if( N > 0 )
      {
/*
 * Negative increment: start from the last stored entry of the array.
 */
         if( INCX < 0 ) src += ( 1 - N ) * INCX;
         if( INCY < 0 ) dst += ( 1 - N ) * INCY;
/*
 * nu is N rounded down to a multiple of 8: entries handled 8 at a time.
 */
         nu = ( N >> 3 ) << 3;

         for( i = 0; i < nu; i += 8 )
         {
/*
 * Load the 8 source entries before storing any of them, as the original
 * unrolled loop did.
 */
            x0 = src[0       ]; x1 = src[    INCX]; x2 = src[2 * INCX];
            x3 = src[3 * INCX]; x4 = src[4 * INCX]; x5 = src[5 * INCX];
            x6 = src[6 * INCX]; x7 = src[7 * INCX];

            dst[0       ] = x0; dst[    INCY] = x1; dst[2 * INCY] = x2;
            dst[3 * INCY] = x3; dst[4 * INCY] = x4; dst[5 * INCY] = x5;
            dst[6 * INCY] = x6; dst[7 * INCY] = x7;

            src += 8 * INCX;
            dst += 8 * INCY;
         }
/*
 * Remaining N - nu ( < 8 ) entries, one at a time.
 */
         for( i = N - nu; i != 0; i-- )
         {
            *dst = *src;
            src += INCX;
            dst += INCY;
         }
      }
   }
#endif

#ifdef HPL_CALL_FBLAS
#ifdef HPL_USE_F77_INTEGER_DEF
/*
 * The Fortran interface takes its integer arguments by address, using
 * the F77_INTEGER type: make local copies of that type.
 */
   const F77_INTEGER         F77N = N, F77incx = INCX, F77incy = INCY;
#else
#define F77N                 N
#define F77incx              INCX
#define F77incy              INCY
#endif
   F77dcopy( &F77N, X, &F77incx, Y, &F77incy );
#endif

#ifdef HPL_CALL_CUBLAS

#define CUBLASN                 N
#define CUBLASincx              INCX
#define CUBLASincy              INCY

   CUBLAS_DCOPY( &CUBLASN, X, &CUBLASincx, Y, &CUBLASincy );
#endif

#ifdef HPL_CALL_ACML

#define ACMLN                 N
#define ACMLincx              INCX
#define ACMLincy              INCY

   dcopy_( &ACMLN, X, &ACMLincx, Y, &ACMLincy );
#endif
/*
 * End of HPL_dcopy
 */
}
189 1 equemene
190 1 equemene
#endif