Statistiques
| Révision :

root / src / pauxil / HPL_dlaswp05T.c @ 1

Historique | Voir | Annoter | Télécharger (8,06 ko)

1 1 equemene
/*
2 1 equemene
 * -- High Performance Computing Linpack Benchmark (HPL)
3 1 equemene
 *    HPL - 2.0 - September 10, 2008
4 1 equemene
 *    Antoine P. Petitet
5 1 equemene
 *    University of Tennessee, Knoxville
6 1 equemene
 *    Innovative Computing Laboratory
7 1 equemene
 *    (C) Copyright 2000-2008 All Rights Reserved
8 1 equemene
 *
9 1 equemene
 * -- Copyright notice and Licensing terms:
10 1 equemene
 *
11 1 equemene
 * Redistribution  and  use in  source and binary forms, with or without
12 1 equemene
 * modification, are  permitted provided  that the following  conditions
13 1 equemene
 * are met:
14 1 equemene
 *
15 1 equemene
 * 1. Redistributions  of  source  code  must retain the above copyright
16 1 equemene
 * notice, this list of conditions and the following disclaimer.
17 1 equemene
 *
18 1 equemene
 * 2. Redistributions in binary form must reproduce  the above copyright
19 1 equemene
 * notice, this list of conditions,  and the following disclaimer in the
20 1 equemene
 * documentation and/or other materials provided with the distribution.
21 1 equemene
 *
22 1 equemene
 * 3. All  advertising  materials  mentioning  features  or  use of this
23 1 equemene
 * software must display the following acknowledgement:
24 1 equemene
 * This  product  includes  software  developed  at  the  University  of
25 1 equemene
 * Tennessee, Knoxville, Innovative Computing Laboratory.
26 1 equemene
 *
27 1 equemene
 * 4. The name of the  University,  the name of the  Laboratory,  or the
28 1 equemene
 * names  of  its  contributors  may  not  be used to endorse or promote
29 1 equemene
 * products  derived   from   this  software  without  specific  written
30 1 equemene
 * permission.
31 1 equemene
 *
32 1 equemene
 * -- Disclaimer:
33 1 equemene
 *
34 1 equemene
 * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
35 1 equemene
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
36 1 equemene
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
37 1 equemene
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
38 1 equemene
 * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
39 1 equemene
 * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
40 1 equemene
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
41 1 equemene
 * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
42 1 equemene
 * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
43 1 equemene
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
44 1 equemene
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
45 1 equemene
 * ---------------------------------------------------------------------
46 1 equemene
 */
47 1 equemene
/*
48 1 equemene
 * Include files
49 1 equemene
 */
50 1 equemene
#include "hpl.h"
51 1 equemene
/*
52 1 equemene
 * Define default value for unrolling factor
53 1 equemene
 */
54 1 equemene
#ifndef HPL_LASWP05T_DEPTH
#define    HPL_LASWP05T_DEPTH       32
#define    HPL_LASWP05T_LOG2_DEPTH   5
#endif
/*
 * The hand-unrolled loop in HPL_dlaswp05T is only correct when the two
 * macros above agree:  the depth must be a power of two, no larger than
 * 32 (the number of unrolled statements available),  and the second one
 * must be its base-2 logarithm. Any other combination would compile but
 * leave entries uncopied, so it is rejected here at compile time.
 */
#ifndef HPL_LASWP05T_LOG2_DEPTH
#error "HPL_LASWP05T_LOG2_DEPTH must be defined with HPL_LASWP05T_DEPTH"
#endif
#if ( HPL_LASWP05T_DEPTH > 32 ) || \
    ( ( 1 << HPL_LASWP05T_LOG2_DEPTH ) != HPL_LASWP05T_DEPTH )
#error "HPL_LASWP05T_DEPTH must be 2^HPL_LASWP05T_LOG2_DEPTH and <= 32"
#endif

#ifdef STDC_HEADERS
void HPL_dlaswp05T
(
   const int                        M,
   const int                        N,
   double *                         A,
   const int                        LDA,
   const double *                   U,
   const int                        LDU,
   const int *                      LINDXA,
   const int *                      LINDXAU
)
#else
void HPL_dlaswp05T
( M, N, A, LDA, U, LDU, LINDXA, LINDXAU )
   const int                        M;
   const int                        N;
   double *                         A;
   const int                        LDA;
   const double *                   U;
   const int                        LDU;
   const int *                      LINDXA;
   const int *                      LINDXAU;
#endif
{
/*
 * Purpose
 * =======
 *
 * HPL_dlaswp05T copies columns of  U of global offset LINDXAU into rows
 * of A at positions indicated by LINDXA.  For every i in [0,M) and every
 * j in [0,N), the entry  A( LINDXA[i], j )  is overwritten with the j-th
 * element of column LINDXAU[i] of U. The routine returns immediately and
 * touches nothing when M or N is not positive.
 *
 * Arguments
 * =========
 *
 * M       (local input)                 const int
 *         On entry,  M  specifies the number of columns of U that should
 *         be copied into A. M must be at least zero.
 *
 * N       (local input)                 const int
 *         On entry, N specifies the length of the columns of U that will
 *         be copied into rows of A. N must be at least zero.
 *
 * A       (local output)                double *
 *         On entry, A points to an array of dimension (LDA,N). On exit,
 *         the  rows of this array specified by  LINDXA  are replaced by
 *         columns of U indicated by LINDXAU.
 *
 * LDA     (local input)                 const int
 *         On entry, LDA specifies the leading dimension of the array A.
 *         LDA must be at least MAX(1,M).
 *
 * U       (local input)                 const double *
 *         On entry,  U  points  to an array of dimension (LDU,*).  This
 *         array contains the columns that are to be copied into rows of
 *         A. It is only read by this routine.
 *
 * LDU     (local input)                 const int
 *         On entry, LDU specifies the leading dimension of the array U.
 *         LDU must be at least MAX(1,N).
 *
 * LINDXA  (local input)                 const int *
 *         On entry, LINDXA is an array of dimension M that contains the
 *         local row indexes of A that should be copied from U.
 *
 * LINDXAU (local input)                 const int *
 *         On entry, LINDXAU  is an array of dimension  M that  contains
 *         the local column indexes of U that should be copied in A.
 *
 * ---------------------------------------------------------------------
 */
/*
 * .. Local Variables ..
 */
   const double               * U0 = U, * u0;
   double                     * a0;
/*
 * Strides between two consecutive blocks of  HPL_LASWP05T_DEPTH entries.
 * They are computed in size_t, like the other offsets below, so that the
 * product LDA * HPL_LASWP05T_DEPTH cannot wrap around the range of an int
 * when LDA is large.
 */
   const size_t               incA = (size_t)(LDA) <<
                                     HPL_LASWP05T_LOG2_DEPTH,
                              incU = (size_t)(1)   <<
                                     HPL_LASWP05T_LOG2_DEPTH;
   int                        nr, nu;
   int                        i, j;
/* ..
 * .. Executable Statements ..
 */
   if( ( M <= 0 ) || ( N <= 0 ) ) return;
/*
 * nu is N rounded down to a multiple of the unrolling depth, nr is the
 * number of trailing entries that are handled by the clean-up loop.
 */
   nu = (int)( ( (unsigned int)(N) >> HPL_LASWP05T_LOG2_DEPTH ) <<
               HPL_LASWP05T_LOG2_DEPTH );
   nr = N - nu;
/*
 * Unrolled part: each pass handles HPL_LASWP05T_DEPTH consecutive entries
 * of every selected column of U, then  A  and  U0  move on to the next
 * block of entries.
 */
   for( j = 0; j < nu; j += HPL_LASWP05T_DEPTH, A += incA, U0 += incU )
   {
      for( i = 0; i < M; i++ )
      {
         /* a0 walks along row LINDXA[i] of A (stride LDA), u0 points  */
         /* into the contiguous column LINDXAU[i] of U.                */
         a0 = A  + (size_t)(LINDXA[ i]);
         u0 = U0 + (size_t)(LINDXAU[i]) * (size_t)(LDU);

         *a0 = u0[ 0]; a0 += LDA;
#if ( HPL_LASWP05T_DEPTH >  1 )
         *a0 = u0[ 1]; a0 += LDA;
#endif
#if ( HPL_LASWP05T_DEPTH >  2 )
         *a0 = u0[ 2]; a0 += LDA; *a0 = u0[ 3]; a0 += LDA;
#endif
#if ( HPL_LASWP05T_DEPTH >  4 )
         *a0 = u0[ 4]; a0 += LDA; *a0 = u0[ 5]; a0 += LDA;
         *a0 = u0[ 6]; a0 += LDA; *a0 = u0[ 7]; a0 += LDA;
#endif
#if ( HPL_LASWP05T_DEPTH >  8 )
         *a0 = u0[ 8]; a0 += LDA; *a0 = u0[ 9]; a0 += LDA;
         *a0 = u0[10]; a0 += LDA; *a0 = u0[11]; a0 += LDA;
         *a0 = u0[12]; a0 += LDA; *a0 = u0[13]; a0 += LDA;
         *a0 = u0[14]; a0 += LDA; *a0 = u0[15]; a0 += LDA;
#endif
#if ( HPL_LASWP05T_DEPTH > 16 )
         *a0 = u0[16]; a0 += LDA; *a0 = u0[17]; a0 += LDA;
         *a0 = u0[18]; a0 += LDA; *a0 = u0[19]; a0 += LDA;
         *a0 = u0[20]; a0 += LDA; *a0 = u0[21]; a0 += LDA;
         *a0 = u0[22]; a0 += LDA; *a0 = u0[23]; a0 += LDA;
         *a0 = u0[24]; a0 += LDA; *a0 = u0[25]; a0 += LDA;
         *a0 = u0[26]; a0 += LDA; *a0 = u0[27]; a0 += LDA;
         *a0 = u0[28]; a0 += LDA; *a0 = u0[29]; a0 += LDA;
         *a0 = u0[30]; a0 += LDA; *a0 = u0[31]; a0 += LDA;
#endif
      }
   }
/*
 * Clean-up part: copy the nr entries that did not fill a complete block.
 * A and U0 already point just past the last block handled above.
 */
   if( nr > 0 )
   {
      for( i = 0; i < M; i++ )
      {
         a0 = A  + (size_t)(LINDXA[ i]);
         u0 = U0 + (size_t)(LINDXAU[i]) * (size_t)(LDU);
         for( j = 0; j < nr; j++, a0 += LDA ) { *a0 = u0[j]; }
      }
   }
/*
 * End of HPL_dlaswp05T
 */
}