Statistiques
| Révision :

root / src / pauxil / HPL_dlaswp05T.c @ 1

Historique | Voir | Annoter | Télécharger (8,06 ko)

1
/* 
2
 * -- High Performance Computing Linpack Benchmark (HPL)                
3
 *    HPL - 2.0 - September 10, 2008                          
4
 *    Antoine P. Petitet                                                
5
 *    University of Tennessee, Knoxville                                
6
 *    Innovative Computing Laboratory                                 
7
 *    (C) Copyright 2000-2008 All Rights Reserved                       
8
 *                                                                      
9
 * -- Copyright notice and Licensing terms:                             
10
 *                                                                      
11
 * Redistribution  and  use in  source and binary forms, with or without
12
 * modification, are  permitted provided  that the following  conditions
13
 * are met:                                                             
14
 *                                                                      
15
 * 1. Redistributions  of  source  code  must retain the above copyright
16
 * notice, this list of conditions and the following disclaimer.        
17
 *                                                                      
18
 * 2. Redistributions in binary form must reproduce  the above copyright
19
 * notice, this list of conditions,  and the following disclaimer in the
20
 * documentation and/or other materials provided with the distribution. 
21
 *                                                                      
22
 * 3. All  advertising  materials  mentioning  features  or  use of this
23
 * software must display the following acknowledgement:                 
24
 * This  product  includes  software  developed  at  the  University  of
25
 * Tennessee, Knoxville, Innovative Computing Laboratory.             
26
 *                                                                      
27
 * 4. The name of the  University,  the name of the  Laboratory,  or the
28
 * names  of  its  contributors  may  not  be used to endorse or promote
29
 * products  derived   from   this  software  without  specific  written
30
 * permission.                                                          
31
 *                                                                      
32
 * -- Disclaimer:                                                       
33
 *                                                                      
34
 * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
35
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
36
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
37
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
38
 * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
39
 * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
40
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
41
 * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
42
 * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
43
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
44
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
45
 * ---------------------------------------------------------------------
46
 */ 
47
/*
48
 * Include files
49
 */
50
#include "hpl.h"
51
/*
 * Define default value for unrolling factor.  HPL_LASWP05T_DEPTH is the
 * number of elements of a column of U that are copied per pass of the
 * unrolled loop below;  HPL_LASWP05T_LOG2_DEPTH  must be its logarithm
 * in base 2.
 */
#ifndef HPL_LASWP05T_DEPTH
#define    HPL_LASWP05T_DEPTH       32
#define    HPL_LASWP05T_LOG2_DEPTH   5
#endif
/*
 * Reject inconsistent overrides at compile time: the blocking below is
 * computed with shifts by HPL_LASWP05T_LOG2_DEPTH while the loop steps
 * by HPL_LASWP05T_DEPTH,  and the unrolled body provides at most 32
 * copies.  A mismatch would otherwise silently skip or misplace data.
 */
#ifndef HPL_LASWP05T_LOG2_DEPTH
#error "HPL_LASWP05T_LOG2_DEPTH must be defined together with HPL_LASWP05T_DEPTH"
#endif
#if ( HPL_LASWP05T_DEPTH != ( 1 << HPL_LASWP05T_LOG2_DEPTH ) )
#error "HPL_LASWP05T_DEPTH must be equal to 2^HPL_LASWP05T_LOG2_DEPTH"
#endif
#if ( HPL_LASWP05T_DEPTH > 32 )
#error "HPL_LASWP05T_DEPTH must not exceed 32 (largest unrolled depth)"
#endif

#ifdef STDC_HEADERS
void HPL_dlaswp05T
(
   const int                        M,
   const int                        N,
   double *                         A,
   const int                        LDA,
   const double *                   U,
   const int                        LDU,
   const int *                      LINDXA,
   const int *                      LINDXAU
)
#else
void HPL_dlaswp05T
( M, N, A, LDA, U, LDU, LINDXA, LINDXAU )
   const int                        M;
   const int                        N;
   double *                         A;
   const int                        LDA;
   const double *                   U;
   const int                        LDU;
   const int *                      LINDXA;
   const int *                      LINDXAU;
#endif
{
/* 
 * Purpose
 * =======
 *
 * HPL_dlaswp05T copies columns of  U of global offset LINDXAU into rows
 * of A at positions indicated by LINDXA.  For every i in [0,M) and every
 * k in [0,N),  A( LINDXA[i], k )  is set to  U( k, LINDXAU[i] ).  The
 * routine returns immediately when M or N is not positive.
 *
 * Arguments
 * =========
 *
 * M       (local input)                 const int
 *         On entry,  M  specifies the number of columns of U that should
 *         be copied into A. M must be at least zero.
 *
 * N       (local input)                 const int
 *         On entry, N specifies the length of the columns of U that will
 *         be copied into rows of A. N must be at least zero.
 *
 * A       (local output)                double *
 *         On entry, A points to an array of dimension (LDA,N). On exit,
 *         the  rows of this array specified by  LINDXA  are replaced by
 *         columns of U indicated by LINDXAU.
 *
 * LDA     (local input)                 const int
 *         On entry, LDA specifies the leading dimension of the array A.
 *         LDA must be at least MAX(1,M).
 *
 * U       (local input)                 const double *
 *         On entry,  U  points  to an array of dimension (LDU,*).  This
 *         array contains the columns that are to be copied into rows of
 *         A. U is only read by this routine.
 *
 * LDU     (local input)                 const int
 *         On entry, LDU specifies the leading dimension of the array U.
 *         LDU must be at least MAX(1,N).
 *
 * LINDXA  (local input)                 const int *
 *         On entry, LINDXA is an array of dimension M that contains the
 *         local row indexes of A that should be copied from U.
 *
 * LINDXAU (local input)                 const int *
 *         On entry, LINDXAU  is an array of dimension  M that  contains
 *         the local column indexes of U that should be copied in A.
 *
 * ---------------------------------------------------------------------
 */ 
/*
 * .. Local Variables ..
 */
   const double               * U0 = U, * u0;
   double                     * a0;
/*
 * incA is the distance in A between two consecutive blocks of
 * HPL_LASWP05T_DEPTH columns.  It is computed in size_t so that
 * LDA * HPL_LASWP05T_DEPTH cannot wrap around the range of an int.
 */
   const size_t               incA = (size_t)(LDA) <<
                                     HPL_LASWP05T_LOG2_DEPTH;
   const int                  incU = ( 1 << HPL_LASWP05T_LOG2_DEPTH );
   int                        nr, nu;
   register int               i, j;
/* ..
 * .. Executable Statements ..
 */
   if( ( M <= 0 ) || ( N <= 0 ) ) return;
/*
 * nu is N rounded down to a multiple of the unrolling depth, nr is the
 * number of leftover elements handled by the clean-up loop.
 */
   nr = N - ( nu = (int)( ( (unsigned int)(N) >> HPL_LASWP05T_LOG2_DEPTH ) <<
                            HPL_LASWP05T_LOG2_DEPTH ) );
/*
 * Unrolled part:  each pass moves HPL_LASWP05T_DEPTH consecutive entries
 * of every selected column of U into the matching row of A.  A and U0 are
 * advanced by one block of columns / rows after each pass.
 */
   for( j = 0; j < nu; j += HPL_LASWP05T_DEPTH, A += incA, U0 += incU )
   {
      for( i = 0; i < M; i++ )
      {
         a0 = A  + (size_t)(LINDXA[ i]);
         u0 = U0 + (size_t)(LINDXAU[i]) * (size_t)(LDU);

         *a0 = u0[ 0]; a0 += LDA;
#if ( HPL_LASWP05T_DEPTH >  1 )
         *a0 = u0[ 1]; a0 += LDA;
#endif
#if ( HPL_LASWP05T_DEPTH >  2 )
         *a0 = u0[ 2]; a0 += LDA; *a0 = u0[ 3]; a0 += LDA;
#endif
#if ( HPL_LASWP05T_DEPTH >  4 )
         *a0 = u0[ 4]; a0 += LDA; *a0 = u0[ 5]; a0 += LDA;
         *a0 = u0[ 6]; a0 += LDA; *a0 = u0[ 7]; a0 += LDA;
#endif
#if ( HPL_LASWP05T_DEPTH >  8 )
         *a0 = u0[ 8]; a0 += LDA; *a0 = u0[ 9]; a0 += LDA;
         *a0 = u0[10]; a0 += LDA; *a0 = u0[11]; a0 += LDA;
         *a0 = u0[12]; a0 += LDA; *a0 = u0[13]; a0 += LDA;
         *a0 = u0[14]; a0 += LDA; *a0 = u0[15]; a0 += LDA;
#endif
#if ( HPL_LASWP05T_DEPTH > 16 )
         *a0 = u0[16]; a0 += LDA; *a0 = u0[17]; a0 += LDA;
         *a0 = u0[18]; a0 += LDA; *a0 = u0[19]; a0 += LDA;
         *a0 = u0[20]; a0 += LDA; *a0 = u0[21]; a0 += LDA;
         *a0 = u0[22]; a0 += LDA; *a0 = u0[23]; a0 += LDA;
         *a0 = u0[24]; a0 += LDA; *a0 = u0[25]; a0 += LDA;
         *a0 = u0[26]; a0 += LDA; *a0 = u0[27]; a0 += LDA;
         *a0 = u0[28]; a0 += LDA; *a0 = u0[29]; a0 += LDA;
         *a0 = u0[30]; a0 += LDA; *a0 = u0[31]; a0 += LDA;
#endif
      }
   }
/*
 * Clean-up part:  A and U0 now point past the last full block, copy the
 * remaining nr entries of every selected column one at a time.
 */
   if( nr > 0 )
   {
      for( i = 0; i < M; i++ )
      {
         a0 = A  + (size_t)(LINDXA[ i]);
         u0 = U0 + (size_t)(LINDXAU[i]) * (size_t)(LDU);
         for( j = 0; j < nr; j++, a0 += LDA ) { *a0 = u0[j]; }
      }
   }
/*
 * End of HPL_dlaswp05T
 */
}