Statistiques
| Révision :

root / src / pauxil / HPL_dlaswp03T.c @ 1

Historique | Voir | Annoter | Télécharger (7,51 ko)

/*
 * -- High Performance Computing Linpack Benchmark (HPL)
 *    HPL - 2.0 - September 10, 2008
 *    Antoine P. Petitet
 *    University of Tennessee, Knoxville
 *    Innovative Computing Laboratory
 *    (C) Copyright 2000-2008 All Rights Reserved
 *
 * -- Copyright notice and Licensing terms:
 *
 * Redistribution  and  use in  source and binary forms, with or without
 * modification, are  permitted provided  that the following  conditions
 * are met:
 *
 * 1. Redistributions  of  source  code  must retain the above copyright
 * notice, this list of conditions and the following disclaimer.
 *
 * 2. Redistributions in binary form must reproduce  the above copyright
 * notice, this list of conditions,  and the following disclaimer in the
 * documentation and/or other materials provided with the distribution.
 *
 * 3. All  advertising  materials  mentioning  features  or  use of this
 * software must display the following acknowledgement:
 * This  product  includes  software  developed  at  the  University  of
 * Tennessee, Knoxville, Innovative Computing Laboratory.
 *
 * 4. The name of the  University,  the name of the  Laboratory,  or the
 * names  of  its  contributors  may  not  be used to endorse or promote
 * products  derived   from   this  software  without  specific  written
 * permission.
 *
 * -- Disclaimer:
 *
 * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
 * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
 * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 * ---------------------------------------------------------------------
 */
/*
 * Include files
 */
#include "hpl.h"

#include <stddef.h>
/*
 * Define default value for unrolling factor
 */
#ifndef HPL_LASWP03T_DEPTH
#define    HPL_LASWP03T_DEPTH       32
#define    HPL_LASWP03T_LOG2_DEPTH   5
#endif
/*
 * Reject unrolling configurations that the copy loop below cannot honour.
 * The chunk count is derived from HPL_LASWP03T_LOG2_DEPTH while the loop
 * strides use HPL_LASWP03T_DEPTH, so both macros must describe the same
 * power of two;  the unrolled body only provides statements for indices
 * 0 through 31.
 */
#ifndef HPL_LASWP03T_LOG2_DEPTH
#error "HPL_LASWP03T_LOG2_DEPTH must be defined with HPL_LASWP03T_DEPTH"
#endif
#if ( HPL_LASWP03T_DEPTH != ( 1 << HPL_LASWP03T_LOG2_DEPTH ) )
#error "HPL_LASWP03T_DEPTH must equal 2^HPL_LASWP03T_LOG2_DEPTH"
#endif
#if ( HPL_LASWP03T_DEPTH > 32 )
#error "HPL_LASWP03T_DEPTH must not exceed 32"
#endif

#ifdef STDC_HEADERS
void HPL_dlaswp03T
(
   const int                        M,
   const int                        N,
   double *                         U,
   const int                        LDU,
   const double *                   W0,
   const double *                   W,
   const int                        LDW
)
#else
void HPL_dlaswp03T
( M, N, U, LDU, W0, W, LDW )
   const int                        M;
   const int                        N;
   double *                         U;
   const int                        LDU;
   const double *                   W0;
   const double *                   W;
   const int                        LDW;
#endif
{
/* 
 * Purpose
 * =======
 *
 * HPL_dlaswp03T copies  columns of W into an array U.  The  destination
 * in U of these columns contained in W is stored within W0.
 *
 * Arguments
 * =========
 *
 * M       (local input)                 const int
 *         On entry, M  specifies  the  number  of columns of  W  stored
 *         contiguously that should be copied into U. M must be at least
 *         zero;  the routine returns immediately when M <= 0.
 *
 * N       (local input)                 const int
 *         On entry,  N  specifies  the  length of columns of  W  stored
 *         contiguously that should be copied into U. N must be at least
 *         zero;  the routine returns immediately when N <= 0.
 *
 * U       (local input/output)          double *
 *         On entry, U points to an array of leading dimension LDU  that
 *         has at least  1 + max( W0(i*LDW) )  columns.  On exit, column
 *         W0(i*LDW) of U holds the first N entries of W(:,i),  for each
 *         i in [0..M). Other entries of U are left unchanged.
 *
 * LDU     (local input)                 const int
 *         On entry, LDU specifies the leading dimension of the array U.
 *         LDU must be at least MAX(1,N).
 *
 * W0      (local input)                 const double *
 *         On entry,  W0  is an array of size (M-1)*LDW+1, that contains
 *         the destination offset  in U where the columns of W should be
 *         copied.  Offsets are stored as  double  and converted here to
 *         size_t, so they must be non-negative integral values.
 *
 * W       (local input)                 const double *
 *         On entry, W  is an array of size (LDW,M),  that contains data
 *         to be copied into U. For i in [0..M),  entries W(:,i)  should
 *         be copied into the column W0(i*LDW) of U.
 *
 * LDW     (local input)                 const int
 *         On entry, LDW specifies the leading dimension of the array W.
 *         LDW must be at least MAX(1,N+1).
 *
 * ---------------------------------------------------------------------
 */ 
/*
 * .. Local Variables ..
 */
   const double               * wcur = W, * w0;
   double                     * ucur = U, * u0;
   int                        nr, nu;
   int                        i, j;
/* ..
 * .. Executable Statements ..
 */
   if( ( M <= 0 ) || ( N <= 0 ) ) return;
/*
 * nu is N rounded down to a multiple of the unrolling depth,  nr is the
 * number of trailing entries handled by the clean-up loop.
 */
   nu = (int)( ( (unsigned int)(N) >> HPL_LASWP03T_LOG2_DEPTH ) <<
               HPL_LASWP03T_LOG2_DEPTH );
   nr = N - nu;
/*
 * Full chunks: source and destination advance by the same amount.
 */
   for( j = 0; j < nu; j += HPL_LASWP03T_DEPTH,
        ucur += HPL_LASWP03T_DEPTH, wcur += HPL_LASWP03T_DEPTH )
   {
      for( i = 0; i < M; i++ )
      {
         u0 = ucur +
              (size_t)(*(W0+(size_t)(i)*(size_t)(LDW))) * (size_t)(LDU);
         w0 = wcur + (size_t)(i) * (size_t)(LDW);

         u0[ 0] = w0[ 0];
#if ( HPL_LASWP03T_DEPTH >  1 )
         u0[ 1] = w0[ 1];
#endif
#if ( HPL_LASWP03T_DEPTH >  2 )
         u0[ 2] = w0[ 2]; u0[ 3] = w0[ 3];
#endif
#if ( HPL_LASWP03T_DEPTH >  4 )
         u0[ 4] = w0[ 4]; u0[ 5] = w0[ 5]; u0[ 6] = w0[ 6]; u0[ 7] = w0[ 7];
#endif
#if ( HPL_LASWP03T_DEPTH >  8 )
         u0[ 8] = w0[ 8]; u0[ 9] = w0[ 9]; u0[10] = w0[10]; u0[11] = w0[11];
         u0[12] = w0[12]; u0[13] = w0[13]; u0[14] = w0[14]; u0[15] = w0[15];
#endif
#if ( HPL_LASWP03T_DEPTH > 16 )
         u0[16] = w0[16]; u0[17] = w0[17]; u0[18] = w0[18]; u0[19] = w0[19];
         u0[20] = w0[20]; u0[21] = w0[21]; u0[22] = w0[22]; u0[23] = w0[23];
         u0[24] = w0[24]; u0[25] = w0[25]; u0[26] = w0[26]; u0[27] = w0[27];
         u0[28] = w0[28]; u0[29] = w0[29]; u0[30] = w0[30]; u0[31] = w0[31];
#endif
      }
   }
/*
 * Clean-up: ucur and wcur now point just past the last full chunk.
 */
   if( nr > 0 )
   {
      for( i = 0; i < M; i++ )
      {
         u0 = ucur +
              (size_t)(*(W0+(size_t)(i)*(size_t)(LDW))) * (size_t)(LDU);
         w0 = wcur + (size_t)(i) * (size_t)(LDW);
         for( j = 0; j < nr; j++ ) { u0[j] = w0[j]; }
      }
   }
/*
 * End of HPL_dlaswp03T
 */
}