/*
 * Repository path: root / src / pauxil / HPL_dlaswp00N.c @ revision 1
 * (8.26 kB)
 */
/*
 * -- High Performance Computing Linpack Benchmark (HPL)
 *    HPL - 2.0 - September 10, 2008
 *    Antoine P. Petitet
 *    University of Tennessee, Knoxville
 *    Innovative Computing Laboratory
 *    (C) Copyright 2000-2008 All Rights Reserved
 *
 * -- Copyright notice and Licensing terms:
 *
 * Redistribution  and  use in  source and binary forms, with or without
 * modification, are  permitted provided  that the following  conditions
 * are met:
 *
 * 1. Redistributions  of  source  code  must retain the above copyright
 * notice, this list of conditions and the following disclaimer.
 *
 * 2. Redistributions in binary form must reproduce  the above copyright
 * notice, this list of conditions,  and the following disclaimer in the
 * documentation and/or other materials provided with the distribution.
 *
 * 3. All  advertising  materials  mentioning  features  or  use of this
 * software must display the following acknowledgement:
 * This  product  includes  software  developed  at  the  University  of
 * Tennessee, Knoxville, Innovative Computing Laboratory.
 *
 * 4. The name of the  University,  the name of the  Laboratory,  or the
 * names  of  its  contributors  may  not  be used to endorse or promote
 * products  derived   from   this  software  without  specific  written
 * permission.
 *
 * -- Disclaimer:
 *
 * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
 * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
 * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 * ---------------------------------------------------------------------
 */
/*
 * Include files
 */
#include "hpl.h"
#include <stddef.h>
/*
 * Define default value for unrolling factor
 */
#ifndef HPL_LASWP00N_DEPTH
#define    HPL_LASWP00N_DEPTH       32
#define    HPL_LASWP00N_LOG2_DEPTH   5
#endif
/*
 * The unrolled loop below handles exactly 1, 2, 4, 8, 16 or 32 columns
 * per pass, and the column-block split uses HPL_LASWP00N_LOG2_DEPTH as a
 * shift count.  Reject overrides for which the two macros disagree or
 * which the unrolled body cannot honour, instead of silently skipping
 * or mis-striding columns.
 */
#if ( HPL_LASWP00N_DEPTH < 1 ) || ( HPL_LASWP00N_DEPTH > 32 ) || \
    ( HPL_LASWP00N_DEPTH != ( 1 << HPL_LASWP00N_LOG2_DEPTH ) )
#error "HPL_LASWP00N_DEPTH must be 1, 2, 4, 8, 16 or 32 and equal 2^HPL_LASWP00N_LOG2_DEPTH"
#endif

#ifdef STDC_HEADERS
void HPL_dlaswp00N
(
   const int                        M,
   const int                        N,
   double *                         A,
   const int                        LDA,
   const int *                      IPIV
)
#else
void HPL_dlaswp00N
( M, N, A, LDA, IPIV )
   const int                        M;
   const int                        N;
   double *                         A;
   const int                        LDA;
   const int *                      IPIV;
#endif
{
/* 
 * Purpose
 * =======
 *
 * HPL_dlaswp00N performs a series of local row interchanges on a matrix
 * A. One row interchange is initiated for rows 0 through M-1 of A.
 *
 * The columns of A are processed in blocks of HPL_LASWP00N_DEPTH
 * columns; within a block the M interchanges are applied in increasing
 * row order (k = 0, 1, ..., M-1).  The remaining N mod
 * HPL_LASWP00N_DEPTH columns are handled by a plain loop.
 *
 * Arguments
 * =========
 *
 * M       (local input)                 const int
 *         On entry, M specifies the number of rows of the array A to be
 *         interchanged. M must be at least zero.
 *
 * N       (local input)                 const int
 *         On entry, N  specifies  the number of columns of the array A.
 *         N must be at least zero.
 *
 * A       (local input/output)          double *
 *         On entry, A  points to an array of dimension (LDA,N) to which
 *         the row interchanges will be  applied.  On exit, the permuted
 *         matrix.
 *
 * LDA     (local input)                 const int
 *         On entry, LDA specifies the leading dimension of the array A.
 *         LDA must be at least MAX(1,M).
 *
 * IPIV    (local input)                 const int *
 *         On entry,  IPIV  is  an  array of size  M  that  contains the
 *         pivoting  information.  For  k  in [0..M),  IPIV[k] = l
 *         implies that local rows k and l are to be interchanged; the
 *         value is used directly as a zero-based row index into A and
 *         is not range-checked.  No interchange is done when l == k.
 *
 * ---------------------------------------------------------------------
 */ 
/*
 * .. Local Variables ..
 */
   register double            r;
   double                     * a0, * a1;
/*
 * Distance, in elements, between two consecutive column blocks.  It is
 * computed in ptrdiff_t so that LDA * HPL_LASWP00N_DEPTH cannot wrap a
 * 32-bit int for very large leading dimensions.
 */
   const ptrdiff_t            incA = (ptrdiff_t)(LDA) *
                                     (ptrdiff_t)(HPL_LASWP00N_DEPTH);
   int                        ip, nr, nu;
   register int               i, j;
/* ..
 * .. Executable Statements ..
 */
   if( ( M <= 0 ) || ( N <= 0 ) ) return;
/*
 * nu = N rounded down to a multiple of the unrolling depth (N > 0 here,
 * so the unsigned shifts are exact); nr = number of left-over columns.
 */
   nu = (int)( ( (unsigned int)(N) >> HPL_LASWP00N_LOG2_DEPTH )
               << HPL_LASWP00N_LOG2_DEPTH );
   nr = N - nu;
/*
 * Full column blocks: swap rows i and IPIV[i] across HPL_LASWP00N_DEPTH
 * consecutive columns with a fully unrolled loop.
 */
   for( j = 0; j < nu; j += HPL_LASWP00N_DEPTH, A += incA )
   {
      for( i = 0; i < M; i++ )
      {
         if( i != ( ip = IPIV[i] ) )
         {
            a0 = A + i; a1 = A + ip;

            r = *a0; *a0 = *a1; *a1 = r; a0 += LDA; a1 += LDA;
#if ( HPL_LASWP00N_DEPTH >  1 )
            r = *a0; *a0 = *a1; *a1 = r; a0 += LDA; a1 += LDA;
#endif
#if ( HPL_LASWP00N_DEPTH >  2 )
            r = *a0; *a0 = *a1; *a1 = r; a0 += LDA; a1 += LDA;
            r = *a0; *a0 = *a1; *a1 = r; a0 += LDA; a1 += LDA;
#endif
#if ( HPL_LASWP00N_DEPTH >  4 )
            r = *a0; *a0 = *a1; *a1 = r; a0 += LDA; a1 += LDA;
            r = *a0; *a0 = *a1; *a1 = r; a0 += LDA; a1 += LDA;
            r = *a0; *a0 = *a1; *a1 = r; a0 += LDA; a1 += LDA;
            r = *a0; *a0 = *a1; *a1 = r; a0 += LDA; a1 += LDA;
#endif
#if ( HPL_LASWP00N_DEPTH >  8 )
            r = *a0; *a0 = *a1; *a1 = r; a0 += LDA; a1 += LDA;
            r = *a0; *a0 = *a1; *a1 = r; a0 += LDA; a1 += LDA;
            r = *a0; *a0 = *a1; *a1 = r; a0 += LDA; a1 += LDA;
            r = *a0; *a0 = *a1; *a1 = r; a0 += LDA; a1 += LDA;
            r = *a0; *a0 = *a1; *a1 = r; a0 += LDA; a1 += LDA;
            r = *a0; *a0 = *a1; *a1 = r; a0 += LDA; a1 += LDA;
            r = *a0; *a0 = *a1; *a1 = r; a0 += LDA; a1 += LDA;
            r = *a0; *a0 = *a1; *a1 = r; a0 += LDA; a1 += LDA;
#endif
#if ( HPL_LASWP00N_DEPTH > 16 )
            r = *a0; *a0 = *a1; *a1 = r; a0 += LDA; a1 += LDA;
            r = *a0; *a0 = *a1; *a1 = r; a0 += LDA; a1 += LDA;
            r = *a0; *a0 = *a1; *a1 = r; a0 += LDA; a1 += LDA;
            r = *a0; *a0 = *a1; *a1 = r; a0 += LDA; a1 += LDA;
            r = *a0; *a0 = *a1; *a1 = r; a0 += LDA; a1 += LDA;
            r = *a0; *a0 = *a1; *a1 = r; a0 += LDA; a1 += LDA;
            r = *a0; *a0 = *a1; *a1 = r; a0 += LDA; a1 += LDA;
            r = *a0; *a0 = *a1; *a1 = r; a0 += LDA; a1 += LDA;
            r = *a0; *a0 = *a1; *a1 = r; a0 += LDA; a1 += LDA;
            r = *a0; *a0 = *a1; *a1 = r; a0 += LDA; a1 += LDA;
            r = *a0; *a0 = *a1; *a1 = r; a0 += LDA; a1 += LDA;
            r = *a0; *a0 = *a1; *a1 = r; a0 += LDA; a1 += LDA;
            r = *a0; *a0 = *a1; *a1 = r; a0 += LDA; a1 += LDA;
            r = *a0; *a0 = *a1; *a1 = r; a0 += LDA; a1 += LDA;
            r = *a0; *a0 = *a1; *a1 = r; a0 += LDA; a1 += LDA;
            r = *a0; *a0 = *a1; *a1 = r; a0 += LDA; a1 += LDA;
#endif
         }
      }
   }
/*
 * Left-over columns (fewer than HPL_LASWP00N_DEPTH): A now points at
 * the first column not covered by the blocked loop above.
 */
   if( nr > 0 )
   {
      for( i = 0; i < M; i++ )
      {
         if( i != ( ip = IPIV[i] ) )
         {
            a0 = A + i; a1 = A + ip;
            for( j = 0; j < nr; j++, a0 += LDA, a1 += LDA )
            { r = *a0; *a0 = *a1; *a1 = r; }
         }
      }
   }
/*
 * End of HPL_dlaswp00N
 */
}