root / src / pgesv / HPL_plindx0.c @ 9
Historique | Voir | Annoter | Télécharger (12,3 ko)
1 |
/*
|
---|---|
2 |
* -- High Performance Computing Linpack Benchmark (HPL)
|
3 |
* HPL - 2.0 - September 10, 2008
|
4 |
* Antoine P. Petitet
|
5 |
* University of Tennessee, Knoxville
|
6 |
* Innovative Computing Laboratory
|
7 |
* (C) Copyright 2000-2008 All Rights Reserved
|
8 |
*
|
9 |
* -- Copyright notice and Licensing terms:
|
10 |
*
|
11 |
* Redistribution and use in source and binary forms, with or without
|
12 |
* modification, are permitted provided that the following conditions
|
13 |
* are met:
|
14 |
*
|
15 |
* 1. Redistributions of source code must retain the above copyright
|
16 |
* notice, this list of conditions and the following disclaimer.
|
17 |
*
|
18 |
* 2. Redistributions in binary form must reproduce the above copyright
|
19 |
* notice, this list of conditions, and the following disclaimer in the
|
20 |
* documentation and/or other materials provided with the distribution.
|
21 |
*
|
22 |
* 3. All advertising materials mentioning features or use of this
|
23 |
* software must display the following acknowledgement:
|
24 |
* This product includes software developed at the University of
|
25 |
* Tennessee, Knoxville, Innovative Computing Laboratory.
|
26 |
*
|
27 |
* 4. The name of the University, the name of the Laboratory, or the
|
28 |
* names of its contributors may not be used to endorse or promote
|
29 |
* products derived from this software without specific written
|
30 |
* permission.
|
31 |
*
|
32 |
* -- Disclaimer:
|
33 |
*
|
34 |
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
35 |
* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
36 |
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
37 |
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
|
38 |
* OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
39 |
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
40 |
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
41 |
* DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
42 |
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
43 |
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
44 |
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
45 |
* ---------------------------------------------------------------------
|
46 |
*/
|
47 |
/*
|
48 |
* Include files
|
49 |
*/
|
50 |
#include "hpl.h" |
51 |
|
52 |
#ifdef STDC_HEADERS
|
53 |
void HPL_plindx0
|
54 |
( |
55 |
HPL_T_panel * PANEL, |
56 |
const int K, |
57 |
int * IPID,
|
58 |
int * LINDXA,
|
59 |
int * LINDXAU,
|
60 |
int * LLEN
|
61 |
) |
62 |
#else
|
63 |
void HPL_plindx0
|
64 |
( PANEL, K, IPID, LINDXA, LINDXAU, LLEN ) |
65 |
HPL_T_panel * PANEL; |
66 |
const int K; |
67 |
int * IPID;
|
68 |
int * LINDXA;
|
69 |
int * LINDXAU;
|
70 |
int * LLEN;
|
71 |
#endif
|
72 |
{ |
73 |
/*
|
74 |
* Purpose
|
75 |
* =======
|
76 |
*
|
77 |
* HPL_plindx0 computes two local arrays LINDXA and LINDXAU containing
|
78 |
* the local source and final destination position resulting from the
|
79 |
* application of row interchanges.
|
80 |
*
|
81 |
* On entry, the array IPID of length K is such that the row of global
|
82 |
* index IPID(i) should be mapped onto row of global index IPID(i+1).
|
83 |
* Let IA be the global index of the first row to be swapped. For k in
|
84 |
* [0..K/2), the row of global index IPID(2*k) should be mapped onto the
|
85 |
* row of global index IPID(2*k+1). The question then, is to determine
|
86 |
* which rows should ultimately be part of U.
|
87 |
*
|
88 |
* First, some rows of the process ICURROW may be swapped locally. One
|
89 |
* of this row belongs to U, the other one belongs to my local piece of
|
90 |
* A. The other rows of the current block are swapped with remote rows
|
91 |
* and are thus not part of U. These rows however should be sent along,
|
92 |
* and grabbed by the other processes as we progress in the exchange
|
93 |
* phase.
|
94 |
*
|
95 |
* So, assume that I am ICURROW and consider a row of index IPID(2*i)
|
96 |
* that I own. If I own IPID(2*i+1) as well and IPID(2*i+1) - IA is less
|
97 |
* than N, this row is locally swapped and should be copied into U at
|
98 |
* the position IPID(2*i+1) - IA. No row will be exchanged for this one.
|
99 |
* If IPID(2*i+1)-IA is greater than N, then the row IPID(2*i) should be
|
100 |
* locally copied into my local piece of A at the position corresponding
|
101 |
* to the row of global index IPID(2*i+1).
|
102 |
*
|
103 |
* If the process ICURROW does not own IPID(2*i+1), then row IPID(2*i)
|
104 |
* is to be swapped away and strictly speaking does not belong to U, but
|
105 |
* to A remotely. Since this process will however send this array U,
|
106 |
* this row is copied into U, exactly where the row IPID(2*i+1) should
|
107 |
* go. For this, we search IPID for k1, such that IPID(2*k1) is equal to
|
108 |
* IPID(2*i+1); and row IPID(2*i) is to be copied in U at the position
|
109 |
* IPID(2*k1+1)-IA.
|
110 |
*
|
111 |
* It is thus important to put the rows that go into U, i.e., such that
|
112 |
* IPID(2*i+1) - IA is less than N at the begining of the array IPID. By
|
113 |
* doing so, U is formed, and the local copy is performed in just one
|
114 |
* sweep.
|
115 |
*
|
116 |
* Two lists LINDXA and LINDXAU are built. LINDXA contains the local
|
117 |
* index of the rows I have that should be copied. LINDXAU contains the
|
118 |
* local destination information: if LINDXAU(k) >= 0, row LINDXA(k) of A
|
119 |
* is to be copied in U at position LINDXAU(k). Otherwise, row LINDXA(k)
|
120 |
* of A should be locally copied into A(-LINDXAU(k),:). In the process
|
121 |
* ICURROW, the initial packing algorithm proceeds as follows.
|
122 |
*
|
123 |
* for all entries in IPID,
|
124 |
* if IPID(2*i) is in ICURROW,
|
125 |
* if IPID(2*i+1) is in ICURROW,
|
126 |
* if( IPID(2*i+1) - IA < N )
|
127 |
* save corresponding local position
|
128 |
* of this row (LINDXA);
|
129 |
* save local position (LINDXAU) in U
|
130 |
* where this row goes;
|
131 |
* [copy row IPID(2*i) in U at position
|
132 |
* IPID(2*i+1)-IA; ];
|
133 |
* else
|
134 |
* save corresponding local position of
|
135 |
* this row (LINDXA);
|
136 |
* save local position (-LINDXAU) in A
|
137 |
* where this row goes;
|
138 |
* [copy row IPID(2*i) in my piece of A
|
139 |
* at IPID(2*i+1);]
|
140 |
* end if
|
141 |
* else
|
142 |
* find k1 such that IPID(2*k1) = IPID(2*i+1);
|
143 |
* copy row IPID(2*i) in U at position
|
144 |
* IPID(2*k1+1)-IA;
|
145 |
* save corresponding local position of this
|
146 |
* row (LINDXA);
|
147 |
* save local position (LINDXAU) in U where
|
148 |
* this row goes;
|
149 |
* end if
|
150 |
* end if
|
151 |
* end for
|
152 |
*
|
153 |
* Second, if I am not the current row process ICURROW, all source rows
|
154 |
* in IPID that I own are part of U. Indeed, they are swapped with one
|
155 |
* row of the current block of rows, and the main factorization
|
156 |
* algorithm proceeds one row after each other. The processes different
|
157 |
* from ICURROW, should exchange and accumulate those rows until they
|
158 |
* receive some data previously owned by the process ICURROW.
|
159 |
*
|
160 |
* In processes different from ICURROW, the initial packing algorithm
|
161 |
* proceeds as follows. Consider a row of global index IPID(2*i) that I
|
162 |
* own. When I will be receiving data previously owned by ICURROW, i.e.,
|
163 |
* U, row IPID(2*i) should replace the row in U at pos. IPID(2*i+1)-IA,
|
164 |
* and this particular row of U should be first copied into my piece of
|
165 |
* A, at A(il,:), where il is the local row index corresponding to
|
166 |
* IPID(2*i). Now,initially, this row will be packed into workspace, say
|
167 |
* as the kth row of that work array. The following algorithm sets
|
168 |
* LINDXAU[k] to IPID(2*i+1)-IA, that is the position in U where the row
|
169 |
* should be copied. LINDXA(k) stores the local index in A where this
|
170 |
* row of U should be copied, i.e il.
|
171 |
*
|
172 |
* for all entries in IPID,
|
173 |
* if IPID(2*i) is not in ICURROW,
|
174 |
* copy row IPID(2*i) in work array;
|
175 |
* save corresponding local position
|
176 |
* of this row (LINDXA);
|
177 |
* save position (LINDXAU) in U where
|
178 |
* this row should be copied;
|
179 |
* end if
|
180 |
* end for
|
181 |
*
|
182 |
* Since we are at it, we also globally figure out how many rows every
|
183 |
* process has. That is necessary, because it would rather be cumbersome
|
184 |
* to figure it on the fly during the bi-directional exchange phase.
|
185 |
* This information is kept in the array LLEN of size NPROW. Also note
|
186 |
* that the arrays LINDXA and LINDXAU are of max length equal to 2*N.
|
187 |
*
|
188 |
* Arguments
|
189 |
* =========
|
190 |
*
|
191 |
* PANEL (local input/output) HPL_T_panel *
|
192 |
* On entry, PANEL points to the data structure containing the
|
193 |
* panel information.
|
194 |
*
|
195 |
* K (global input) const int
|
196 |
* On entry, K specifies the number of entries in IPID. K is at
|
197 |
* least 2*N, and at most 4*N.
|
198 |
*
|
199 |
* IPID (global input) int *
|
200 |
* On entry, IPID is an array of length K. The first K entries
|
201 |
* of that array contain the src and final destination resulting
|
202 |
* from the application of the interchanges.
|
203 |
*
|
204 |
* LINDXA (local output) int *
|
205 |
* On entry, LINDXA is an array of dimension 2*N. On exit, this
|
206 |
* array contains the local indexes of the rows of A I have that
|
207 |
* should be copied into U.
|
208 |
*
|
209 |
* LINDXAU (local output) int *
|
210 |
* On exit, LINDXAU is an array of dimension 2*N. On exit, this
|
211 |
* array contains the local destination information encoded as
|
212 |
* follows. If LINDXAU(k) >= 0, row LINDXA(k) of A is to be
|
213 |
* copied in U at position LINDXAU(k). Otherwise, row LINDXA(k)
|
214 |
* of A should be locally copied into A(-LINDXAU(k),:).
|
215 |
*
|
216 |
* LLEN (global output) int *
|
217 |
* On entry, LLEN is an array of length NPROW. On exit, it
|
218 |
* contains how many rows every process has.
|
219 |
*
|
220 |
* ---------------------------------------------------------------------
|
221 |
*/
|
222 |
/*
|
223 |
* .. Local Variables ..
|
224 |
*/
|
225 |
int dst, dstrow, fndd, i, ia, icurrow, il,
|
226 |
ip=0, iroff, j, jb, myrow, nb, nprow,
|
227 |
src, srcrow; |
228 |
/* ..
|
229 |
* .. Executable Statements ..
|
230 |
*/
|
231 |
/*
|
232 |
* Compute the local arrays LINDXA and LINDXAU containing the local
|
233 |
* source and final destination position resulting from the application
|
234 |
* of N interchanges.
|
235 |
*/
|
236 |
myrow = PANEL->grid->myrow; nprow = PANEL->grid->nprow; |
237 |
icurrow = PANEL->prow; jb = PANEL->jb; |
238 |
nb = PANEL->nb; ia = PANEL->ia; |
239 |
iroff = PANEL->ii; |
240 |
|
241 |
for( i = 0; i < nprow; i++ ) LLEN[i] = 0; |
242 |
|
243 |
for( i = 0; i < K; i += 2 ) |
244 |
{ |
245 |
src = IPID[i]; |
246 |
Mindxg2p( src, nb, nb, srcrow, 0, nprow ); LLEN[ srcrow ]++;
|
247 |
|
248 |
if( myrow == srcrow )
|
249 |
{ |
250 |
Mindxg2l( il, src, nb, nb, myrow, 0, nprow );
|
251 |
LINDXA[ip] = il - iroff; dst = IPID[i+1];
|
252 |
|
253 |
if( myrow == icurrow )
|
254 |
{ |
255 |
Mindxg2p( dst, nb, nb, dstrow, 0, nprow );
|
256 |
if( dstrow == icurrow )
|
257 |
{ |
258 |
if( dst - ia < jb ) { LINDXAU[ip] = dst - ia; }
|
259 |
else
|
260 |
{ |
261 |
Mindxg2l( il, dst, nb, nb, myrow, 0, nprow );
|
262 |
LINDXAU[ip] = iroff - il; |
263 |
} |
264 |
} |
265 |
else
|
266 |
{ |
267 |
j = 0;
|
268 |
do { fndd = ( dst == IPID[j] ); j+=2; } |
269 |
while( !fndd && ( j < K ) );
|
270 |
LINDXAU[ip] = IPID[j-1] - ia;
|
271 |
} |
272 |
} |
273 |
else { LINDXAU[ip] = dst - ia; }
|
274 |
|
275 |
ip++; |
276 |
} |
277 |
} |
278 |
/*
|
279 |
* End of HPL_plindx0
|
280 |
*/
|
281 |
} |