root / src / pauxil / HPL_infog2l.c @ 1
Historique | Voir | Annoter | Télécharger (14,02 ko)
/*
 * -- High Performance Computing Linpack Benchmark (HPL)
 *    HPL - 2.0 - September 10, 2008
 *    Antoine P. Petitet
 *    University of Tennessee, Knoxville
 *    Innovative Computing Laboratory
 *    (C) Copyright 2000-2008 All Rights Reserved
 *
 * -- Copyright notice and Licensing terms:
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 * notice, this list of conditions and the following disclaimer.
 *
 * 2. Redistributions in binary form must reproduce the above copyright
 * notice, this list of conditions, and the following disclaimer in the
 * documentation and/or other materials provided with the distribution.
 *
 * 3. All advertising materials mentioning features or use of this
 * software must display the following acknowledgement:
 * This product includes software developed at the University of
 * Tennessee, Knoxville, Innovative Computing Laboratory.
 *
 * 4. The name of the University, the name of the Laboratory, or the
 * names of its contributors may not be used to endorse or promote
 * products derived from this software without specific written
 * permission.
 *
 * -- Disclaimer:
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
 * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 * ---------------------------------------------------------------------
 */
/*
 * Include files
 */
50 |
#include "hpl.h" |
51 |
|
52 |
#ifdef STDC_HEADERS
void HPL_infog2l
(
   int                              I,
   int                              J,
   const int                        IMB,
   const int                        MB,
   const int                        INB,
   const int                        NB,
   const int                        RSRC,
   const int                        CSRC,
   const int                        MYROW,
   const int                        MYCOL,
   const int                        NPROW,
   const int                        NPCOL,
   int *                            II,
   int *                            JJ,
   int *                            PROW,
   int *                            PCOL
)
#else
void HPL_infog2l
( I, J, IMB, MB, INB, NB, RSRC, CSRC, MYROW, MYCOL, NPROW, NPCOL, II, JJ, PROW, PCOL )
   int                              I;
   int                              J;
   const int                        IMB;
   const int                        MB;
   const int                        INB;
   const int                        NB;
   const int                        RSRC;
   const int                        CSRC;
   const int                        MYROW;
   const int                        MYCOL;
   const int                        NPROW;
   const int                        NPCOL;
   int *                            II;
   int *                            JJ;
   int *                            PROW;
   int *                            PCOL;
#endif
{
/*
 * Purpose
 * =======
 *
 * HPL_infog2l computes the starting local index II, JJ corresponding to
 * the submatrix starting globally at the entry pointed by I, J. This
 * routine returns the coordinates in the grid of the process owning the
 * matrix entry of global indexes I, J, namely PROW and PCOL.
 *
 * The row and column directions are treated independently and with the
 * same algorithm; only the set of arguments differs.
 *
 * Arguments
 * =========
 *
 * I       (global input)                int
 *         On entry, I specifies the global row index of the matrix
 *         entry. I must be at least zero.
 *
 * J       (global input)                int
 *         On entry, J specifies the global column index of the matrix
 *         entry. J must be at least zero.
 *
 * IMB     (global input)                const int
 *         On entry, IMB specifies the size of the first row block of
 *         the global matrix. IMB must be at least one.
 *
 * MB      (global input)                const int
 *         On entry, MB specifies the blocking factor used to partition
 *         and distribute the rows of the matrix A. MB is used as a
 *         divisor and must be at least one.
 *
 * INB     (global input)                const int
 *         On entry, INB specifies the size of the first column block of
 *         the global matrix. INB must be at least one.
 *
 * NB      (global input)                const int
 *         On entry, NB specifies the blocking factor used to partition
 *         and distribute the columns of the matrix A. NB is used as a
 *         divisor and must be at least one.
 *
 * RSRC    (global input)                const int
 *         On entry, RSRC specifies the row coordinate of the process
 *         that owns the first row block (every I < IMB is reported as
 *         owned by RSRC). RSRC is either -1, meaning that the rows are
 *         not distributed, or at least zero and strictly less than
 *         NPROW.
 *
 * CSRC    (global input)                const int
 *         On entry, CSRC specifies the column coordinate of the process
 *         that owns the first column block (every J < INB is reported
 *         as owned by CSRC). CSRC is either -1, meaning that the columns
 *         are not distributed, or at least zero and strictly less than
 *         NPCOL.
 *
 * MYROW   (local input)                 const int
 *         On entry, MYROW specifies my row process coordinate in the
 *         grid. MYROW is greater than or equal to zero and less than
 *         NPROW.
 *
 * MYCOL   (local input)                 const int
 *         On entry, MYCOL specifies my column process coordinate in the
 *         grid. MYCOL is greater than or equal to zero and less than
 *         NPCOL.
 *
 * NPROW   (global input)                const int
 *         On entry, NPROW specifies the number of process rows in the
 *         grid. NPROW is at least one.
 *
 * NPCOL   (global input)                const int
 *         On entry, NPCOL specifies the number of process columns in
 *         the grid. NPCOL is at least one.
 *
 * II      (local output)                int *
 *         On exit, II specifies the local starting row index of the
 *         submatrix. On exit, II is at least 0.
 *
 * JJ      (local output)                int *
 *         On exit, JJ specifies the local starting column index of the
 *         submatrix. On exit, JJ is at least 0.
 *
 * PROW    (global output)               int *
 *         On exit, PROW is the row coordinate of the process owning the
 *         entry specified by the global index I. PROW is at least zero
 *         and less than NPROW, unless RSRC is -1, in which case PROW is
 *         set to -1 as well.
 *
 * PCOL    (global output)               int *
 *         On exit, PCOL is the column coordinate of the process owning
 *         the entry specified by the global index J. PCOL is at least
 *         zero and less than NPCOL, unless CSRC is -1, in which case
 *         PCOL is set to -1 as well.
 *
 * ---------------------------------------------------------------------
 */
/*
 * .. Local Variables ..
 */
   int                        ilocblk, imb, inb, mb, mydist, nb, nblocks,
                              csrc, rsrc;
/* ..
 * .. Executable Statements ..
 */
   imb   = IMB;
   *PROW = RSRC;

   if( ( *PROW == -1 ) || ( NPROW == 1 ) )
   {
/*
 * The data is not distributed, or there is just one process row in the
 * grid: local and global row indexes coincide.
 */
      *II = I;
   }
   else if( I < imb )
   {
/*
 * I refers to an entry in the first block of rows, which is owned by
 * the source process row. Every other process row starts at local 0.
 */
      *II = ( MYROW == *PROW ? I : 0 );
   }
   else
   {
      mb   = MB;
      rsrc = *PROW;
/*
 * The discussion goes as follows: compute my distance from the source
 * process so that within this process coordinate system, the source
 * process is the process such that mydist = 0, or equivalently
 * MYROW == rsrc.
 *
 * Find out the global coordinate of the block I belongs to (nblocks),
 * as well as the minimum local number of blocks that every process has.
 *
 * when mydist < nblocks-ilocblk*NPROCS, I own ilocblk + 1 full blocks,
 * when mydist > nblocks-ilocblk*NPROCS, I own ilocblk     full blocks,
 * when mydist = nblocks-ilocblk*NPROCS, I own ilocblk     full blocks
 * but not I, or I own ilocblk + 1 blocks and the entry I refers to.
 */
      if( MYROW == rsrc )
      {
/*
 * I refers to an entry that is not in the first block, find out which
 * process has it. The owner is ( rsrc + nblocks ) modulo NPROW.
 */
         nblocks = ( I - imb ) / mb + 1;
         *PROW  += nblocks;
         *PROW  -= ( *PROW / NPROW ) * NPROW;
/*
 * Since mydist = 0 and nblocks - ilocblk * NPROW >= 0, there are only
 * three possible cases:
 *
 * 1) When mydist = nblocks - ilocblk * NPROW = 0 and I do not own I,
 *    in which case II = IMB + ( ilocblk - 1 ) * MB. Note that this
 *    case cannot happen when ilocblk is zero, since nblocks is at
 *    least one.
 *
 * 2) When mydist = nblocks - ilocblk * NPROW = 0 and I own I, in
 *    which case I and II can respectively be written as IMB +
 *    (nblocks-1)*MB + IL and IMB + (ilocblk-1) * MB + IL. That is
 *    II = I + (ilocblk-nblocks)*MB. Note that this case cannot happen
 *    when ilocblk is zero, since nblocks is at least one.
 *
 * 3) mydist = 0 < nblocks - ilocblk * NPROW, the source process owns
 *    ilocblk+1 full blocks, and therefore II = IMB + ilocblk * MB.
 *    Note that when ilocblk is zero, II is just IMB.
 */
         if( nblocks < NPROW )
         {
            *II = imb;
         }
         else
         {
            ilocblk = nblocks / NPROW;
            if( ilocblk * NPROW >= nblocks )
            {
               *II = ( ( MYROW == *PROW ) ?
                       I   + ( ilocblk - nblocks ) * mb :
                       imb + ( ilocblk - 1       ) * mb );
            }
            else
            {
               *II = imb + ilocblk * mb;
            }
         }
      }
      else
      {
/*
 * I refers to an entry that is not in the first block, find out which
 * process has it. From here on, I (a by-value argument) is the global
 * row index relative to the end of the first block, i.e. I - IMB.
 */
         I      -= imb;
         nblocks = I / mb + 1;
         *PROW  += nblocks;
         *PROW  -= ( *PROW / NPROW ) * NPROW;
/*
 * Compute my distance from the source process so that within this pro-
 * cess coordinate system, the source process is the process such that
 * mydist=0.
 */
         mydist = MYROW - rsrc;
         if( mydist < 0 ) mydist += NPROW;
/*
 * When mydist < nblocks - ilocblk * NPROW, I own ilocblk+1 full blocks
 * of size MB since I am not the source process, i.e. II=(ilocblk+1)*MB.
 * When mydist>=nblocks-ilocblk*NPROW and I do not own I, I own ilocblk
 * full blocks of size MB, i.e. II = ilocblk*MB, otherwise I own ilocblk
 * blocks and I, in which case I can be written as IMB + (nblocks-1)*MB
 * + IL and II = ilocblk*MB + IL = I - IMB + (ilocblk - nblocks + 1)*MB.
 */
         if( nblocks < NPROW )
         {
/*
 * Same formulas as below with ilocblk = 0.
 */
            mydist -= nblocks;
            *II = ( ( mydist < 0 ) ? mb :
                    ( ( MYROW == *PROW ) ?
                      I + ( 1 - nblocks ) * mb : 0 ) );
         }
         else
         {
            ilocblk = nblocks / NPROW;
            mydist -= nblocks - ilocblk * NPROW;
            *II = ( ( mydist < 0 ) ? ( ilocblk + 1 ) * mb :
                    ( ( MYROW == *PROW ) ?
                      ( ilocblk - nblocks + 1 ) * mb + I :
                      ilocblk * mb ) );
         }
      }
   }
/*
 * Idem for the columns, with ( J, INB, NB, CSRC, MYCOL, NPCOL ) playing
 * the role of ( I, IMB, MB, RSRC, MYROW, NPROW ).
 */
   inb   = INB;
   *PCOL = CSRC;

   if( ( *PCOL == -1 ) || ( NPCOL == 1 ) )
   {
/*
 * The data is not distributed, or there is just one process column in
 * the grid.
 */
      *JJ = J;
   }
   else if( J < inb )
   {
/*
 * J refers to an entry in the first block of columns.
 */
      *JJ = ( MYCOL == *PCOL ? J : 0 );
   }
   else
   {
      nb   = NB;
      csrc = *PCOL;

      if( MYCOL == csrc )
      {
/*
 * I am in the source process column (mydist = 0).
 */
         nblocks = ( J - inb ) / nb + 1;
         *PCOL  += nblocks;
         *PCOL  -= ( *PCOL / NPCOL ) * NPCOL;

         if( nblocks < NPCOL )
         {
            *JJ = inb;
         }
         else
         {
            ilocblk = nblocks / NPCOL;
            if( ilocblk * NPCOL >= nblocks )
            {
               *JJ = ( ( MYCOL == *PCOL ) ?
                       J   + ( ilocblk - nblocks ) * nb :
                       inb + ( ilocblk - 1       ) * nb );
            }
            else
            {
               *JJ = inb + ilocblk * nb;
            }
         }
      }
      else
      {
/*
 * I am not in the source process column. From here on, J (a by-value
 * argument) is the global column index relative to the end of the
 * first block, i.e. J - INB.
 */
         J      -= inb;
         nblocks = J / nb + 1;
         *PCOL  += nblocks;
         *PCOL  -= ( *PCOL / NPCOL ) * NPCOL;

         mydist = MYCOL - csrc;
         if( mydist < 0 ) mydist += NPCOL;

         if( nblocks < NPCOL )
         {
            mydist -= nblocks;
            *JJ = ( ( mydist < 0 ) ? nb :
                    ( ( MYCOL == *PCOL ) ?
                      J + ( 1 - nblocks ) * nb : 0 ) );
         }
         else
         {
            ilocblk = nblocks / NPCOL;
            mydist -= nblocks - ilocblk * NPCOL;
            *JJ = ( ( mydist < 0 ) ? ( ilocblk + 1 ) * nb :
                    ( ( MYCOL == *PCOL ) ?
                      ( ilocblk - nblocks + 1 ) * nb + J :
                      ilocblk * nb ) );
         }
      }
   }
/*
 * End of HPL_infog2l
 */
}