Statistiques
| Révision :

root / src / pauxil / HPL_infog2l.c @ 1

Historique | Voir | Annoter | Télécharger (14,02 ko)

/*
 * -- High Performance Computing Linpack Benchmark (HPL)
 *    HPL - 2.0 - September 10, 2008
 *    Antoine P. Petitet
 *    University of Tennessee, Knoxville
 *    Innovative Computing Laboratory
 *    (C) Copyright 2000-2008 All Rights Reserved
 *
 * -- Copyright notice and Licensing terms:
 *
 * Redistribution  and  use in  source and binary forms, with or without
 * modification, are  permitted provided  that the following  conditions
 * are met:
 *
 * 1. Redistributions  of  source  code  must retain the above copyright
 * notice, this list of conditions and the following disclaimer.
 *
 * 2. Redistributions in binary form must reproduce  the above copyright
 * notice, this list of conditions,  and the following disclaimer in the
 * documentation and/or other materials provided with the distribution.
 *
 * 3. All  advertising  materials  mentioning  features  or  use of this
 * software must display the following acknowledgement:
 * This  product  includes  software  developed  at  the  University  of
 * Tennessee, Knoxville, Innovative Computing Laboratory.
 *
 * 4. The name of the  University,  the name of the  Laboratory,  or the
 * names  of  its  contributors  may  not  be used to endorse or promote
 * products  derived   from   this  software  without  specific  written
 * permission.
 *
 * -- Disclaimer:
 *
 * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
 * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
 * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 * ---------------------------------------------------------------------
 */
/*
 * Include files
 */
50
#include "hpl.h"
51

    
52
#ifdef STDC_HEADERS
static void HPL_infog2l_dim
(
   const int                        IG,
   const int                        INB,
   const int                        NB,
   const int                        SRC,
   const int                        MYPROC,
   const int                        NPROCS,
   int *                            IL,
   int *                            PROC
)
#else
static void HPL_infog2l_dim
( IG, INB, NB, SRC, MYPROC, NPROCS, IL, PROC )
   const int                        IG;
   const int                        INB;
   const int                        NB;
   const int                        SRC;
   const int                        MYPROC;
   const int                        NPROCS;
   int *                            IL;
   int *                            PROC;
#endif
{
/*
 * Purpose
 * =======
 *
 * HPL_infog2l_dim is the one-dimensional kernel of HPL_infog2l.  For a
 * single dimension (rows or columns) it computes the local starting
 * index IL that corresponds to the global index IG in the calling
 * process, and the coordinate PROC of the process owning IG.
 *
 * Arguments
 * =========
 *
 * IG      (global input)                const int
 *         Global index. IG must be at least zero.
 *
 * INB     (global input)                const int
 *         Size of the first block. INB must be at least one.
 *
 * NB      (global input)                const int
 *         Size of every block after the first one.  NB  is used as a
 *         divisor whenever IG >= INB  and the data is distributed, so
 *         it must be at least one.
 *
 * SRC     (global input)                const int
 *         Coordinate of the process owning the first block,  or -1 to
 *         indicate that the data is not distributed.
 *
 * MYPROC  (local input)                 const int
 *         Coordinate of the calling process,  0 <= MYPROC < NPROCS.
 *
 * NPROCS  (global input)                const int
 *         Number of processes in this dimension, at least one.
 *
 * IL      (local output)                int *
 *         On exit, the local starting index.
 *
 * PROC    (global output)               int *
 *         On exit,  the coordinate of the process owning IG;  this is
 *         SRC itself (possibly -1)  when the data is not distributed,
 *         when NPROCS is one, or when IG lies in the first block.
 *
 * ---------------------------------------------------------------------
 */
/*
 * .. Local Variables ..
 */
   int            extra, ilocblk, local, mydist, nblocks, owner;
/* ..
 * .. Executable Statements ..
 */
   if( ( SRC == -1 ) || ( NPROCS == 1 ) )
   {
/*
 * The data is not distributed,  or there is just one process  in  this
 * dimension of the grid: local and global indexes coincide.
 */
      *PROC = SRC;
      *IL   = IG;
      return;
   }

   if( IG < INB )
   {
/*
 * IG refers to an entry in the first block, which is owned by SRC. Any
 * other process has no entry preceding it.
 */
      *PROC = SRC;
      *IL   = ( MYPROC == SRC ? IG : 0 );
      return;
   }
/*
 * IG is not in the first block.  nblocks  is the global number of the
 * block IG belongs to (at least one), so that  IG  can be written as
 * INB + (nblocks-1)*NB + off  with  0 <= off < NB.  Blocks are dealt
 * round-robin starting from SRC, hence the owner below.
 */
   nblocks = ( IG - INB ) / NB + 1;
   owner   = ( SRC + nblocks ) % NPROCS;
/*
 * ilocblk is the number of complete rounds over the  NPROCS  processes
 * contained in nblocks,  and  extra = nblocks - ilocblk*NPROCS  is the
 * distance from SRC of the process owning IG.  When nblocks < NPROCS,
 * ilocblk is simply zero and extra is nblocks.
 */
   ilocblk = nblocks / NPROCS;
   extra   = nblocks - ilocblk * NPROCS;

   if( MYPROC == SRC )
   {
/*
 * I am the source process (mydist = 0) and own the first block of size
 * INB.  Either  extra > 0:  IG  belongs to another process and  I own
 * ilocblk  blocks of size NB  after the first one,  or  extra = 0:  I
 * own IG itself,  which lies in my  ilocblk-th  block of size NB, i.e.
 * IL = INB + (ilocblk-1)*NB + off = IG + (ilocblk-nblocks)*NB.
 */
      if( extra > 0 )
      {
         local = INB + ilocblk * NB;
      }
      else if( MYPROC == owner )
      {
         local = IG + ( ilocblk - nblocks ) * NB;
      }
      else
      {
         local = INB + ( ilocblk - 1 ) * NB;
      }
   }
   else
   {
/*
 * Compute my distance from the source process so that within this pro-
 * cess coordinate system,  the  source process is the process such that
 * mydist = 0.
 */
      mydist = MYPROC - SRC;
      if( mydist < 0 ) mydist += NPROCS;
/*
 * When mydist < extra,  I own  ilocblk+1  full blocks of size  NB that
 * precede IG.  Otherwise I own ilocblk such blocks,  and if I also own
 * IG,  the offset of IG within its block is added:
 * IL = ilocblk*NB + off = (IG-INB) + (ilocblk-nblocks+1)*NB.
 */
      if( mydist < extra )
      {
         local = ( ilocblk + 1 ) * NB;
      }
      else if( MYPROC == owner )
      {
         local = ( IG - INB ) + ( ilocblk - nblocks + 1 ) * NB;
      }
      else
      {
         local = ilocblk * NB;
      }
   }

   *PROC = owner;
   *IL   = local;
/*
 * End of HPL_infog2l_dim
 */
}

#ifdef STDC_HEADERS
void HPL_infog2l
(
   int                              I,
   int                              J,
   const int                        IMB,
   const int                        MB,
   const int                        INB,
   const int                        NB,
   const int                        RSRC,
   const int                        CSRC,
   const int                        MYROW,
   const int                        MYCOL,
   const int                        NPROW,
   const int                        NPCOL,
   int *                            II,
   int *                            JJ,
   int *                            PROW,
   int *                            PCOL
)
#else
void HPL_infog2l
( I, J, IMB, MB, INB, NB, RSRC, CSRC, MYROW, MYCOL, NPROW, NPCOL, II, JJ, PROW, PCOL )
   int                              I;
   int                              J;
   const int                        IMB;
   const int                        MB;
   const int                        INB;
   const int                        NB;
   const int                        RSRC;
   const int                        CSRC;
   const int                        MYROW;
   const int                        MYCOL;
   const int                        NPROW;
   const int                        NPCOL;
   int *                            II;
   int *                            JJ;
   int *                            PROW;
   int *                            PCOL;
#endif
{
/*
 * Purpose
 * =======
 *
 * HPL_infog2l computes the starting local index II, JJ corresponding to
 * the submatrix starting globally at the entry pointed by  I,  J.  This
 * routine returns the coordinates in the grid of the process owning the
 * matrix entry of global indexes I, J, namely PROW and PCOL.
 *
 * Arguments
 * =========
 *
 * I       (global input)                int
 *         On entry,  I  specifies  the  global  row index of the matrix
 *         entry. I must be at least zero.
 *
 * J       (global input)                int
 *         On entry,  J  specifies the global column index of the matrix
 *         entry. J must be at least zero.
 *
 * IMB     (global input)                const int
 *         On entry,  IMB  specifies  the size of the first row block of
 *         the global matrix. IMB must be at least one.
 *
 * MB      (global input)                const int
 *         On entry,  MB specifies the blocking factor used to partition
 *         and  distribute the rows of the matrix A.  MB  is  used  as a
 *         divisor and must be at least one.
 *
 * INB     (global input)                const int
 *         On entry, INB specifies the size of the first column block of
 *         the global matrix. INB must be at least one.
 *
 * NB      (global input)                const int
 *         On entry,  NB specifies the blocking factor used to partition
 *         and distribute the columns of the matrix A.  NB is used  as a
 *         divisor and must be at least one.
 *
 * RSRC    (global input)                const int
 *         On entry,  RSRC  specifies  the row coordinate of the process
 *         that possesses the first row block.  RSRC  must  be at  least
 *         zero and strictly less than NPROW,  or  -1  to  indicate that
 *         the rows are not distributed.
 *
 * CSRC    (global input)                const int
 *         On entry, CSRC specifies the column coordinate of the process
 *         that possesses the first column block.  CSRC must be at least
 *         zero and strictly less than NPCOL,  or  -1  to  indicate that
 *         the columns are not distributed.
 *
 * MYROW   (local input)                 const int
 *         On entry, MYROW  specifies my  row process  coordinate in the
 *         grid. MYROW is greater than or equal  to zero  and  less than
 *         NPROW.
 *
 * MYCOL   (local input)                 const int
 *         On entry, MYCOL specifies my column process coordinate in the
 *         grid. MYCOL is greater than or equal  to zero  and  less than
 *         NPCOL.
 *
 * NPROW   (global input)                const int
 *         On entry,  NPROW  specifies the number of process rows in the
 *         grid. NPROW is at least one.
 *
 * NPCOL   (global input)                const int
 *         On entry,  NPCOL  specifies  the number of process columns in
 *         the grid. NPCOL is at least one.
 *
 * II      (local output)                int *
 *         On exit, II  specifies the  local  starting  row index of the
 *         submatrix. On exit, II is at least 0.
 *
 * JJ      (local output)                int *
 *         On exit, JJ  specifies the local starting column index of the
 *         submatrix. On exit, JJ is at least 0.
 *
 * PROW    (global output)               int *
 *         On exit, PROW is the row coordinate of the process owning the
 *         entry specified by the global index I.  PROW is at least zero
 *         and less than NPROW, except that it is -1 when RSRC is -1.
 *
 * PCOL    (global output)               int *
 *         On exit, PCOL  is the column coordinate of the process owning
 *         the entry specified by the global index J.  PCOL  is at least
 *         zero and less than NPCOL, except that it is -1  when  CSRC is
 *         -1.
 *
 * ---------------------------------------------------------------------
 */
/* ..
 * .. Executable Statements ..
 */
/*
 * Rows and columns are distributed independently  with  the same block
 * cyclic scheme, so the same one-dimensional kernel serves both.
 */
   HPL_infog2l_dim( I, IMB, MB, RSRC, MYROW, NPROW, II, PROW );
   HPL_infog2l_dim( J, INB, NB, CSRC, MYCOL, NPCOL, JJ, PCOL );
/*
 * End of HPL_infog2l
 */
}