Statistiques
| Révision :

root / src / pgesv / HPL_plindx1.c

Historique | Voir | Annoter | Télécharger (11,39 ko)

/*
 * -- High Performance Computing Linpack Benchmark (HPL)
 *    HPL - 2.0 - September 10, 2008
 *    Antoine P. Petitet
 *    University of Tennessee, Knoxville
 *    Innovative Computing Laboratory
 *    (C) Copyright 2000-2008 All Rights Reserved
 *
 * -- Copyright notice and Licensing terms:
 *
 * Redistribution  and  use in  source and binary forms, with or without
 * modification, are  permitted provided  that the following  conditions
 * are met:
 *
 * 1. Redistributions  of  source  code  must retain the above copyright
 * notice, this list of conditions and the following disclaimer.
 *
 * 2. Redistributions in binary form must reproduce  the above copyright
 * notice, this list of conditions,  and the following disclaimer in the
 * documentation and/or other materials provided with the distribution.
 *
 * 3. All  advertising  materials  mentioning  features  or  use of this
 * software must display the following acknowledgement:
 * This  product  includes  software  developed  at  the  University  of
 * Tennessee, Knoxville, Innovative Computing Laboratory.
 *
 * 4. The name of the  University,  the name of the  Laboratory,  or the
 * names  of  its  contributors  may  not  be used to endorse or promote
 * products  derived   from   this  software  without  specific  written
 * permission.
 *
 * -- Disclaimer:
 *
 * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
 * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
 * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 * ---------------------------------------------------------------------
 */
/*
 * Include files
 */
50
#include "hpl.h"
51

    
52
#ifdef STDC_HEADERS
53
void HPL_plindx1
54
(
55
   HPL_T_panel *                    PANEL,
56
   const int                        K,
57
   const int *                      IPID,
58
   int *                            IPA,
59
   int *                            LINDXA,
60
   int *                            LINDXAU,
61
   int *                            IPLEN,
62
   int *                            IPMAP,
63
   int *                            IPMAPM1,
64
   int *                            PERMU,
65
   int *                            IWORK
66
)
67
#else
68
void HPL_plindx1
69
( PANEL, K, IPID, IPA, LINDXA, LINDXAU, IPLEN, IPMAP, IPMAPM1, PERMU, IWORK )
70
   HPL_T_panel *                    PANEL;
71
   const int                        K;
72
   const int *                      IPID;
73
   int *                            IPA;
74
   int *                            LINDXA;
75
   int *                            LINDXAU;
76
   int *                            IPLEN;
77
   int *                            IPMAP;
78
   int *                            IPMAPM1;
79
   int *                            PERMU;
80
   int *                            IWORK;
81
#endif
82
{
83
/* 
84
 * Purpose
85
 * =======
86
 *
87
 * HPL_plindx1 computes two local arrays  LINDXA and  LINDXAU  containing
88
 * the  local  source and final destination position  resulting from the
89
 * application of row interchanges.  In addition, this function computes
90
 * three arrays IPLEN, IPMAP and IPMAPM1  that contain  the  logarithmic
91
 * mapping information for the spreading phase.
92
 *
93
 * Arguments
94
 * =========
95
 *
96
 * PANEL   (local input/output)          HPL_T_panel *
97
 *         On entry,  PANEL  points to the data structure containing the
98
 *         panel information.
99
 *
100
 * K       (global input)                const int
101
 *         On entry, K specifies the number of entries in IPID.  K is at
102
 *         least 2*N, and at most 4*N.
103
 *
104
 * IPID    (global input)                const int *
105
 *         On entry,  IPID  is an array of length K. The first K entries
106
 *         of that array contain the src and final destination resulting
107
 *         from the application of the interchanges.
108
 *
109
 * IPA     (global output)               int *
110
 *         On exit,  IPA  specifies  the number of rows that the current
111
 *         process row has that either belong to U  or should be swapped
112
 *         with remote rows of A.
113
 *
114
 * LINDXA  (global output)               int *
115
 *         On entry, LINDXA  is an array of dimension 2*N. On exit, this
116
 *         array contains the local indexes of the rows of A I have that
117
 *         should be copied into U.
118
 *
119
 * LINDXAU (global output)               int *
120
 *         On exit, LINDXAU  is an array of dimension 2*N. On exit, this
121
 *         array contains  the local destination  information encoded as
122
 *         follows.  If LINDXAU(k) >= 0, row  LINDXA(k)  of A  is  to be
123
 *         copied in U at position LINDXAU(k).  Otherwise, row LINDXA(k)
124
 *         of A should be locally copied into A(-LINDXAU(k),:).
125
 *
126
 * IPLEN   (global output)               int *
127
 *         On entry, IPLEN is an array of dimension NPROW + 1. On  exit,
128
 *         this array is such that  IPLEN[i]  is the number of rows of A
129
 *         in  the  processes  before  process  IPMAP[i]  after the sort
130
 *         with the convention that IPLEN[nprow]  is the total number of
131
 *         rows of the panel.  In other words IPLEN[i+1]-IPLEN[i] is the
132
 *         local number of rows of A that should be moved to the process
133
 *         IPMAP[i]. IPLEN is such that the number of rows of the source
134
 *         process  row can be computed as  IPLEN[1] - IPLEN[0], and the
135
 *         remaining  entries  of  this  array  are  sorted  so that the
136
 *         quantities IPLEN[i+1] - IPLEN[i] are logarithmically sorted.
137
 *
138
 * IPMAP   (global output)               int *
139
 *         On entry, IPMAP is an array of dimension NPROW. On exit, this
140
 *         array contains  the logarithmic mapping of the processes.  In
141
 *         other words, IPMAP[myrow] is the corresponding sorted process
142
 *         coordinate.
143
 *
144
 * IPMAPM1 (global output)               int *
145
 *         On entry, IPMAPM1  is an array of dimension NPROW.  On  exit,
146
 *         this  array  contains  the inverse of the logarithmic mapping
147
 *         contained  in  IPMAP:  IPMAPM1[ IPMAP[i] ] = i,  for all i in
148
 *         [0.. NPROCS)
149
 *
150
 * PERMU   (global output)               int *
151
 *         On entry,  PERMU  is an array of dimension JB. On exit, PERMU
152
 *         contains  a sequence of permutations,  that should be applied
153
 *         in increasing order to permute in place the row panel U.
154
 *
155
 * IWORK   (workspace)                   int *
156
 *         On entry, IWORK is a workarray of dimension 2*JB.
157
 *
158
 * ---------------------------------------------------------------------
159
 */ 
160
/*
161
 * .. Local Variables ..
162
 */
163
   int                        * iwork;
164
   int                        dst, dstrow, fndd, i, ia, icurrow, il,
165
                              ip, ipU, iroff, j, jb, myrow, nb, nprow,
166
                              src, srcrow;
167
/* ..
168
 * .. Executable Statements ..
169
 */
170
/*
171
 * Logarithmic sort of the processes - compute IPMAP, IPLEN and IPMAPM1
172
 */
173
   HPL_plindx10( PANEL, K, IPID, IPLEN, IPMAP, IPMAPM1 );
174
/*
175
 * Compute the local arrays  LINDXA  and  LINDXAU  containing  the local
176
 * source and final destination position resulting from  the application
177
 * of N interchanges. Compute LINDXA and LINDXAU in icurrow,  and LINDXA
178
 * elsewhere and PERMU in every process.
179
 */
180
   myrow = PANEL->grid->myrow; nprow   = PANEL->grid->nprow;
181
   jb    = PANEL->jb;          nb      = PANEL->nb;     ia = PANEL->ia;
182
   iroff = PANEL->ii;          icurrow = PANEL->prow;
183

    
184
   iwork = IWORK + jb;
185
 
186
   if( myrow == icurrow )
187
   {
188
      for( i = 0, ip = 0, ipU = 0; i < K; i += 2 )
189
      {
190
         src = IPID[i]; Mindxg2p( src, nb, nb, srcrow, 0, nprow );
191
 
192
         if( srcrow == icurrow )
193
         {
194
            dst = IPID[i+1]; Mindxg2p( dst, nb, nb, dstrow, 0, nprow );
195
 
196
            Mindxg2l( il, src, nb, nb, myrow, 0, nprow );
197
            LINDXA[ip] = il - iroff;
198
 
199
            if( ( dstrow == icurrow ) && ( dst - ia < jb ) )
200
            {
201
               PERMU[ipU] = dst - ia;  il = IPMAPM1[dstrow];
202
               j          = IPLEN[il]; iwork[ipU] = LINDXAU[ip] = j;
203
               IPLEN[il]++; ipU++;
204
            }
205
            else if( dstrow != icurrow )
206
            {
207
               j = 0;
208
               do { fndd = ( dst == IPID[j] ); j+=2; }
209
               while( !fndd && ( j < K ) );
210
 
211
               PERMU[ipU] = IPID[j-1]-ia; il = IPMAPM1[dstrow];
212
               j          = IPLEN[il];    iwork[ipU] = LINDXAU[ip] = j;
213
               IPLEN[il]++; ipU++;
214
            }
215
            else if( ( dstrow == icurrow ) && ( dst - ia >= jb ) )
216
            {
217
               Mindxg2l( il, dst, nb, nb, myrow, 0, nprow );
218
               LINDXAU[ip] = iroff - il;
219
            }
220
            ip++;
221
         }
222
      }
223
      *IPA = ip;
224
   }
225
   else
226
   {
227
      for( i = 0, ip = 0, ipU = 0; i < K; i += 2 )
228
      {
229
         src = IPID[i  ]; Mindxg2p( src, nb, nb, srcrow, 0, nprow );
230
         dst = IPID[i+1]; Mindxg2p( dst, nb, nb, dstrow, 0, nprow );
231
/*
232
 * LINDXA[i] is the local index of the row of A that belongs into U
233
 */
234
         if( myrow == dstrow )
235
         {
236
            Mindxg2l( il, dst, nb, nb, myrow, 0, nprow );
237
            LINDXA[ip] = il - iroff; ip++;
238
         }
239
/*
240
 * iwork[i] is the local (current) position  index in U
241
 * PERMU[i] is the local (final) destination index in U
242
 */
243
         if( srcrow == icurrow )
244
         {
245
            if( ( dstrow == icurrow ) && ( dst - ia < jb ) )
246
            {
247
               PERMU[ipU] = dst - ia;  il = IPMAPM1[dstrow];
248
               iwork[ipU] = IPLEN[il]; IPLEN[il]++; ipU++;
249
            }
250
            else if( dstrow != icurrow )
251
            {
252
               j = 0;
253
               do { fndd = ( dst == IPID[j] ); j+=2; }
254
               while( !fndd && ( j < K ) );
255
               PERMU[ipU] = IPID[j-1] - ia; il = IPMAPM1[dstrow];
256
               iwork[ipU] = IPLEN[il]; IPLEN[il]++; ipU++;
257
            }
258
         }
259
      }
260
      *IPA = 0;
261
   }
262
/*
263
 * Simplify iwork and PERMU, return in PERMU the sequence of permutation
264
 * that need to be apply to U after it has been broadcast.
265
 */
266
   HPL_perm( jb, iwork, PERMU, IWORK );
267
/*
268
 * Reset IPLEN to its correct value
269
 */
270
   for( i = nprow; i > 0; i-- ) IPLEN[i] = IPLEN[i-1];
271
   IPLEN[0] = 0; 
272
/*
273
 * End of HPL_plindx1
274
 */
275
}