Statistiques
| Révision :

root / src / pgesv / HPL_pdlaswp00T.c @ 9

Historique | Voir | Annoter | Télécharger (16,8 ko)

/*
 * -- High Performance Computing Linpack Benchmark (HPL)
 *    HPL - 2.0 - September 10, 2008
 *    Antoine P. Petitet
 *    University of Tennessee, Knoxville
 *    Innovative Computing Laboratory
 *    (C) Copyright 2000-2008 All Rights Reserved
 *
 * -- Copyright notice and Licensing terms:
 *
 * Redistribution  and  use in  source and binary forms, with or without
 * modification, are  permitted provided  that the following  conditions
 * are met:
 *
 * 1. Redistributions  of  source  code  must retain the above copyright
 * notice, this list of conditions and the following disclaimer.
 *
 * 2. Redistributions in binary form must reproduce  the above copyright
 * notice, this list of conditions,  and the following disclaimer in the
 * documentation and/or other materials provided with the distribution.
 *
 * 3. All  advertising  materials  mentioning  features  or  use of this
 * software must display the following acknowledgement:
 * This  product  includes  software  developed  at  the  University  of
 * Tennessee, Knoxville, Innovative Computing Laboratory.
 *
 * 4. The name of the  University,  the name of the  Laboratory,  or the
 * names  of  its  contributors  may  not  be used to endorse or promote
 * products  derived   from   this  software  without  specific  written
 * permission.
 *
 * -- Disclaimer:
 *
 * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
 * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
 * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 * ---------------------------------------------------------------------
 */
/*
 * Include files
 */
50 1 equemene
#include "hpl.h"
51 1 equemene
52 1 equemene
#ifdef STDC_HEADERS
53 1 equemene
void HPL_pdlaswp00T
54 1 equemene
(
55 1 equemene
   HPL_T_panel *                    PBCST,
56 1 equemene
   int *                            IFLAG,
57 1 equemene
   HPL_T_panel *                    PANEL,
58 1 equemene
   const int                        NN
59 1 equemene
)
60 1 equemene
#else
61 1 equemene
void HPL_pdlaswp00T
62 1 equemene
( PBCST, IFLAG, PANEL, NN )
63 1 equemene
   HPL_T_panel *                    PBCST;
64 1 equemene
   int *                            IFLAG;
65 1 equemene
   HPL_T_panel *                    PANEL;
66 1 equemene
   const int                        NN;
67 1 equemene
#endif
68 1 equemene
{
69 1 equemene
/*
70 1 equemene
 * Purpose
71 1 equemene
 * =======
72 1 equemene
 *
73 1 equemene
 * HPL_pdlaswp00T applies the  NB  row interchanges to  NN columns of the
74 1 equemene
 * trailing submatrix and broadcast a column panel.
75 1 equemene
 *
76 1 equemene
 * Bi-directional  exchange  is used to perform the  swap :: broadcast of
77 1 equemene
 * the row  panel U at once, resulting in a lower number of messages than
78 1 equemene
 * usual as well as a lower communication volume. With P process rows and
79 1 equemene
 * assuming  bi-directional links,  the running time of this function can
80 1 equemene
 * be approximated by:
81 1 equemene
 *
82 1 equemene
 *    log_2(P) * (lat + NB*LocQ(N) / bdwth)
83 1 equemene
 *
84 1 equemene
 * where  NB  is the number of rows of the row panel U,  N is the global
85 1 equemene
 * number of columns being updated,  lat and bdwth  are the latency  and
86 1 equemene
 * bandwidth  of  the  network  for  double  precision real words.  Mono
87 1 equemene
 * directional links will double this communication cost.
88 1 equemene
 *
89 1 equemene
 * Arguments
90 1 equemene
 * =========
91 1 equemene
 *
92 1 equemene
 * PBCST   (local input/output)          HPL_T_panel *
93 1 equemene
 *         On entry,  PBCST  points to the data structure containing the
94 1 equemene
 *         panel (to be broadcast) information.
95 1 equemene
 *
96 1 equemene
 * IFLAG   (local input/output)          int *
97 1 equemene
 *         On entry, IFLAG  indicates  whether or not  the broadcast has
98 1 equemene
 *         already been completed.  If not,  probing will occur, and the
99 1 equemene
 *         outcome will be contained in IFLAG on exit.
100 1 equemene
 *
101 1 equemene
 * PANEL   (local input/output)          HPL_T_panel *
102 1 equemene
 *         On entry,  PANEL  points to the data structure containing the
103 1 equemene
 *         panel (to be broadcast and swapped) information.
104 1 equemene
 *
105 1 equemene
 * NN      (local input)                 const int
106 1 equemene
 *         On entry, NN specifies  the  local  number  of columns of the
107 1 equemene
 *         trailing  submatrix  to  be swapped and broadcast starting at
108 1 equemene
 *         the current position. NN must be at least zero.
109 1 equemene
 *
110 1 equemene
 * ---------------------------------------------------------------------
111 1 equemene
 */
112 1 equemene
/*
113 1 equemene
 * .. Local Variables ..
114 1 equemene
 */
115 1 equemene
   MPI_Comm                  comm;
116 1 equemene
   HPL_T_grid                * grid;
117 1 equemene
   double                    * A, * U, * W;
118 1 equemene
   void                       * vptr = NULL;
119 1 equemene
   int                       * ipID, * lindxA, * lindxAU, * llen,
120 1 equemene
                             * llen_sv;
121 1 equemene
   unsigned int              ip2, ip2_=1, ipdist, ipow=1, mask=1,
122 1 equemene
                             mydist, mydis_;
123 1 equemene
   int                       Cmsgid=MSGID_BEGIN_PFACT, Np2, align,
124 1 equemene
                             hdim, i, icurrow, *iflag, ipA, ipW, *ipl,
125 1 equemene
                             iprow, jb, k, lda, ldW, myrow, n, nprow,
126 1 equemene
                             partner, root, size_, usize;
127 1 equemene
#define LDU                  n
128 1 equemene
/* ..
129 1 equemene
 * .. Executable Statements ..
130 1 equemene
 */
131 1 equemene
   n = Mmin( NN, PANEL->n ); jb = PANEL->jb;
132 1 equemene
/*
133 1 equemene
 * Quick return if there is nothing to do
134 1 equemene
 */
135 1 equemene
   if( ( n <= 0 ) || ( jb <= 0 ) ) return;
136 1 equemene
137 1 equemene
#ifdef HPL_DETAILED_TIMING
138 1 equemene
   HPL_ptimer( HPL_TIMING_LASWP );
139 1 equemene
#endif
140 1 equemene
/*
141 1 equemene
 * Retrieve parameters from the PANEL data structure
142 1 equemene
 */
143 1 equemene
   grid  = PANEL->grid;    nprow   = grid->nprow; myrow = grid->myrow;
144 1 equemene
   comm  = grid->col_comm; ip2     = (unsigned int)grid->row_ip2;
145 1 equemene
   hdim  = grid->row_hdim; align   = PANEL->algo->align;
146 1 equemene
   A     = PANEL->A;       U       = PANEL->U;    iflag = PANEL->IWORK;
147 1 equemene
   lda   = PANEL->lda;     icurrow = PANEL->prow; usize = jb * n;
148 1 equemene
   ldW   = n + 1;
149 1 equemene
/*
150 1 equemene
 * Allocate space for temporary W (ldW * jb)
151 1 equemene
 */
152 1 equemene
   vptr = (void*)malloc( ( (size_t)(align) +
153 1 equemene
                           ((size_t)(jb) * (size_t)(ldW))) *
154 1 equemene
                           sizeof(double) );
155 1 equemene
   if( vptr == NULL )
156 1 equemene
   { HPL_pabort( __LINE__, "HPL_pdlaswp00T", "Memory allocation failed" ); }
157 1 equemene
158 1 equemene
   W = (double *)HPL_PTR( vptr, ((size_t)(align) * sizeof(double) ) );
159 1 equemene
/*
160 1 equemene
 * Construct ipID and its local counter parts lindxA, lindxAU -  llen is
161 1 equemene
 * the number of rows/columns that I have in workspace and that I should
162 1 equemene
 * send.  Compute  lindx_, ipA, llen if it has not already been done for
163 1 equemene
 * this panel;
164 1 equemene
 */
165 1 equemene
   k = (int)((unsigned int)(jb) << 1); ipl = iflag + 1; ipID = ipl + 1;
166 1 equemene
   lindxA  = ipID + ((unsigned int)(k) << 1); lindxAU = lindxA + k;
167 1 equemene
   llen    = lindxAU + k; llen_sv = llen + nprow;
168 1 equemene
169 1 equemene
   if( *iflag == -1 )    /* no index arrays have been computed so far */
170 1 equemene
   {
171 1 equemene
      HPL_pipid(   PANEL,  ipl, ipID );
172 1 equemene
      HPL_plindx0( PANEL, *ipl, ipID, lindxA, lindxAU, llen_sv );
173 1 equemene
      *iflag = 0;
174 1 equemene
   }
175 1 equemene
   else if( *iflag == 1 ) /* HPL_pdlaswp01T called before: reuse ipID */
176 1 equemene
   {
177 1 equemene
      HPL_plindx0( PANEL, *ipl, ipID, lindxA, lindxAU, llen_sv );
178 1 equemene
      *iflag = 0;
179 1 equemene
   }
180 1 equemene
/*
181 1 equemene
 * Copy the llen_sv into llen - Reset ipA to its correct value
182 1 equemene
 */
183 1 equemene
   ipA = llen_sv[myrow];
184 1 equemene
   for( i = 0; i < nprow; i++ ) { llen[i]  = llen_sv[i]; }
185 1 equemene
/*
186 1 equemene
 * For i in [0..2*jb),  lindxA[i] is the offset in A of a row that ulti-
187 1 equemene
 * mately goes to U( lindxAU[i], : ) or U( :, lindxAU[i] ).  In icurrow,
188 1 equemene
 * we directly pack into U, otherwise we pack into workspace. The  first
189 1 equemene
 * entry of each column packed in workspace is in fact the row or column
190 1 equemene
 * offset in U where it should go to.
191 1 equemene
 */
192 1 equemene
   if( myrow == icurrow )
193 1 equemene
   {
194 1 equemene
      HPL_dlaswp01T( ipA, n, A, lda, U, LDU, lindxA, lindxAU );
195 1 equemene
   }
196 1 equemene
   else
197 1 equemene
   {
198 1 equemene
      HPL_dlaswp02N( ipA, n, A, lda, W, W+1, ldW, lindxA, lindxAU );
199 1 equemene
   }
200 1 equemene
/*
201 1 equemene
 * Probe for column panel - forward it when available
202 1 equemene
 */
203 1 equemene
   if( *IFLAG == HPL_KEEP_TESTING ) (void) HPL_bcast( PBCST, IFLAG );
204 1 equemene
/*
205 1 equemene
 * Algorithm for bi-directional data exchange:
206 1 equemene
 *
207 1 equemene
 * As long as I have not talked to a process that  already  had the data
208 1 equemene
 * from icurrow,  I will be sending the workspace,  otherwise  I will be
209 1 equemene
 * sending U. Note that the columns in workspace contain the local index
210 1 equemene
 * in U they should go to.
211 1 equemene
 *
212 1 equemene
 * If I am receiving from a process that  has the data from  icurrow,  I
213 1 equemene
 * will be receiving in  U, copy the data of  U  that stays into  A, and
214 1 equemene
 * then the columns I have in workspace into U; otherwise  I will be re-
215 1 equemene
 * ceiving in the remaining workspace.  If I am one  of  those processes
216 1 equemene
 * that already has the data from icurrow, I will be immediately copying
217 1 equemene
 * the data I have in my workspace into U.
218 1 equemene
 *
219 1 equemene
 * When I receive U, some of U should be copied in my piece of A  before
220 1 equemene
 * I can copy the rows I have in my workspace into  U.  This information
221 1 equemene
 * is kept in the lists  lindx_:  the row lindxAU[i] should be copied in
222 1 equemene
 * the row  lindxA[i] of my piece of  A, just as in the reversed initial
223 1 equemene
 * packing operation. Those rows are thus the first ones in the work ar-
224 1 equemene
 * ray.  After  this  operation  has  been  performed,  I will not  need
225 1 equemene
 * those lindx arrays,  and  I  will  always be sending a buffer of size
226 1 equemene
 * jb x n, or n x jb, that is, U.
227 1 equemene
 *
228 1 equemene
 * At  every  step  of  the algorithm, it is necesary to update the list
229 1 equemene
 * llen,  so that I can figure out how large the next messages I will be
230 1 equemene
 * sending/receiving are.  It is  obvious when I am sending U. It is not
231 1 equemene
 * otherwise.
232 1 equemene
 *
233 1 equemene
 * We  choose  icurrow  to be the source of the bi-directional exchange.
234 1 equemene
 * This allows the processes in the non-power 2 part to receive U at the
235 1 equemene
 * first exchange,  and  then  broadcast internally this U so that those
236 1 equemene
 * processes can grab their piece of A.
237 1 equemene
 */
238 1 equemene
   if( myrow == icurrow ) { llen[myrow] = 0; ipA = 0; }
239 1 equemene
   ipW    = ipA;
240 1 equemene
   Np2    = ( ( size_ = nprow - ip2 ) != 0 );
241 1 equemene
   mydist = (unsigned int)MModSub( myrow, icurrow, nprow );
242 1 equemene
/*
243 1 equemene
 * bi-directional exchange:   If nprow is not a power of 2,  proc[i-ip2]
244 1 equemene
 * receives local data from proc[i] for all i in  [ip2..nprow);  icurrow
245 1 equemene
 * is the source, these last process indexes are relative to icurrow.
246 1 equemene
 */
247 1 equemene
   if( ( Np2 != 0 ) && ( ( partner = (int)(mydist ^ ip2) ) < nprow ) )
248 1 equemene
   {
249 1 equemene
      partner = MModAdd( icurrow, partner, nprow );
250 1 equemene
251 1 equemene
      if( mydist == 0 )  /* I am the current row: I send U and recv W */
252 1 equemene
      {
253 1 equemene
         (void) HPL_sdrv( U, usize, Cmsgid, W, llen[partner] * ldW,
254 1 equemene
                          Cmsgid, partner, comm );
255 1 equemene
         if( llen[partner] > 0 )
256 1 equemene
            HPL_dlaswp03T( llen[partner], n, U, LDU, W, W+1, ldW );
257 1 equemene
      }
258 1 equemene
      else if( mydist == ip2 )
259 1 equemene
      {                      /* I recv U for later Bcast, I send my W */
260 1 equemene
         (void) HPL_sdrv( W, llen[myrow]*ldW, Cmsgid, U, usize,
261 1 equemene
                          Cmsgid, partner, comm );
262 1 equemene
      }
263 1 equemene
      else               /* None of us is icurrow, we exchange our Ws */
264 1 equemene
      {
265 1 equemene
         if( ( mydist & ip2 ) != 0 )
266 1 equemene
         {
267 1 equemene
            (void) HPL_send( W, llen[myrow]*ldW, partner, Cmsgid, comm );
268 1 equemene
         }
269 1 equemene
         else
270 1 equemene
         {
271 1 equemene
            (void) HPL_recv( Mptr( W, 0, ipW, ldW ), llen[partner]*ldW,
272 1 equemene
                             partner, Cmsgid, comm );
273 1 equemene
            if( llen[partner] > 0 ) ipW += llen[partner];
274 1 equemene
         }
275 1 equemene
      }
276 1 equemene
   }
277 1 equemene
/*
278 1 equemene
 * Update llen
279 1 equemene
 */
280 1 equemene
   for( i = 1; i < size_; i++ )
281 1 equemene
   {
282 1 equemene
      iprow   = MModAdd( icurrow, i,          nprow );
283 1 equemene
      partner = MModAdd( iprow,   (int)(ip2), nprow );
284 1 equemene
      llen[ iprow ] += llen[ partner ];
285 1 equemene
   }
286 1 equemene
/*
287 1 equemene
 * Probe for column panel - forward it when available
288 1 equemene
 */
289 1 equemene
   if( *IFLAG == HPL_KEEP_TESTING ) (void) HPL_bcast( PBCST, IFLAG );
290 1 equemene
/*
291 1 equemene
 * power of 2 part of the processes collection:  only processes [0..ip2)
292 1 equemene
 * are working;  some of them  (mydist >> (k+1) == 0) either send or re-
293 1 equemene
 * ceive U.  At every step k, k is in [0 .. hdim),  of the algorithm,  a
294 1 equemene
 * process pair that exchanges  U  is such that  (mydist >> (k+1) == 0).
295 1 equemene
 * Among  those  processes,  the  ones  that are sending U are such that
296 1 equemene
 * mydist >> k == 0.
297 1 equemene
 */
298 1 equemene
   if( mydist < ip2 )
299 1 equemene
   {
300 1 equemene
      k = 0;
301 1 equemene
302 1 equemene
      while( k < hdim )
303 1 equemene
      {
304 1 equemene
         partner = (int)(mydist ^ ipow);
305 1 equemene
         partner = MModAdd( icurrow, partner, nprow );
306 1 equemene
/*
307 1 equemene
 * Exchange and combine the local results - If I receive U,  then I must
308 1 equemene
 * copy from U the rows that belong to my piece of A, and then update  U
309 1 equemene
 * by  copying in it the rows I have accumulated in W.  Otherwise, I re-
310 1 equemene
 * ceive W.  In this later case, and I have U, I shall update my copy of
311 1 equemene
 * U by copying in it the rows I have accumulated in  W.  If  I  did not
312 1 equemene
 * have U before, I simply need to update my pointer in W for later use.
313 1 equemene
 */
314 1 equemene
         if( ( mydist >> (unsigned int)( k + 1 ) ) == 0 )
315 1 equemene
         {
316 1 equemene
            if( ( mydist >> (unsigned int)(k) ) == 0 )
317 1 equemene
            {
318 1 equemene
               (void) HPL_sdrv( U, usize, Cmsgid, Mptr( W, 0, ipW,
319 1 equemene
                                ldW ), llen[partner]*ldW, Cmsgid,
320 1 equemene
                                partner, comm );
321 1 equemene
               HPL_dlaswp03T( llen[partner], n, U, LDU, Mptr( W, 0, ipW,
322 1 equemene
                              ldW ), Mptr( W, 1, ipW, ldW ), ldW );
323 1 equemene
               ipW += llen[partner];
324 1 equemene
            }
325 1 equemene
            else
326 1 equemene
            {
327 1 equemene
               (void) HPL_sdrv( W, llen[myrow]*ldW, Cmsgid, U, usize,
328 1 equemene
                                Cmsgid, partner, comm );
329 1 equemene
               HPL_dlaswp04T( ipA, llen[myrow], n, U, LDU, A, lda, W,
330 1 equemene
                              W+1, ldW, lindxA, lindxAU );
331 1 equemene
            }
332 1 equemene
         }
333 1 equemene
         else
334 1 equemene
         {
335 1 equemene
            (void) HPL_sdrv( W, llen[myrow]*ldW, Cmsgid, Mptr( W, 0,
336 1 equemene
                             ipW, ldW ), llen[partner]*ldW, Cmsgid,
337 1 equemene
                             partner, comm );
338 1 equemene
            ipW += llen[partner];
339 1 equemene
         }
340 1 equemene
/*
341 1 equemene
 * Update llen - Go to next process pairs
342 1 equemene
 */
343 1 equemene
         iprow = icurrow; ipdist = 0;
344 1 equemene
         do
345 1 equemene
         {
346 1 equemene
            if( (unsigned int)( partner = (int)(ipdist ^ ipow) ) > ipdist )
347 1 equemene
            {
348 1 equemene
               partner = MModAdd( icurrow, partner, nprow );
349 1 equemene
               llen[iprow]  += llen[partner];
350 1 equemene
               llen[partner] = llen[iprow];
351 1 equemene
            }
352 1 equemene
            iprow = MModAdd( iprow, 1, nprow ); ipdist++;
353 1 equemene
354 1 equemene
         } while( ipdist < ip2 );
355 1 equemene
356 1 equemene
         ipow <<= 1; k++;
357 1 equemene
/*
358 1 equemene
 * Probe for column panel - forward it when available
359 1 equemene
 */
360 1 equemene
         if( *IFLAG == HPL_KEEP_TESTING ) (void) HPL_bcast( PBCST, IFLAG );
361 1 equemene
      }
362 1 equemene
   }
363 1 equemene
   else
364 1 equemene
   {
365 1 equemene
/*
366 1 equemene
 * non power of 2 part of the process collection:  proc[ip2] broadcast U
367 1 equemene
 * to procs[ip2..nprow) (relatively to icurrow).
368 1 equemene
 */
369 1 equemene
      if( size_ > 1 )
370 1 equemene
      {
371 1 equemene
         k = size_ - 1;
372 1 equemene
         while( k > 1 ) { k >>= 1; ip2_ <<= 1; mask <<= 1; mask++; }
373 1 equemene
         root   = MModAdd( icurrow, (int)(ip2), nprow );
374 1 equemene
         mydis_ = (unsigned int)MModSub( myrow,  root, nprow );
375 1 equemene
376 1 equemene
         do
377 1 equemene
         {
378 1 equemene
            mask ^= ip2_;
379 1 equemene
            if( ( mydis_ & mask ) == 0 )
380 1 equemene
            {
381 1 equemene
               partner = (int)(mydis_ ^ ip2_);
382 1 equemene
               if( ( mydis_ & ip2_ ) != 0 )
383 1 equemene
               {
384 1 equemene
                  (void) HPL_recv( U, usize, MModAdd( root, partner,
385 1 equemene
                                   nprow ), Cmsgid, comm );
386 1 equemene
387 1 equemene
               }
388 1 equemene
               else if( partner < size_ )
389 1 equemene
               {
390 1 equemene
                  (void) HPL_send( U, usize, MModAdd( root, partner,
391 1 equemene
                                   nprow ), Cmsgid, comm );
392 1 equemene
               }
393 1 equemene
            }
394 1 equemene
            ip2_ >>= 1;
395 1 equemene
/*
396 1 equemene
 * Probe for column panel - forward it when available
397 1 equemene
 */
398 1 equemene
            if( *IFLAG == HPL_KEEP_TESTING ) (void) HPL_bcast( PBCST, IFLAG );
399 1 equemene
400 1 equemene
         } while( ip2_ > 0 );
401 1 equemene
      }
402 1 equemene
/*
403 1 equemene
 * Every process in [ip2..nprow) (relatively to icurrow) grabs its piece
404 1 equemene
 * of A.
405 1 equemene
 */
406 1 equemene
      HPL_dlaswp05T( ipA, n, A, lda, U, LDU, lindxA, lindxAU );
407 1 equemene
   }
408 1 equemene
/*
409 1 equemene
 * If  nprow  is not a power of 2,  proc[i-ip2]  sends  global result to
410 1 equemene
 * proc[i] for all i in [ip2..nprow);
411 1 equemene
 */
412 1 equemene
   if( ( Np2 != 0 ) && ( ( partner = (int)(mydist ^ ip2) ) < nprow ) )
413 1 equemene
   {
414 1 equemene
      partner = MModAdd( icurrow, partner, nprow );
415 1 equemene
      if( ( mydist & ip2 ) != 0 )
416 1 equemene
      { (void) HPL_recv( U, usize, partner, Cmsgid, comm ); }
417 1 equemene
      else
418 1 equemene
      { (void) HPL_send( U, usize, partner, Cmsgid, comm ); }
419 1 equemene
   }
420 1 equemene
421 1 equemene
   if( vptr ) free( vptr );
422 1 equemene
/*
423 1 equemene
 * Probe for column panel - forward it when available
424 1 equemene
 */
425 1 equemene
   if( *IFLAG == HPL_KEEP_TESTING ) (void) HPL_bcast( PBCST, IFLAG );
426 1 equemene
427 1 equemene
#ifdef HPL_DETAILED_TIMING
428 1 equemene
   HPL_ptimer( HPL_TIMING_LASWP );
429 1 equemene
#endif
430 1 equemene
/*
431 1 equemene
 * End of HPL_pdlaswp00T
432 1 equemene
 */
433 1 equemene
}