Statistiques
| Révision :

root / src / pfact / HPL_dlocswpN.c @ 9

Historique | Voir | Annoter | Télécharger (17,35 ko)

/*
 * -- High Performance Computing Linpack Benchmark (HPL)
 *    HPL - 2.0 - September 10, 2008
 *    Antoine P. Petitet
 *    University of Tennessee, Knoxville
 *    Innovative Computing Laboratory
 *    (C) Copyright 2000-2008 All Rights Reserved
 *
 * -- Copyright notice and Licensing terms:
 *
 * Redistribution  and  use in  source and binary forms, with or without
 * modification, are  permitted provided  that the following  conditions
 * are met:
 *
 * 1. Redistributions  of  source  code  must retain the above copyright
 * notice, this list of conditions and the following disclaimer.
 *
 * 2. Redistributions in binary form must reproduce  the above copyright
 * notice, this list of conditions,  and the following disclaimer in the
 * documentation and/or other materials provided with the distribution.
 *
 * 3. All  advertising  materials  mentioning  features  or  use of this
 * software must display the following acknowledgement:
 * This  product  includes  software  developed  at  the  University  of
 * Tennessee, Knoxville, Innovative Computing Laboratory.
 *
 * 4. The name of the  University,  the name of the  Laboratory,  or the
 * names  of  its  contributors  may  not  be used to endorse or promote
 * products  derived   from   this  software  without  specific  written
 * permission.
 *
 * -- Disclaimer:
 *
 * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
 * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
 * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 * ---------------------------------------------------------------------
 */
/*
 * Include files
 */
#include "hpl.h"
/*
 * Define default value for unrolling factor
 */
#ifndef HPL_LOCSWP_DEPTH
#define    HPL_LOCSWP_DEPTH        32
#define    HPL_LOCSWP_LOG2_DEPTH    5
#endif
/*
 * Reject inconsistent overrides at compile time.  The unrolled loops in
 * this file advance by  HPL_LOCSWP_DEPTH  per iteration,  compute their
 * trip count by shifting with  HPL_LOCSWP_LOG2_DEPTH,  and only provide
 * unrolled statements for depths  1, 2, 4, 8, 16 and 32.  Any other
 * combination would silently skip or overrun columns.
 */
#ifndef HPL_LOCSWP_LOG2_DEPTH
#error "HPL_LOCSWP_DEPTH is defined but HPL_LOCSWP_LOG2_DEPTH is not"
#elif ( HPL_LOCSWP_DEPTH < 1 ) || ( HPL_LOCSWP_DEPTH > 32 ) || \
      ( HPL_LOCSWP_DEPTH != ( 1 << HPL_LOCSWP_LOG2_DEPTH ) )
#error "HPL_LOCSWP_DEPTH must be 1, 2, 4, 8, 16 or 32 and equal to 1 << HPL_LOCSWP_LOG2_DEPTH"
#endif
#ifdef STDC_HEADERS
60 1 equemene
void HPL_dlocswpN
61 1 equemene
(
62 1 equemene
   HPL_T_panel *                    PANEL,
63 1 equemene
   const int                        II,
64 1 equemene
   const int                        JJ,
65 1 equemene
   double *                         WORK
66 1 equemene
)
67 1 equemene
#else
68 1 equemene
void HPL_dlocswpN
69 1 equemene
( PANEL, II, JJ, WORK )
70 1 equemene
   HPL_T_panel *                    PANEL;
71 1 equemene
   const int                        II;
72 1 equemene
   const int                        JJ;
73 1 equemene
   double *                         WORK;
74 1 equemene
#endif
75 1 equemene
{
76 1 equemene
/*
77 1 equemene
 * Purpose
78 1 equemene
 * =======
79 1 equemene
 *
80 1 equemene
 * HPL_dlocswpN performs  the local swapping operations  within a panel.
81 1 equemene
 * The lower triangular  N0-by-N0  upper block of the panel is stored in
82 1 equemene
 * no-transpose form (i.e. just like the input matrix itself).
83 1 equemene
 *
84 1 equemene
 * Arguments
85 1 equemene
 * =========
86 1 equemene
 *
87 1 equemene
 * PANEL   (local input/output)          HPL_T_panel *
88 1 equemene
 *         On entry,  PANEL  points to the data structure containing the
89 1 equemene
 *         panel information.
90 1 equemene
 *
91 1 equemene
 * II      (local input)                 const int
92 1 equemene
 *         On entry, II  specifies the row offset where the column to be
93 1 equemene
 *         operated on starts with respect to the panel.
94 1 equemene
 *
95 1 equemene
 * JJ      (local input)                 const int
96 1 equemene
 *         On entry, JJ  specifies the column offset where the column to
97 1 equemene
 *         be operated on starts with respect to the panel.
98 1 equemene
 *
99 1 equemene
 * WORK    (local workspace)             double *
100 1 equemene
 *         On entry, WORK  is a workarray of size at least 2 * (4+2*N0).
101 1 equemene
 *         WORK[0] contains  the  local  maximum  absolute value scalar,
102 1 equemene
 *         WORK[1] contains  the corresponding local row index,  WORK[2]
103 1 equemene
 *         contains the corresponding global row index, and  WORK[3]  is
104 1 equemene
 *         the coordinate of process owning this max.  The N0 length max
105 1 equemene
 *         row is stored in WORK[4:4+N0-1];  Note  that this is also the
106 1 equemene
 *         JJth row  (or column) of L1. The remaining part of this array
107 1 equemene
 *         is used as workspace.
108 1 equemene
 *
109 1 equemene
 * ---------------------------------------------------------------------
110 1 equemene
 */
111 1 equemene
/*
112 1 equemene
 * .. Local Variables ..
113 1 equemene
 */
114 1 equemene
   double                     gmax;
115 1 equemene
   double                     * A1, * A2, * L, * Wr0, * Wmx;
116 1 equemene
   int                        ilindx, lda, myrow, n0, nr, nu;
117 1 equemene
   register int               i;
118 1 equemene
/* ..
119 1 equemene
 * .. Executable Statements ..
120 1 equemene
 */
121 1 equemene
   myrow = PANEL->grid->myrow; n0 = PANEL->jb; lda = PANEL->lda;
122 1 equemene
123 1 equemene
   Wr0   = ( Wmx = WORK + 4 ) + n0; Wmx[JJ] = gmax = WORK[0];
124 1 equemene
   nu    = (int)( ( (unsigned int)(n0) >> HPL_LOCSWP_LOG2_DEPTH )
125 1 equemene
                  << HPL_LOCSWP_LOG2_DEPTH );
126 1 equemene
   nr    = n0 - nu;
127 1 equemene
/*
128 1 equemene
 * Replicated swap and copy of the current (new) row of A into L1
129 1 equemene
 */
130 1 equemene
   L  = Mptr( PANEL->L1, JJ, 0, n0  );
131 1 equemene
/*
132 1 equemene
 * If the pivot is non-zero ...
133 1 equemene
 */
134 1 equemene
   if( gmax != HPL_rzero )
135 1 equemene
   {
136 1 equemene
/*
137 1 equemene
 * and if I own the current row of A ...
138 1 equemene
 */
139 1 equemene
      if( myrow == PANEL->prow )
140 1 equemene
      {
141 1 equemene
/*
142 1 equemene
 * and if I also own the row to be swapped with the current row of A ...
143 1 equemene
 */
144 1 equemene
         if( myrow == (int)(WORK[3]) )
145 1 equemene
         {
146 1 equemene
/*
147 1 equemene
 * and if the current row of A is not to swapped with itself ...
148 1 equemene
 */
149 1 equemene
            if( ( ilindx = (int)(WORK[1]) ) != 0 )
150 1 equemene
            {
151 1 equemene
/*
152 1 equemene
 * then copy the max row into L1 and locally swap the 2 rows of A.
153 1 equemene
 */
154 1 equemene
               A1 = Mptr( PANEL->A,  II,     0, lda );
155 1 equemene
               A2 = Mptr( A1,        ilindx, 0, lda );
156 1 equemene
157 1 equemene
               for( i = 0; i < nu; i += HPL_LOCSWP_DEPTH,
158 1 equemene
                    Wmx += HPL_LOCSWP_DEPTH, Wr0 += HPL_LOCSWP_DEPTH )
159 1 equemene
               {
160 1 equemene
                  *L=*A1=Wmx[ 0]; *A2=Wr0[ 0]; L+=n0; A1+=lda; A2+=lda;
161 1 equemene
#if ( HPL_LOCSWP_DEPTH >  1 )
162 1 equemene
                  *L=*A1=Wmx[ 1]; *A2=Wr0[ 1]; L+=n0; A1+=lda; A2+=lda;
163 1 equemene
#endif
164 1 equemene
#if ( HPL_LOCSWP_DEPTH >  2 )
165 1 equemene
                  *L=*A1=Wmx[ 2]; *A2=Wr0[ 2]; L+=n0; A1+=lda; A2+=lda;
166 1 equemene
                  *L=*A1=Wmx[ 3]; *A2=Wr0[ 3]; L+=n0; A1+=lda; A2+=lda;
167 1 equemene
#endif
168 1 equemene
#if ( HPL_LOCSWP_DEPTH >  4 )
169 1 equemene
                  *L=*A1=Wmx[ 4]; *A2=Wr0[ 4]; L+=n0; A1+=lda; A2+=lda;
170 1 equemene
                  *L=*A1=Wmx[ 5]; *A2=Wr0[ 5]; L+=n0; A1+=lda; A2+=lda;
171 1 equemene
                  *L=*A1=Wmx[ 6]; *A2=Wr0[ 6]; L+=n0; A1+=lda; A2+=lda;
172 1 equemene
                  *L=*A1=Wmx[ 7]; *A2=Wr0[ 7]; L+=n0; A1+=lda; A2+=lda;
173 1 equemene
#endif
174 1 equemene
#if ( HPL_LOCSWP_DEPTH >  8 )
175 1 equemene
                  *L=*A1=Wmx[ 8]; *A2=Wr0[ 8]; L+=n0; A1+=lda; A2+=lda;
176 1 equemene
                  *L=*A1=Wmx[ 9]; *A2=Wr0[ 9]; L+=n0; A1+=lda; A2+=lda;
177 1 equemene
                  *L=*A1=Wmx[10]; *A2=Wr0[10]; L+=n0; A1+=lda; A2+=lda;
178 1 equemene
                  *L=*A1=Wmx[11]; *A2=Wr0[11]; L+=n0; A1+=lda; A2+=lda;
179 1 equemene
                  *L=*A1=Wmx[12]; *A2=Wr0[12]; L+=n0; A1+=lda; A2+=lda;
180 1 equemene
                  *L=*A1=Wmx[13]; *A2=Wr0[13]; L+=n0; A1+=lda; A2+=lda;
181 1 equemene
                  *L=*A1=Wmx[14]; *A2=Wr0[14]; L+=n0; A1+=lda; A2+=lda;
182 1 equemene
                  *L=*A1=Wmx[15]; *A2=Wr0[15]; L+=n0; A1+=lda; A2+=lda;
183 1 equemene
#endif
184 1 equemene
#if ( HPL_LOCSWP_DEPTH > 16 )
185 1 equemene
                  *L=*A1=Wmx[16]; *A2=Wr0[16]; L+=n0; A1+=lda; A2+=lda;
186 1 equemene
                  *L=*A1=Wmx[17]; *A2=Wr0[17]; L+=n0; A1+=lda; A2+=lda;
187 1 equemene
                  *L=*A1=Wmx[18]; *A2=Wr0[18]; L+=n0; A1+=lda; A2+=lda;
188 1 equemene
                  *L=*A1=Wmx[19]; *A2=Wr0[19]; L+=n0; A1+=lda; A2+=lda;
189 1 equemene
                  *L=*A1=Wmx[20]; *A2=Wr0[20]; L+=n0; A1+=lda; A2+=lda;
190 1 equemene
                  *L=*A1=Wmx[21]; *A2=Wr0[21]; L+=n0; A1+=lda; A2+=lda;
191 1 equemene
                  *L=*A1=Wmx[22]; *A2=Wr0[22]; L+=n0; A1+=lda; A2+=lda;
192 1 equemene
                  *L=*A1=Wmx[23]; *A2=Wr0[23]; L+=n0; A1+=lda; A2+=lda;
193 1 equemene
                  *L=*A1=Wmx[24]; *A2=Wr0[24]; L+=n0; A1+=lda; A2+=lda;
194 1 equemene
                  *L=*A1=Wmx[25]; *A2=Wr0[25]; L+=n0; A1+=lda; A2+=lda;
195 1 equemene
                  *L=*A1=Wmx[26]; *A2=Wr0[26]; L+=n0; A1+=lda; A2+=lda;
196 1 equemene
                  *L=*A1=Wmx[27]; *A2=Wr0[27]; L+=n0; A1+=lda; A2+=lda;
197 1 equemene
                  *L=*A1=Wmx[28]; *A2=Wr0[28]; L+=n0; A1+=lda; A2+=lda;
198 1 equemene
                  *L=*A1=Wmx[29]; *A2=Wr0[29]; L+=n0; A1+=lda; A2+=lda;
199 1 equemene
                  *L=*A1=Wmx[30]; *A2=Wr0[30]; L+=n0; A1+=lda; A2+=lda;
200 1 equemene
                  *L=*A1=Wmx[31]; *A2=Wr0[31]; L+=n0; A1+=lda; A2+=lda;
201 1 equemene
#endif
202 1 equemene
               }
203 1 equemene
               for( i = 0; i < nr; i++, L += n0, A1 += lda, A2 += lda )
204 1 equemene
               { *L = *A1 = Wmx[i]; *A2 = Wr0[i]; }
205 1 equemene
            }
206 1 equemene
            else
207 1 equemene
            {
208 1 equemene
/*
209 1 equemene
 * otherwise the current row of  A  is swapped with itself, so just copy
210 1 equemene
 * the current of A into L1.
211 1 equemene
 */
212 1 equemene
               *Mptr( PANEL->A, II, JJ, lda ) = gmax;
213 1 equemene
214 1 equemene
               for( i = 0; i < nu; i += HPL_LOCSWP_DEPTH,
215 1 equemene
                    Wmx += HPL_LOCSWP_DEPTH, Wr0 += HPL_LOCSWP_DEPTH )
216 1 equemene
               {
217 1 equemene
                  *L = Wmx[ 0]; L+=n0;
218 1 equemene
#if ( HPL_LOCSWP_DEPTH >  1 )
219 1 equemene
                  *L = Wmx[ 1]; L+=n0;
220 1 equemene
#endif
221 1 equemene
#if ( HPL_LOCSWP_DEPTH >  2 )
222 1 equemene
                  *L = Wmx[ 2]; L+=n0; *L = Wmx[ 3]; L+=n0;
223 1 equemene
#endif
224 1 equemene
#if ( HPL_LOCSWP_DEPTH >  4 )
225 1 equemene
                  *L = Wmx[ 4]; L+=n0; *L = Wmx[ 5]; L+=n0;
226 1 equemene
                  *L = Wmx[ 6]; L+=n0; *L = Wmx[ 7]; L+=n0;
227 1 equemene
#endif
228 1 equemene
#if ( HPL_LOCSWP_DEPTH >  8 )
229 1 equemene
                  *L = Wmx[ 8]; L+=n0; *L = Wmx[ 9]; L+=n0;
230 1 equemene
                  *L = Wmx[10]; L+=n0; *L = Wmx[11]; L+=n0;
231 1 equemene
                  *L = Wmx[12]; L+=n0; *L = Wmx[13]; L+=n0;
232 1 equemene
                  *L = Wmx[14]; L+=n0; *L = Wmx[15]; L+=n0;
233 1 equemene
#endif
234 1 equemene
#if ( HPL_LOCSWP_DEPTH > 16 )
235 1 equemene
                  *L = Wmx[16]; L+=n0; *L = Wmx[17]; L+=n0;
236 1 equemene
                  *L = Wmx[18]; L+=n0; *L = Wmx[19]; L+=n0;
237 1 equemene
                  *L = Wmx[20]; L+=n0; *L = Wmx[21]; L+=n0;
238 1 equemene
                  *L = Wmx[22]; L+=n0; *L = Wmx[23]; L+=n0;
239 1 equemene
                  *L = Wmx[24]; L+=n0; *L = Wmx[25]; L+=n0;
240 1 equemene
                  *L = Wmx[26]; L+=n0; *L = Wmx[27]; L+=n0;
241 1 equemene
                  *L = Wmx[28]; L+=n0; *L = Wmx[29]; L+=n0;
242 1 equemene
                  *L = Wmx[30]; L+=n0; *L = Wmx[31]; L+=n0;
243 1 equemene
#endif
244 1 equemene
               }
245 1 equemene
               for( i = 0; i < nr; i++, L += n0 ) { *L = Wmx[i]; }
246 1 equemene
            }
247 1 equemene
         }
248 1 equemene
         else
249 1 equemene
         {
250 1 equemene
/*
251 1 equemene
 * otherwise, the row to be swapped with the current row of A is in Wmx,
252 1 equemene
 * so copy Wmx into L1 and A.
253 1 equemene
 */
254 1 equemene
            A1 = Mptr( PANEL->A,  II, 0, lda );
255 1 equemene
256 1 equemene
            for( i = 0; i < nu; i += HPL_LOCSWP_DEPTH,
257 1 equemene
                 Wmx += HPL_LOCSWP_DEPTH )
258 1 equemene
            {
259 1 equemene
               *L = *A1 = Wmx[ 0]; L += n0; A1 += lda;
260 1 equemene
#if ( HPL_LOCSWP_DEPTH >  1 )
261 1 equemene
               *L = *A1 = Wmx[ 1]; L += n0; A1 += lda;
262 1 equemene
#endif
263 1 equemene
#if ( HPL_LOCSWP_DEPTH >  2 )
264 1 equemene
               *L = *A1 = Wmx[ 2]; L += n0; A1 += lda;
265 1 equemene
               *L = *A1 = Wmx[ 3]; L += n0; A1 += lda;
266 1 equemene
#endif
267 1 equemene
#if ( HPL_LOCSWP_DEPTH >  4 )
268 1 equemene
               *L = *A1 = Wmx[ 4]; L += n0; A1 += lda;
269 1 equemene
               *L = *A1 = Wmx[ 5]; L += n0; A1 += lda;
270 1 equemene
               *L = *A1 = Wmx[ 6]; L += n0; A1 += lda;
271 1 equemene
               *L = *A1 = Wmx[ 7]; L += n0; A1 += lda;
272 1 equemene
#endif
273 1 equemene
#if ( HPL_LOCSWP_DEPTH >  8 )
274 1 equemene
               *L = *A1 = Wmx[ 8]; L += n0; A1 += lda;
275 1 equemene
               *L = *A1 = Wmx[ 9]; L += n0; A1 += lda;
276 1 equemene
               *L = *A1 = Wmx[10]; L += n0; A1 += lda;
277 1 equemene
               *L = *A1 = Wmx[11]; L += n0; A1 += lda;
278 1 equemene
               *L = *A1 = Wmx[12]; L += n0; A1 += lda;
279 1 equemene
               *L = *A1 = Wmx[13]; L += n0; A1 += lda;
280 1 equemene
               *L = *A1 = Wmx[14]; L += n0; A1 += lda;
281 1 equemene
               *L = *A1 = Wmx[15]; L += n0; A1 += lda;
282 1 equemene
#endif
283 1 equemene
#if ( HPL_LOCSWP_DEPTH > 16 )
284 1 equemene
               *L = *A1 = Wmx[16]; L += n0; A1 += lda;
285 1 equemene
               *L = *A1 = Wmx[17]; L += n0; A1 += lda;
286 1 equemene
               *L = *A1 = Wmx[18]; L += n0; A1 += lda;
287 1 equemene
               *L = *A1 = Wmx[19]; L += n0; A1 += lda;
288 1 equemene
               *L = *A1 = Wmx[20]; L += n0; A1 += lda;
289 1 equemene
               *L = *A1 = Wmx[21]; L += n0; A1 += lda;
290 1 equemene
               *L = *A1 = Wmx[22]; L += n0; A1 += lda;
291 1 equemene
               *L = *A1 = Wmx[23]; L += n0; A1 += lda;
292 1 equemene
               *L = *A1 = Wmx[24]; L += n0; A1 += lda;
293 1 equemene
               *L = *A1 = Wmx[25]; L += n0; A1 += lda;
294 1 equemene
               *L = *A1 = Wmx[26]; L += n0; A1 += lda;
295 1 equemene
               *L = *A1 = Wmx[27]; L += n0; A1 += lda;
296 1 equemene
               *L = *A1 = Wmx[28]; L += n0; A1 += lda;
297 1 equemene
               *L = *A1 = Wmx[29]; L += n0; A1 += lda;
298 1 equemene
               *L = *A1 = Wmx[30]; L += n0; A1 += lda;
299 1 equemene
               *L = *A1 = Wmx[31]; L += n0; A1 += lda;
300 1 equemene
#endif
301 1 equemene
            }
302 1 equemene
303 1 equemene
            for( i = 0; i < nr; i++, L += n0, A1 += lda )
304 1 equemene
            { *L = *A1 = Wmx[i]; }
305 1 equemene
         }
306 1 equemene
      }
307 1 equemene
      else
308 1 equemene
      {
309 1 equemene
/*
310 1 equemene
 * otherwise I do not own the current row of A, so copy the max row  Wmx
311 1 equemene
 * into L1.
312 1 equemene
 */
313 1 equemene
         for( i = 0; i < nu; i += HPL_LOCSWP_DEPTH,
314 1 equemene
              Wmx += HPL_LOCSWP_DEPTH )
315 1 equemene
         {
316 1 equemene
            *L = Wmx[ 0]; L+=n0;
317 1 equemene
#if ( HPL_LOCSWP_DEPTH >  1 )
318 1 equemene
            *L = Wmx[ 1]; L+=n0;
319 1 equemene
#endif
320 1 equemene
#if ( HPL_LOCSWP_DEPTH >  2 )
321 1 equemene
            *L = Wmx[ 2]; L+=n0; *L = Wmx[ 3]; L+=n0;
322 1 equemene
#endif
323 1 equemene
#if ( HPL_LOCSWP_DEPTH >  4 )
324 1 equemene
            *L = Wmx[ 4]; L+=n0; *L = Wmx[ 5]; L+=n0;
325 1 equemene
            *L = Wmx[ 6]; L+=n0; *L = Wmx[ 7]; L+=n0;
326 1 equemene
#endif
327 1 equemene
#if ( HPL_LOCSWP_DEPTH >  8 )
328 1 equemene
            *L = Wmx[ 8]; L+=n0; *L = Wmx[ 9]; L+=n0;
329 1 equemene
            *L = Wmx[10]; L+=n0; *L = Wmx[11]; L+=n0;
330 1 equemene
            *L = Wmx[12]; L+=n0; *L = Wmx[13]; L+=n0;
331 1 equemene
            *L = Wmx[14]; L+=n0; *L = Wmx[15]; L+=n0;
332 1 equemene
#endif
333 1 equemene
#if ( HPL_LOCSWP_DEPTH > 16 )
334 1 equemene
            *L = Wmx[16]; L+=n0; *L = Wmx[17]; L+=n0;
335 1 equemene
            *L = Wmx[18]; L+=n0; *L = Wmx[19]; L+=n0;
336 1 equemene
            *L = Wmx[20]; L+=n0; *L = Wmx[21]; L+=n0;
337 1 equemene
            *L = Wmx[22]; L+=n0; *L = Wmx[23]; L+=n0;
338 1 equemene
            *L = Wmx[24]; L+=n0; *L = Wmx[25]; L+=n0;
339 1 equemene
            *L = Wmx[26]; L+=n0; *L = Wmx[27]; L+=n0;
340 1 equemene
            *L = Wmx[28]; L+=n0; *L = Wmx[29]; L+=n0;
341 1 equemene
            *L = Wmx[30]; L+=n0; *L = Wmx[31]; L+=n0;
342 1 equemene
#endif
343 1 equemene
         }
344 1 equemene
         for( i = 0; i < nr; i++, L += n0 ) { *L = Wmx[i]; }
345 1 equemene
/*
346 1 equemene
 * and if I own the max row, overwrite it with the current row Wr0.
347 1 equemene
 */
348 1 equemene
         if( myrow == (int)(WORK[3]) )
349 1 equemene
         {
350 1 equemene
            A2 = Mptr( PANEL->A, II + (size_t)(WORK[1]), 0, lda );
351 1 equemene
352 1 equemene
            for( i = 0; i < nu; i += HPL_LOCSWP_DEPTH,
353 1 equemene
                 Wr0 += HPL_LOCSWP_DEPTH )
354 1 equemene
            {
355 1 equemene
               *A2 = Wr0[ 0]; A2+=lda;
356 1 equemene
#if ( HPL_LOCSWP_DEPTH >  1 )
357 1 equemene
               *A2 = Wr0[ 1]; A2+=lda;
358 1 equemene
#endif
359 1 equemene
#if ( HPL_LOCSWP_DEPTH >  2 )
360 1 equemene
               *A2 = Wr0[ 2]; A2+=lda; *A2 = Wr0[ 3]; A2+=lda;
361 1 equemene
#endif
362 1 equemene
#if ( HPL_LOCSWP_DEPTH >  4 )
363 1 equemene
               *A2 = Wr0[ 4]; A2+=lda; *A2 = Wr0[ 5]; A2+=lda;
364 1 equemene
               *A2 = Wr0[ 6]; A2+=lda; *A2 = Wr0[ 7]; A2+=lda;
365 1 equemene
#endif
366 1 equemene
#if ( HPL_LOCSWP_DEPTH >  8 )
367 1 equemene
               *A2 = Wr0[ 8]; A2+=lda; *A2 = Wr0[ 9]; A2+=lda;
368 1 equemene
               *A2 = Wr0[10]; A2+=lda; *A2 = Wr0[11]; A2+=lda;
369 1 equemene
               *A2 = Wr0[12]; A2+=lda; *A2 = Wr0[13]; A2+=lda;
370 1 equemene
               *A2 = Wr0[14]; A2+=lda; *A2 = Wr0[15]; A2+=lda;
371 1 equemene
#endif
372 1 equemene
#if ( HPL_LOCSWP_DEPTH > 16 )
373 1 equemene
               *A2 = Wr0[16]; A2+=lda; *A2 = Wr0[17]; A2+=lda;
374 1 equemene
               *A2 = Wr0[18]; A2+=lda; *A2 = Wr0[19]; A2+=lda;
375 1 equemene
               *A2 = Wr0[20]; A2+=lda; *A2 = Wr0[21]; A2+=lda;
376 1 equemene
               *A2 = Wr0[22]; A2+=lda; *A2 = Wr0[23]; A2+=lda;
377 1 equemene
               *A2 = Wr0[24]; A2+=lda; *A2 = Wr0[25]; A2+=lda;
378 1 equemene
               *A2 = Wr0[26]; A2+=lda; *A2 = Wr0[27]; A2+=lda;
379 1 equemene
               *A2 = Wr0[28]; A2+=lda; *A2 = Wr0[29]; A2+=lda;
380 1 equemene
               *A2 = Wr0[30]; A2+=lda; *A2 = Wr0[31]; A2+=lda;
381 1 equemene
#endif
382 1 equemene
            }
383 1 equemene
384 1 equemene
            for( i = 0; i < nr; i++, A2 += lda ) { *A2 = Wr0[i]; }
385 1 equemene
         }
386 1 equemene
      }
387 1 equemene
   }
388 1 equemene
   else
389 1 equemene
   {
390 1 equemene
/*
391 1 equemene
 * Otherwise the max element in the current column is zero,  simply copy
392 1 equemene
 * the current row Wr0 into L1. The matrix is singular.
393 1 equemene
 */
394 1 equemene
      for( i = 0; i < nu; i += HPL_LOCSWP_DEPTH,
395 1 equemene
           Wr0 += HPL_LOCSWP_DEPTH )
396 1 equemene
      {
397 1 equemene
         *L = Wr0[ 0]; L+=n0;
398 1 equemene
#if ( HPL_LOCSWP_DEPTH >  1 )
399 1 equemene
         *L = Wr0[ 1]; L+=n0;
400 1 equemene
#endif
401 1 equemene
#if ( HPL_LOCSWP_DEPTH >  2 )
402 1 equemene
         *L = Wr0[ 2]; L+=n0; *L = Wr0[ 3]; L+=n0;
403 1 equemene
#endif
404 1 equemene
#if ( HPL_LOCSWP_DEPTH >  4 )
405 1 equemene
         *L = Wr0[ 4]; L+=n0; *L = Wr0[ 5]; L+=n0;
406 1 equemene
         *L = Wr0[ 6]; L+=n0; *L = Wr0[ 7]; L+=n0;
407 1 equemene
#endif
408 1 equemene
#if ( HPL_LOCSWP_DEPTH >  8 )
409 1 equemene
         *L = Wr0[ 8]; L+=n0; *L = Wr0[ 9]; L+=n0;
410 1 equemene
         *L = Wr0[10]; L+=n0; *L = Wr0[11]; L+=n0;
411 1 equemene
         *L = Wr0[12]; L+=n0; *L = Wr0[13]; L+=n0;
412 1 equemene
         *L = Wr0[14]; L+=n0; *L = Wr0[15]; L+=n0;
413 1 equemene
#endif
414 1 equemene
#if ( HPL_LOCSWP_DEPTH > 16 )
415 1 equemene
         *L = Wr0[16]; L+=n0; *L = Wr0[17]; L+=n0;
416 1 equemene
         *L = Wr0[18]; L+=n0; *L = Wr0[19]; L+=n0;
417 1 equemene
         *L = Wr0[20]; L+=n0; *L = Wr0[21]; L+=n0;
418 1 equemene
         *L = Wr0[22]; L+=n0; *L = Wr0[23]; L+=n0;
419 1 equemene
         *L = Wr0[24]; L+=n0; *L = Wr0[25]; L+=n0;
420 1 equemene
         *L = Wr0[26]; L+=n0; *L = Wr0[27]; L+=n0;
421 1 equemene
         *L = Wr0[28]; L+=n0; *L = Wr0[29]; L+=n0;
422 1 equemene
         *L = Wr0[30]; L+=n0; *L = Wr0[31]; L+=n0;
423 1 equemene
#endif
424 1 equemene
      }
425 1 equemene
426 1 equemene
      for( i = 0; i < nr; i++, L += n0 ) { *L = Wr0[i]; }
427 1 equemene
/*
428 1 equemene
 * set INFO.
429 1 equemene
 */
430 1 equemene
      if( *(PANEL->DINFO) == 0.0 )
431 1 equemene
         *(PANEL->DINFO) = (double)(PANEL->ia + JJ + 1);
432 1 equemene
   }
433 1 equemene
/*
434 1 equemene
 * End of HPL_dlocswpN
435 1 equemene
 */
436 1 equemene
}