root / src / pgesv / HPL_pdupdateNT.c
Historique | Voir | Annoter | Télécharger (15,15 ko)
1 |
/*
|
---|---|
2 |
* -- High Performance Computing Linpack Benchmark (HPL)
|
3 |
* HPL - 2.0 - September 10, 2008
|
4 |
* Antoine P. Petitet
|
5 |
* University of Tennessee, Knoxville
|
6 |
* Innovative Computing Laboratory
|
7 |
* (C) Copyright 2000-2008 All Rights Reserved
|
8 |
*
|
9 |
* -- Copyright notice and Licensing terms:
|
10 |
*
|
11 |
* Redistribution and use in source and binary forms, with or without
|
12 |
* modification, are permitted provided that the following conditions
|
13 |
* are met:
|
14 |
*
|
15 |
* 1. Redistributions of source code must retain the above copyright
|
16 |
* notice, this list of conditions and the following disclaimer.
|
17 |
*
|
18 |
* 2. Redistributions in binary form must reproduce the above copyright
|
19 |
* notice, this list of conditions, and the following disclaimer in the
|
20 |
* documentation and/or other materials provided with the distribution.
|
21 |
*
|
22 |
* 3. All advertising materials mentioning features or use of this
|
23 |
* software must display the following acknowledgement:
|
24 |
* This product includes software developed at the University of
|
25 |
* Tennessee, Knoxville, Innovative Computing Laboratory.
|
26 |
*
|
27 |
* 4. The name of the University, the name of the Laboratory, or the
|
28 |
* names of its contributors may not be used to endorse or promote
|
29 |
* products derived from this software without specific written
|
30 |
* permission.
|
31 |
*
|
32 |
* -- Disclaimer:
|
33 |
*
|
34 |
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
35 |
* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
36 |
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
37 |
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
|
38 |
* OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
39 |
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
40 |
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
41 |
* DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
42 |
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
43 |
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
44 |
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
45 |
* ---------------------------------------------------------------------
|
46 |
*/
|
47 |
/*
|
48 |
* Include files
|
49 |
*/
|
50 |
#include "hpl.h" |
51 |
|
52 |
#ifdef STDC_HEADERS
|
53 |
void HPL_pdupdateNT
|
54 |
( |
55 |
HPL_T_panel * PBCST, |
56 |
int * IFLAG,
|
57 |
HPL_T_panel * PANEL, |
58 |
const int NN |
59 |
) |
60 |
#else
|
61 |
void HPL_pdupdateNT
|
62 |
( PBCST, IFLAG, PANEL, NN ) |
63 |
HPL_T_panel * PBCST; |
64 |
int * IFLAG;
|
65 |
HPL_T_panel * PANEL; |
66 |
const int NN; |
67 |
#endif
|
68 |
{ |
69 |
/*
|
70 |
* Purpose
|
71 |
* =======
|
72 |
*
|
73 |
* HPL_pdupdateNT broadcast - forward the panel PBCST and simultaneously
|
74 |
* applies the row interchanges and updates part of the trailing (using
|
75 |
* the panel PANEL) submatrix.
|
76 |
*
|
77 |
* Arguments
|
78 |
* =========
|
79 |
*
|
80 |
* PBCST (local input/output) HPL_T_panel *
|
81 |
* On entry, PBCST points to the data structure containing the
|
82 |
* panel (to be broadcast) information.
|
83 |
*
|
84 |
* IFLAG (local output) int *
|
85 |
* On exit, IFLAG indicates whether or not the broadcast has
|
86 |
* been completed when PBCST is not NULL on entry. In that case,
|
87 |
* IFLAG is left unchanged.
|
88 |
*
|
89 |
* PANEL (local input/output) HPL_T_panel *
|
90 |
* On entry, PANEL points to the data structure containing the
|
91 |
* panel (to be updated) information.
|
92 |
*
|
93 |
* NN (local input) const int
|
94 |
* On entry, NN specifies the local number of columns of the
|
95 |
* trailing submatrix to be updated starting at the current
|
96 |
* position. NN must be at least zero.
|
97 |
*
|
98 |
* ---------------------------------------------------------------------
|
99 |
*/
|
100 |
/*
|
101 |
* .. Local Variables ..
|
102 |
*/
|
103 |
double * Aptr, * L1ptr, * L2ptr, * Uptr, * dpiv;
|
104 |
int * ipiv;
|
105 |
#ifdef HPL_CALL_VSIPL
|
106 |
vsip_mview_d * Av0, * Av1, * Lv0, * Lv1, * Uv0, * Uv1; |
107 |
#endif
|
108 |
int curr, i, iroff, jb, lda, ldl2, mp, n, nb,
|
109 |
nq0, nn, test; |
110 |
static int tswap = 0; |
111 |
static HPL_T_SWAP fswap = HPL_NO_SWP;
|
112 |
#define LDU n
|
113 |
/* ..
|
114 |
* .. Executable Statements ..
|
115 |
*/
|
116 |
#ifdef HPL_DETAILED_TIMING
|
117 |
HPL_ptimer( HPL_TIMING_UPDATE ); |
118 |
#endif
|
119 |
nb = PANEL->nb; jb = PANEL->jb; n = PANEL->nq; lda = PANEL->lda; |
120 |
if( NN >= 0 ) n = Mmin( NN, n ); |
121 |
/*
|
122 |
* There is nothing to update, enforce the panel broadcast.
|
123 |
*/
|
124 |
if( ( n <= 0 ) || ( jb <= 0 ) ) |
125 |
{ |
126 |
if( PBCST != NULL ) |
127 |
{ |
128 |
do { (void) HPL_bcast( PBCST, IFLAG ); } |
129 |
while( *IFLAG != HPL_SUCCESS );
|
130 |
} |
131 |
#ifdef HPL_DETAILED_TIMING
|
132 |
HPL_ptimer( HPL_TIMING_UPDATE ); |
133 |
#endif
|
134 |
return;
|
135 |
} |
136 |
/*
|
137 |
* Enable/disable the column panel probing mechanism
|
138 |
*/
|
139 |
(void) HPL_bcast( PBCST, &test );
|
140 |
/*
|
141 |
* 1 x Q case
|
142 |
*/
|
143 |
if( PANEL->grid->nprow == 1 ) |
144 |
{ |
145 |
Aptr = PANEL->A; L2ptr = PANEL->L2; L1ptr = PANEL->L1; |
146 |
ldl2 = PANEL->ldl2; dpiv = PANEL->DPIV; ipiv = PANEL->IWORK; |
147 |
mp = PANEL->mp - jb; iroff = PANEL->ii; nq0 = 0;
|
148 |
#ifdef HPL_CALL_VSIPL
|
149 |
/*
|
150 |
* Admit the blocks
|
151 |
*/
|
152 |
(void) vsip_blockadmit_d( PANEL->Ablock, VSIP_TRUE );
|
153 |
(void) vsip_blockadmit_d( PANEL->L2block, VSIP_TRUE );
|
154 |
/*
|
155 |
* Create the matrix views
|
156 |
*/
|
157 |
Av0 = vsip_mbind_d( PANEL->Ablock, 0, 1, lda, lda, PANEL->pmat->nq ); |
158 |
Lv0 = vsip_mbind_d( PANEL->L2block, 0, 1, ldl2, ldl2, jb ); |
159 |
/*
|
160 |
* Create the matrix subviews
|
161 |
*/
|
162 |
Lv1 = vsip_msubview_d( Lv0, 0, 0, mp, jb ); |
163 |
#endif
|
164 |
for( i = 0; i < jb; i++ ) { ipiv[i] = (int)(dpiv[i]) - iroff; } |
165 |
/*
|
166 |
* So far we have not updated anything - test availability of the panel
|
167 |
* to be forwarded - If detected forward it and finish the update in one
|
168 |
* step.
|
169 |
*/
|
170 |
while ( test == HPL_KEEP_TESTING )
|
171 |
{ |
172 |
nn = n - nq0; nn = Mmin( nb, nn ); |
173 |
/*
|
174 |
* Update nb columns at a time
|
175 |
*/
|
176 |
#ifdef HPL_DETAILED_TIMING
|
177 |
HPL_ptimer( HPL_TIMING_LASWP ); |
178 |
HPL_dlaswp00N( jb, nn, Aptr, lda, ipiv ); |
179 |
HPL_ptimer( HPL_TIMING_LASWP ); |
180 |
#else
|
181 |
HPL_dlaswp00N( jb, nn, Aptr, lda, ipiv ); |
182 |
#endif
|
183 |
HPL_dtrsm( HplColumnMajor, HplLeft, HplLower, HplNoTrans, |
184 |
HplUnit, jb, nn, HPL_rone, L1ptr, jb, Aptr, lda ); |
185 |
#ifdef HPL_CALL_VSIPL
|
186 |
/*
|
187 |
* Create the matrix subviews
|
188 |
*/
|
189 |
Uv1 = vsip_msubview_d( Av0, PANEL->ii, PANEL->jj+nq0, jb, nn ); |
190 |
Av1 = vsip_msubview_d( Av0, PANEL->ii+jb, PANEL->jj+nq0, mp, nn ); |
191 |
|
192 |
vsip_gemp_d( -HPL_rone, Lv1, VSIP_MAT_NTRANS, Uv1, VSIP_MAT_NTRANS, |
193 |
HPL_rone, Av1 ); |
194 |
/*
|
195 |
* Destroy the matrix subviews
|
196 |
*/
|
197 |
(void) vsip_mdestroy_d( Av1 );
|
198 |
(void) vsip_mdestroy_d( Uv1 );
|
199 |
#else
|
200 |
HPL_dgemm( HplColumnMajor, HplNoTrans, HplNoTrans, mp, nn, |
201 |
jb, -HPL_rone, L2ptr, ldl2, Aptr, lda, HPL_rone, |
202 |
Mptr( Aptr, jb, 0, lda ), lda );
|
203 |
#endif
|
204 |
Aptr = Mptr( Aptr, 0, nn, lda ); nq0 += nn;
|
205 |
|
206 |
(void) HPL_bcast( PBCST, &test );
|
207 |
} |
208 |
/*
|
209 |
* The panel has been forwarded at that point, finish the update
|
210 |
*/
|
211 |
if( ( nn = n - nq0 ) > 0 ) |
212 |
{ |
213 |
#ifdef HPL_DETAILED_TIMING
|
214 |
HPL_ptimer( HPL_TIMING_LASWP ); |
215 |
HPL_dlaswp00N( jb, nn, Aptr, lda, ipiv ); |
216 |
HPL_ptimer( HPL_TIMING_LASWP ); |
217 |
#else
|
218 |
HPL_dlaswp00N( jb, nn, Aptr, lda, ipiv ); |
219 |
#endif
|
220 |
HPL_dtrsm( HplColumnMajor, HplLeft, HplLower, HplNoTrans, |
221 |
HplUnit, jb, nn, HPL_rone, L1ptr, jb, Aptr, lda ); |
222 |
#ifdef HPL_CALL_VSIPL
|
223 |
/*
|
224 |
* Create the matrix subviews
|
225 |
*/
|
226 |
Uv1 = vsip_msubview_d( Av0, PANEL->ii, PANEL->jj+nq0, jb, nn ); |
227 |
Av1 = vsip_msubview_d( Av0, PANEL->ii+jb, PANEL->jj+nq0, mp, nn ); |
228 |
|
229 |
vsip_gemp_d( -HPL_rone, Lv1, VSIP_MAT_NTRANS, Uv1, VSIP_MAT_NTRANS, |
230 |
HPL_rone, Av1 ); |
231 |
/*
|
232 |
* Destroy the matrix subviews
|
233 |
*/
|
234 |
(void) vsip_mdestroy_d( Av1 );
|
235 |
(void) vsip_mdestroy_d( Uv1 );
|
236 |
#else
|
237 |
HPL_dgemm( HplColumnMajor, HplNoTrans, HplNoTrans, mp, nn, |
238 |
jb, -HPL_rone, L2ptr, ldl2, Aptr, lda, HPL_rone, |
239 |
Mptr( Aptr, jb, 0, lda ), lda );
|
240 |
#endif
|
241 |
} |
242 |
#ifdef HPL_CALL_VSIPL
|
243 |
/*
|
244 |
* Destroy the matrix subviews
|
245 |
*/
|
246 |
(void) vsip_mdestroy_d( Lv1 );
|
247 |
/*
|
248 |
* Release the blocks
|
249 |
*/
|
250 |
(void) vsip_blockrelease_d( vsip_mgetblock_d( Lv0 ), VSIP_TRUE );
|
251 |
(void) vsip_blockrelease_d( vsip_mgetblock_d( Av0 ), VSIP_TRUE );
|
252 |
/*
|
253 |
* Destroy the matrix views
|
254 |
*/
|
255 |
(void) vsip_mdestroy_d( Lv0 );
|
256 |
(void) vsip_mdestroy_d( Av0 );
|
257 |
#endif
|
258 |
} |
259 |
else /* nprow > 1 ... */ |
260 |
{ |
261 |
/*
|
262 |
* Selection of the swapping algorithm - swap:broadcast U.
|
263 |
*/
|
264 |
if( fswap == HPL_NO_SWP )
|
265 |
{ fswap = PANEL->algo->fswap; tswap = PANEL->algo->fsthr; } |
266 |
|
267 |
if( ( fswap == HPL_SWAP01 ) ||
|
268 |
( ( fswap == HPL_SW_MIX ) && ( n > tswap ) ) ) |
269 |
{ HPL_pdlaswp01T( PBCST, &test, PANEL, n ); } |
270 |
else
|
271 |
{ HPL_pdlaswp00T( PBCST, &test, PANEL, n ); } |
272 |
/*
|
273 |
* Compute redundantly row block of U and update trailing submatrix
|
274 |
*/
|
275 |
nq0 = 0; curr = ( PANEL->grid->myrow == PANEL->prow ? 1 : 0 ); |
276 |
Aptr = PANEL->A; L2ptr = PANEL->L2; L1ptr = PANEL->L1; |
277 |
Uptr = PANEL->U; ldl2 = PANEL->ldl2; |
278 |
mp = PANEL->mp - ( curr != 0 ? jb : 0 ); |
279 |
#ifdef HPL_CALL_VSIPL
|
280 |
/*
|
281 |
* Admit the blocks
|
282 |
*/
|
283 |
(void) vsip_blockadmit_d( PANEL->Ablock, VSIP_TRUE );
|
284 |
(void) vsip_blockadmit_d( PANEL->L2block, VSIP_TRUE );
|
285 |
(void) vsip_blockadmit_d( PANEL->Ublock, VSIP_TRUE );
|
286 |
/*
|
287 |
* Create the matrix views
|
288 |
*/
|
289 |
Av0 = vsip_mbind_d( PANEL->Ablock, 0, 1, lda, lda, PANEL->pmat->nq ); |
290 |
Lv0 = vsip_mbind_d( PANEL->L2block, 0, 1, ldl2, ldl2, jb ); |
291 |
Uv0 = vsip_mbind_d( PANEL->Ublock, 0, 1, LDU, LDU, jb ); |
292 |
/*
|
293 |
* Create the matrix subviews
|
294 |
*/
|
295 |
Lv1 = vsip_msubview_d( Lv0, 0, 0, mp, jb ); |
296 |
#endif
|
297 |
/*
|
298 |
* Broadcast has not occured yet, spliting the computational part
|
299 |
*/
|
300 |
while ( test == HPL_KEEP_TESTING )
|
301 |
{ |
302 |
nn = n - nq0; nn = Mmin( nb, nn ); |
303 |
|
304 |
HPL_dtrsm( HplColumnMajor, HplRight, HplLower, HplTrans, |
305 |
HplUnit, nn, jb, HPL_rone, L1ptr, jb, Uptr, LDU ); |
306 |
|
307 |
if( curr != 0 ) |
308 |
{ |
309 |
#ifdef HPL_CALL_VSIPL
|
310 |
/*
|
311 |
* Create the matrix subviews
|
312 |
*/
|
313 |
Uv1 = vsip_msubview_d( Uv0, nq0, 0, nn, jb );
|
314 |
Av1 = vsip_msubview_d( Av0, PANEL->ii+jb, PANEL->jj+nq0, mp, nn ); |
315 |
|
316 |
vsip_gemp_d( -HPL_rone, Lv1, VSIP_MAT_NTRANS, Uv1, VSIP_MAT_TRANS, |
317 |
HPL_rone, Av1 ); |
318 |
/*
|
319 |
* Destroy the matrix subviews
|
320 |
*/
|
321 |
(void) vsip_mdestroy_d( Av1 );
|
322 |
(void) vsip_mdestroy_d( Uv1 );
|
323 |
#else
|
324 |
HPL_dgemm( HplColumnMajor, HplNoTrans, HplTrans, mp, nn, |
325 |
jb, -HPL_rone, L2ptr, ldl2, Uptr, LDU, HPL_rone, |
326 |
Mptr( Aptr, jb, 0, lda ), lda );
|
327 |
#endif
|
328 |
HPL_dlatcpy( jb, nn, Uptr, LDU, Aptr, lda ); |
329 |
} |
330 |
else
|
331 |
{ |
332 |
#ifdef HPL_CALL_VSIPL
|
333 |
/*
|
334 |
* Create the matrix subviews
|
335 |
*/
|
336 |
Uv1 = vsip_msubview_d( Uv0, nq0, 0, nn, jb );
|
337 |
Av1 = vsip_msubview_d( Av0, PANEL->ii, PANEL->jj+nq0, mp, nn ); |
338 |
|
339 |
vsip_gemp_d( -HPL_rone, Lv1, VSIP_MAT_NTRANS, Uv1, VSIP_MAT_TRANS, |
340 |
HPL_rone, Av1 ); |
341 |
/*
|
342 |
* Destroy the matrix subviews
|
343 |
*/
|
344 |
(void) vsip_mdestroy_d( Av1 );
|
345 |
(void) vsip_mdestroy_d( Uv1 );
|
346 |
#else
|
347 |
HPL_dgemm( HplColumnMajor, HplNoTrans, HplTrans, mp, nn, |
348 |
jb, -HPL_rone, L2ptr, ldl2, Uptr, LDU, HPL_rone, |
349 |
Aptr, lda ); |
350 |
#endif
|
351 |
} |
352 |
Uptr = Mptr( Uptr, nn, 0, LDU );
|
353 |
Aptr = Mptr( Aptr, 0, nn, lda ); nq0 += nn;
|
354 |
|
355 |
(void) HPL_bcast( PBCST, &test );
|
356 |
} |
357 |
/*
|
358 |
* The panel has been forwarded at that point, finish the update
|
359 |
*/
|
360 |
if( ( nn = n - nq0 ) > 0 ) |
361 |
{ |
362 |
HPL_dtrsm( HplColumnMajor, HplRight, HplLower, HplTrans, |
363 |
HplUnit, nn, jb, HPL_rone, L1ptr, jb, Uptr, LDU ); |
364 |
|
365 |
if( curr != 0 ) |
366 |
{ |
367 |
#ifdef HPL_CALL_VSIPL
|
368 |
/*
|
369 |
* Create the matrix subviews
|
370 |
*/
|
371 |
Uv1 = vsip_msubview_d( Uv0, nq0, 0, nn, jb );
|
372 |
Av1 = vsip_msubview_d( Av0, PANEL->ii+jb, PANEL->jj+nq0, mp, nn ); |
373 |
|
374 |
vsip_gemp_d( -HPL_rone, Lv1, VSIP_MAT_NTRANS, Uv1, VSIP_MAT_TRANS, |
375 |
HPL_rone, Av1 ); |
376 |
/*
|
377 |
* Destroy the matrix subviews
|
378 |
*/
|
379 |
(void) vsip_mdestroy_d( Av1 );
|
380 |
(void) vsip_mdestroy_d( Uv1 );
|
381 |
#else
|
382 |
HPL_dgemm( HplColumnMajor, HplNoTrans, HplTrans, mp, nn, |
383 |
jb, -HPL_rone, L2ptr, ldl2, Uptr, LDU, HPL_rone, |
384 |
Mptr( Aptr, jb, 0, lda ), lda );
|
385 |
#endif
|
386 |
HPL_dlatcpy( jb, nn, Uptr, LDU, Aptr, lda ); |
387 |
} |
388 |
else
|
389 |
{ |
390 |
#ifdef HPL_CALL_VSIPL
|
391 |
/*
|
392 |
* Create the matrix subviews
|
393 |
*/
|
394 |
Uv1 = vsip_msubview_d( Uv0, nq0, 0, nn, jb );
|
395 |
Av1 = vsip_msubview_d( Av0, PANEL->ii, PANEL->jj+nq0, mp, nn ); |
396 |
|
397 |
vsip_gemp_d( -HPL_rone, Lv1, VSIP_MAT_NTRANS, Uv1, VSIP_MAT_TRANS, |
398 |
HPL_rone, Av1 ); |
399 |
/*
|
400 |
* Destroy the matrix subviews
|
401 |
*/
|
402 |
(void) vsip_mdestroy_d( Av1 );
|
403 |
(void) vsip_mdestroy_d( Uv1 );
|
404 |
#else
|
405 |
HPL_dgemm( HplColumnMajor, HplNoTrans, HplTrans, mp, nn, |
406 |
jb, -HPL_rone, L2ptr, ldl2, Uptr, LDU, HPL_rone, |
407 |
Aptr, lda ); |
408 |
#endif
|
409 |
} |
410 |
} |
411 |
#ifdef HPL_CALL_VSIPL
|
412 |
/*
|
413 |
* Destroy the matrix subviews
|
414 |
*/
|
415 |
(void) vsip_mdestroy_d( Lv1 );
|
416 |
/*
|
417 |
* Release the blocks
|
418 |
*/
|
419 |
(void) vsip_blockrelease_d( vsip_mgetblock_d( Uv0 ), VSIP_TRUE );
|
420 |
(void) vsip_blockrelease_d( vsip_mgetblock_d( Lv0 ), VSIP_TRUE );
|
421 |
(void) vsip_blockrelease_d( vsip_mgetblock_d( Av0 ), VSIP_TRUE );
|
422 |
/*
|
423 |
* Destroy the matrix views
|
424 |
*/
|
425 |
(void) vsip_mdestroy_d( Uv0 );
|
426 |
(void) vsip_mdestroy_d( Lv0 );
|
427 |
(void) vsip_mdestroy_d( Av0 );
|
428 |
#endif
|
429 |
} |
430 |
|
431 |
PANEL->A = Mptr( PANEL->A, 0, n, lda ); PANEL->nq -= n; PANEL->jj += n;
|
432 |
/*
|
433 |
* return the outcome of the probe (should always be HPL_SUCCESS, the
|
434 |
* panel broadcast is enforced in that routine).
|
435 |
*/
|
436 |
if( PBCST != NULL ) *IFLAG = test; |
437 |
#ifdef HPL_DETAILED_TIMING
|
438 |
HPL_ptimer( HPL_TIMING_UPDATE ); |
439 |
#endif
|
440 |
/*
|
441 |
* End of HPL_pdupdateNT
|
442 |
*/
|
443 |
} |