Statistiques
| Révision :

root / src / pgesv / HPL_pdupdateTN.c

Historique | Voir | Annoter | Télécharger (15,15 ko)

1
/* 
2
 * -- High Performance Computing Linpack Benchmark (HPL)                
3
 *    HPL - 2.0 - September 10, 2008                          
4
 *    Antoine P. Petitet                                                
5
 *    University of Tennessee, Knoxville                                
6
 *    Innovative Computing Laboratory                                 
7
 *    (C) Copyright 2000-2008 All Rights Reserved                       
8
 *                                                                      
9
 * -- Copyright notice and Licensing terms:                             
10
 *                                                                      
11
 * Redistribution  and  use in  source and binary forms, with or without
12
 * modification, are  permitted provided  that the following  conditions
13
 * are met:                                                             
14
 *                                                                      
15
 * 1. Redistributions  of  source  code  must retain the above copyright
16
 * notice, this list of conditions and the following disclaimer.        
17
 *                                                                      
18
 * 2. Redistributions in binary form must reproduce  the above copyright
19
 * notice, this list of conditions,  and the following disclaimer in the
20
 * documentation and/or other materials provided with the distribution. 
21
 *                                                                      
22
 * 3. All  advertising  materials  mentioning  features  or  use of this
23
 * software must display the following acknowledgement:                 
24
 * This  product  includes  software  developed  at  the  University  of
25
 * Tennessee, Knoxville, Innovative Computing Laboratory.             
26
 *                                                                      
27
 * 4. The name of the  University,  the name of the  Laboratory,  or the
28
 * names  of  its  contributors  may  not  be used to endorse or promote
29
 * products  derived   from   this  software  without  specific  written
30
 * permission.                                                          
31
 *                                                                      
32
 * -- Disclaimer:                                                       
33
 *                                                                      
34
 * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
35
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
36
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
37
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
38
 * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
39
 * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
40
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
41
 * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
42
 * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
43
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
44
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
45
 * ---------------------------------------------------------------------
46
 */ 
47
/*
48
 * Include files
49
 */
50
#include "hpl.h"
51

    
52
#ifdef STDC_HEADERS
53
void HPL_pdupdateTN
54
(
55
   HPL_T_panel *                    PBCST,
56
   int *                            IFLAG,
57
   HPL_T_panel *                    PANEL,
58
   const int                        NN
59
)
60
#else
61
void HPL_pdupdateTN
62
( PBCST, IFLAG, PANEL, NN )
63
   HPL_T_panel *                    PBCST;
64
   int *                            IFLAG;
65
   HPL_T_panel *                    PANEL;
66
   const int                        NN;
67
#endif
68
{
69
/* 
70
 * Purpose
71
 * =======
72
 *
73
 * HPL_pdupdateTN broadcast - forward the panel PBCST and simultaneously
74
 * applies the row interchanges and updates part of the trailing  (using
75
 * the panel PANEL) submatrix.
76
 *
77
 * Arguments
78
 * =========
79
 *
80
 * PBCST   (local input/output)          HPL_T_panel *
81
 *         On entry,  PBCST  points to the data structure containing the
82
 *         panel (to be broadcast) information.
83
 *
84
 * IFLAG   (local output)                int *
85
 *         On exit,  IFLAG  indicates  whether or not  the broadcast has
86
 *         been completed when PBCST is not NULL on entry. In that case,
87
 *         IFLAG is left unchanged.
88
 *
89
 * PANEL   (local input/output)          HPL_T_panel *
90
 *         On entry,  PANEL  points to the data structure containing the
91
 *         panel (to be updated) information.
92
 *
93
 * NN      (local input)                 const int
94
 *         On entry, NN specifies  the  local  number  of columns of the
95
 *         trailing  submatrix  to be updated  starting  at the  current
96
 *         position. NN must be at least zero.
97
 *
98
 * ---------------------------------------------------------------------
99
 */ 
100
/*
101
 * .. Local Variables ..
102
 */
103
   double                    * Aptr, * L1ptr, * L2ptr, * Uptr, * dpiv;
104
   int                       * ipiv;
105
#ifdef HPL_CALL_VSIPL
106
   vsip_mview_d              * Av0, * Av1, * Lv0, * Lv1, * Uv0, * Uv1;
107
#endif
108
   int                       curr, i, iroff, jb, lda, ldl2, mp, n, nb,
109
                             nq0, nn, test;
110
   static int                tswap = 0;
111
   static HPL_T_SWAP         fswap = HPL_NO_SWP;
112
#define LDU                  jb
113
/* ..
114
 * .. Executable Statements ..
115
 */
116
#ifdef HPL_DETAILED_TIMING
117
   HPL_ptimer( HPL_TIMING_UPDATE );
118
#endif
119
   nb = PANEL->nb; jb = PANEL->jb; n = PANEL->nq; lda = PANEL->lda;
120
   if( NN >= 0 ) n = Mmin( NN, n );
121
/*
122
 * There is nothing to update, enforce the panel broadcast.
123
 */
124
   if( ( n <= 0 ) || ( jb <= 0 ) )
125
   {
126
      if( PBCST != NULL )
127
      {
128
         do { (void) HPL_bcast( PBCST, IFLAG ); }
129
         while( *IFLAG != HPL_SUCCESS );
130
      }
131
#ifdef HPL_DETAILED_TIMING
132
      HPL_ptimer( HPL_TIMING_UPDATE );
133
#endif
134
      return;
135
   }
136
/*
137
 * Enable/disable the column panel probing mechanism
138
 */
139
   (void) HPL_bcast( PBCST, &test );
140
/*
141
 * 1 x Q case
142
 */
143
   if( PANEL->grid->nprow == 1 )
144
   {
145
      Aptr = PANEL->A;       L2ptr = PANEL->L2;   L1ptr = PANEL->L1;
146
      ldl2 = PANEL->ldl2;    dpiv  = PANEL->DPIV; ipiv  = PANEL->IWORK;
147
      mp   = PANEL->mp - jb; iroff = PANEL->ii;   nq0   = 0; 
148
#ifdef HPL_CALL_VSIPL
149
/*
150
 * Admit the blocks
151
 */
152
      (void) vsip_blockadmit_d(  PANEL->Ablock,  VSIP_TRUE );
153
      (void) vsip_blockadmit_d(  PANEL->L2block, VSIP_TRUE );
154
/*
155
 * Create the matrix views
156
 */
157
      Av0 = vsip_mbind_d( PANEL->Ablock,  0, 1, lda,  lda,  PANEL->pmat->nq );
158
      Lv0 = vsip_mbind_d( PANEL->L2block, 0, 1, ldl2, ldl2,              jb );
159
/*
160
 * Create the matrix subviews
161
 */
162
      Lv1 = vsip_msubview_d( Lv0, 0, 0, mp, jb );
163
#endif
164
      for( i = 0; i < jb; i++ ) { ipiv[i] = (int)(dpiv[i]) - iroff; }
165
/*
166
 * So far we have not updated anything -  test availability of the panel
167
 * to be forwarded - If detected forward it and finish the update in one
168
 * step.
169
 */
170
      while ( test == HPL_KEEP_TESTING )
171
      {
172
         nn = n - nq0; nn = Mmin( nb, nn );
173
/*
174
 * Update nb columns at a time
175
 */
176
#ifdef HPL_DETAILED_TIMING
177
         HPL_ptimer( HPL_TIMING_LASWP );
178
         HPL_dlaswp00N( jb, nn, Aptr, lda, ipiv );
179
         HPL_ptimer( HPL_TIMING_LASWP );
180
#else
181
         HPL_dlaswp00N( jb, nn, Aptr, lda, ipiv );
182
#endif
183
         HPL_dtrsm( HplColumnMajor, HplLeft, HplUpper, HplTrans,
184
                    HplUnit, jb, nn, HPL_rone, L1ptr, jb, Aptr, lda );
185
#ifdef HPL_CALL_VSIPL
186
/*
187
 * Create the matrix subviews
188
 */
189
         Uv1 = vsip_msubview_d( Av0, PANEL->ii,    PANEL->jj+nq0, jb, nn );
190
         Av1 = vsip_msubview_d( Av0, PANEL->ii+jb, PANEL->jj+nq0, mp, nn );
191

    
192
         vsip_gemp_d( -HPL_rone, Lv1, VSIP_MAT_NTRANS, Uv1, VSIP_MAT_NTRANS,
193
                      HPL_rone, Av1 );
194
/*
195
 * Destroy the matrix subviews
196
 */
197
         (void) vsip_mdestroy_d( Av1 );
198
         (void) vsip_mdestroy_d( Uv1 );
199
#else
200
         HPL_dgemm( HplColumnMajor, HplNoTrans, HplNoTrans, mp, nn,
201
                    jb, -HPL_rone, L2ptr, ldl2, Aptr, lda, HPL_rone,
202
                    Mptr( Aptr, jb, 0, lda ), lda );
203
#endif
204
         Aptr = Mptr( Aptr, 0, nn, lda ); nq0 += nn; 
205

    
206
         (void) HPL_bcast( PBCST, &test ); 
207
      }
208
/*
209
 * The panel has been forwarded at that point, finish the update
210
 */
211
      if( ( nn = n - nq0 ) > 0 )
212
      {
213
#ifdef HPL_DETAILED_TIMING
214
         HPL_ptimer( HPL_TIMING_LASWP );
215
         HPL_dlaswp00N( jb, nn, Aptr, lda, ipiv );
216
         HPL_ptimer( HPL_TIMING_LASWP );
217
#else
218
         HPL_dlaswp00N( jb, nn, Aptr, lda, ipiv );
219
#endif
220
         HPL_dtrsm( HplColumnMajor, HplLeft, HplUpper, HplTrans,
221
                    HplUnit, jb, nn, HPL_rone, L1ptr, jb, Aptr, lda );
222
#ifdef HPL_CALL_VSIPL
223
/*
224
 * Create the matrix subviews
225
 */
226
         Uv1 = vsip_msubview_d( Av0, PANEL->ii,    PANEL->jj+nq0, jb, nn );
227
         Av1 = vsip_msubview_d( Av0, PANEL->ii+jb, PANEL->jj+nq0, mp, nn );
228

    
229
         vsip_gemp_d( -HPL_rone, Lv1, VSIP_MAT_NTRANS, Uv1, VSIP_MAT_NTRANS,
230
                      HPL_rone, Av1 );
231
/*
232
 * Destroy the matrix subviews
233
 */
234
         (void) vsip_mdestroy_d( Av1 );
235
         (void) vsip_mdestroy_d( Uv1 );
236
#else
237
         HPL_dgemm( HplColumnMajor, HplNoTrans, HplNoTrans, mp, nn,
238
                    jb, -HPL_rone, L2ptr, ldl2, Aptr, lda, HPL_rone,
239
                    Mptr( Aptr, jb, 0, lda ), lda );
240
#endif
241
      }
242
#ifdef HPL_CALL_VSIPL
243
/*
244
 * Destroy the matrix subviews
245
 */
246
      (void) vsip_mdestroy_d( Lv1 );
247
/*
248
 * Release the blocks
249
 */
250
      (void) vsip_blockrelease_d( vsip_mgetblock_d( Lv0 ), VSIP_TRUE );
251
      (void) vsip_blockrelease_d( vsip_mgetblock_d( Av0 ), VSIP_TRUE );
252
/*
253
 * Destroy the matrix views
254
 */
255
      (void) vsip_mdestroy_d( Lv0 );
256
      (void) vsip_mdestroy_d( Av0 );
257
#endif
258
   }
259
   else                        /* nprow > 1 ... */
260
   {
261
/*
262
 * Selection of the swapping algorithm - swap:broadcast U.
263
 */
264
      if( fswap == HPL_NO_SWP )
265
      { fswap = PANEL->algo->fswap; tswap = PANEL->algo->fsthr; }
266

    
267
      if( (   fswap == HPL_SWAP01 ) ||
268
          ( ( fswap == HPL_SW_MIX ) && ( n > tswap ) ) )
269
      { HPL_pdlaswp01N( PBCST, &test, PANEL, n ); }
270
      else
271
      { HPL_pdlaswp00N( PBCST, &test, PANEL, n ); }
272
/*
273
 * Compute redundantly row block of U and update trailing submatrix
274
 */
275
      nq0 = 0; curr = ( PANEL->grid->myrow == PANEL->prow ? 1 : 0 );
276
      Aptr = PANEL->A; L2ptr = PANEL->L2;  L1ptr = PANEL->L1;
277
      Uptr = PANEL->U; ldl2 = PANEL->ldl2;
278
      mp   = PANEL->mp - ( curr != 0 ? jb : 0 );
279
#ifdef HPL_CALL_VSIPL
280
/*
281
 * Admit the blocks
282
 */
283
      (void) vsip_blockadmit_d(  PANEL->Ablock,  VSIP_TRUE );
284
      (void) vsip_blockadmit_d(  PANEL->L2block, VSIP_TRUE );
285
      (void) vsip_blockadmit_d(  PANEL->Ublock,  VSIP_TRUE );
286
/*
287
 * Create the matrix views
288
 */
289
      Av0 = vsip_mbind_d( PANEL->Ablock,  0, 1, lda,  lda,  PANEL->pmat->nq );
290
      Lv0 = vsip_mbind_d( PANEL->L2block, 0, 1, ldl2, ldl2,              jb );
291
      Uv0 = vsip_mbind_d( PANEL->Ublock,  0, 1, LDU,  LDU,                n );
292
/*
293
 * Create the matrix subviews
294
 */
295
      Lv1 = vsip_msubview_d( Lv0, 0, 0, mp, jb );
296
#endif
297
/*
298
 * Broadcast has not occured yet, spliting the computational part
299
 */
300
      while ( test == HPL_KEEP_TESTING )
301
      {
302
         nn = n - nq0; nn = Mmin( nb, nn );
303

    
304
         HPL_dtrsm( HplColumnMajor, HplLeft,  HplUpper, HplTrans,
305
                    HplUnit, jb, nn, HPL_rone, L1ptr, jb, Uptr, LDU );
306

    
307
         if( curr != 0 )
308
         {
309
#ifdef HPL_CALL_VSIPL
310
/*
311
 * Create the matrix subviews
312
 */
313
            Uv1 = vsip_msubview_d( Uv0, 0,            nq0,           jb, nn );
314
            Av1 = vsip_msubview_d( Av0, PANEL->ii+jb, PANEL->jj+nq0, mp, nn );
315

    
316
            vsip_gemp_d( -HPL_rone, Lv1, VSIP_MAT_NTRANS, Uv1, VSIP_MAT_NTRANS,
317
                         HPL_rone, Av1 );
318
/*
319
 * Destroy the matrix subviews
320
 */
321
            (void) vsip_mdestroy_d( Av1 );
322
            (void) vsip_mdestroy_d( Uv1 );
323
#else
324
            HPL_dgemm( HplColumnMajor, HplNoTrans, HplNoTrans, mp, nn,
325
                       jb, -HPL_rone, L2ptr, ldl2, Uptr, LDU, HPL_rone,
326
                       Mptr( Aptr, jb, 0, lda ), lda );
327
#endif
328
            HPL_dlacpy( jb, nn, Uptr, LDU, Aptr, lda );
329
         }
330
         else
331
         {
332
#ifdef HPL_CALL_VSIPL
333
/*
334
 * Create the matrix subviews
335
 */
336
            Uv1 = vsip_msubview_d( Uv0, 0,            nq0,           jb, nn );
337
            Av1 = vsip_msubview_d( Av0, PANEL->ii,    PANEL->jj+nq0, mp, nn );
338

    
339
            vsip_gemp_d( -HPL_rone, Lv1, VSIP_MAT_NTRANS, Uv1, VSIP_MAT_NTRANS,
340
                         HPL_rone, Av1 );
341
/*
342
 * Destroy the matrix subviews
343
 */
344
            (void) vsip_mdestroy_d( Av1 );
345
            (void) vsip_mdestroy_d( Uv1 );
346
#else
347
            HPL_dgemm( HplColumnMajor, HplNoTrans, HplNoTrans, mp, nn,
348
                       jb, -HPL_rone, L2ptr, ldl2, Uptr, LDU, HPL_rone,
349
                       Aptr, lda );
350
#endif
351
         }
352
         Uptr = Mptr( Uptr, 0, nn, LDU );
353
         Aptr = Mptr( Aptr, 0, nn, lda ); nq0 += nn;
354

    
355
         (void) HPL_bcast( PBCST, &test ); 
356
      }
357
/*
358
 * The panel has been forwarded at that point, finish the update
359
 */
360
      if( ( nn = n - nq0 ) > 0 )
361
      {
362
         HPL_dtrsm( HplColumnMajor, HplLeft,  HplUpper, HplTrans,
363
                    HplUnit, jb, nn, HPL_rone, L1ptr, jb, Uptr, LDU );
364

    
365
         if( curr != 0 )
366
         {
367
#ifdef HPL_CALL_VSIPL
368
/*
369
 * Create the matrix subviews
370
 */
371
            Uv1 = vsip_msubview_d( Uv0, 0,            nq0,           jb, nn );
372
            Av1 = vsip_msubview_d( Av0, PANEL->ii+jb, PANEL->jj+nq0, mp, nn );
373

    
374
            vsip_gemp_d( -HPL_rone, Lv1, VSIP_MAT_NTRANS, Uv1, VSIP_MAT_NTRANS,
375
                         HPL_rone, Av1 );
376
/*
377
 * Destroy the matrix subviews
378
 */
379
            (void) vsip_mdestroy_d( Av1 );
380
            (void) vsip_mdestroy_d( Uv1 );
381
#else
382
            HPL_dgemm( HplColumnMajor, HplNoTrans, HplNoTrans, mp, nn,
383
                       jb, -HPL_rone, L2ptr, ldl2, Uptr, LDU, HPL_rone,
384
                       Mptr( Aptr, jb, 0, lda ), lda );
385
#endif
386
            HPL_dlacpy( jb, nn, Uptr, LDU, Aptr, lda );
387
         }
388
         else
389
         {
390
#ifdef HPL_CALL_VSIPL
391
/*
392
 * Create the matrix subviews
393
 */
394
            Uv1 = vsip_msubview_d( Uv0, 0,            nq0,           jb, nn );
395
            Av1 = vsip_msubview_d( Av0, PANEL->ii,    PANEL->jj+nq0, mp, nn );
396

    
397
            vsip_gemp_d( -HPL_rone, Lv1, VSIP_MAT_NTRANS, Uv1, VSIP_MAT_NTRANS,
398
                         HPL_rone, Av1 );
399
/*
400
 * Destroy the matrix subviews
401
 */
402
            (void) vsip_mdestroy_d( Av1 );
403
            (void) vsip_mdestroy_d( Uv1 );
404
#else
405
            HPL_dgemm( HplColumnMajor, HplNoTrans, HplNoTrans, mp, nn,
406
                       jb, -HPL_rone, L2ptr, ldl2, Uptr, LDU, HPL_rone,
407
                       Aptr, lda );
408
#endif
409
         }
410
      }
411
#ifdef HPL_CALL_VSIPL
412
/*
413
 * Destroy the matrix subviews
414
 */
415
      (void) vsip_mdestroy_d( Lv1 );
416
/*
417
 * Release the blocks
418
 */
419
      (void) vsip_blockrelease_d( vsip_mgetblock_d( Uv0 ), VSIP_TRUE );
420
      (void) vsip_blockrelease_d( vsip_mgetblock_d( Lv0 ), VSIP_TRUE );
421
      (void) vsip_blockrelease_d( vsip_mgetblock_d( Av0 ), VSIP_TRUE );
422
/*
423
 * Destroy the matrix views
424
 */
425
      (void) vsip_mdestroy_d( Uv0 );
426
      (void) vsip_mdestroy_d( Lv0 );
427
      (void) vsip_mdestroy_d( Av0 );
428
#endif
429
   }
430

    
431
   PANEL->A = Mptr( PANEL->A, 0, n, lda ); PANEL->nq -= n; PANEL->jj += n;
432
/*
433
 * return the outcome of the probe  (should always be  HPL_SUCCESS,  the
434
 * panel broadcast is enforced in that routine).
435
 */
436
   if( PBCST != NULL ) *IFLAG = test;
437
#ifdef HPL_DETAILED_TIMING
438
   HPL_ptimer( HPL_TIMING_UPDATE );
439
#endif
440
/*
441
 * End of HPL_pdupdateTN
442
 */
443
}