Statistiques
| Révision :

root / src / pgesv / HPL_pdupdateNN.c @ 9

Historique | Voir | Annoter | Télécharger (15,15 ko)

1
/* 
2
 * -- High Performance Computing Linpack Benchmark (HPL)                
3
 *    HPL - 2.0 - September 10, 2008                          
4
 *    Antoine P. Petitet                                                
5
 *    University of Tennessee, Knoxville                                
6
 *    Innovative Computing Laboratory                                 
7
 *    (C) Copyright 2000-2008 All Rights Reserved                       
8
 *                                                                      
9
 * -- Copyright notice and Licensing terms:                             
10
 *                                                                      
11
 * Redistribution  and  use in  source and binary forms, with or without
12
 * modification, are  permitted provided  that the following  conditions
13
 * are met:                                                             
14
 *                                                                      
15
 * 1. Redistributions  of  source  code  must retain the above copyright
16
 * notice, this list of conditions and the following disclaimer.        
17
 *                                                                      
18
 * 2. Redistributions in binary form must reproduce  the above copyright
19
 * notice, this list of conditions,  and the following disclaimer in the
20
 * documentation and/or other materials provided with the distribution. 
21
 *                                                                      
22
 * 3. All  advertising  materials  mentioning  features  or  use of this
23
 * software must display the following acknowledgement:                 
24
 * This  product  includes  software  developed  at  the  University  of
25
 * Tennessee, Knoxville, Innovative Computing Laboratory.             
26
 *                                                                      
27
 * 4. The name of the  University,  the name of the  Laboratory,  or the
28
 * names  of  its  contributors  may  not  be used to endorse or promote
29
 * products  derived   from   this  software  without  specific  written
30
 * permission.                                                          
31
 *                                                                      
32
 * -- Disclaimer:                                                       
33
 *                                                                      
34
 * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
35
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
36
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
37
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
38
 * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
39
 * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
40
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
41
 * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
42
 * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
43
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
44
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
45
 * ---------------------------------------------------------------------
46
 */ 
47
/*
48
 * Include files
49
 */
50
#include "hpl.h"
51

    
52
#ifdef STDC_HEADERS
53
void HPL_pdupdateNN
54
(
55
   HPL_T_panel *                    PBCST,
56
   int *                            IFLAG,
57
   HPL_T_panel *                    PANEL,
58
   const int                        NN
59
)
60
#else
61
void HPL_pdupdateNN
62
( PBCST, IFLAG, PANEL, NN )
63
   HPL_T_panel *                    PBCST;
64
   int *                            IFLAG;
65
   HPL_T_panel *                    PANEL;
66
   const int                        NN;
67
#endif
68
{
69
/* 
70
 * Purpose
71
 * =======
72
 *
73
 * HPL_pdupdateNN broadcast - forward the panel PBCST and simultaneously
74
 * applies the row interchanges and updates part of the trailing  (using
75
 * the panel PANEL) submatrix.
76
 *
77
 * Arguments
78
 * =========
79
 *
80
 * PBCST   (local input/output)          HPL_T_panel *
81
 *         On entry,  PBCST  points to the data structure containing the
82
 *         panel (to be broadcast) information.
83
 *
84
 * IFLAG   (local output)                int *
85
 *         On exit,  IFLAG  indicates  whether or not  the broadcast has
86
 *         been completed when PBCST is not NULL on entry. In that case,
87
 *         IFLAG is left unchanged.
88
 *
89
 * PANEL   (local input/output)          HPL_T_panel *
90
 *         On entry,  PANEL  points to the data structure containing the
91
 *         panel (to be updated) information.
92
 *
93
 * NN      (local input)                 const int
94
 *         On entry, NN specifies  the  local  number  of columns of the
95
 *         trailing  submatrix  to be updated  starting  at the  current
96
 *         position. NN must be at least zero.
97
 *
98
 * ---------------------------------------------------------------------
99
 */ 
100
/*
101
 * .. Local Variables ..
102
 */
103
   double                    * Aptr, * L1ptr, * L2ptr, * Uptr, * dpiv;
104
   int                       * ipiv;
105
#ifdef HPL_CALL_VSIPL
106
   vsip_mview_d              * Av0, * Av1, * Lv0, * Lv1, * Uv0, * Uv1;
107
#endif
108
   int                       curr, i, iroff, jb, lda, ldl2, mp, n, nb,
109
                             nq0, nn, test;
110
   static int                tswap = 0;
111
   static HPL_T_SWAP         fswap = HPL_NO_SWP;
112
#define LDU                  jb
113
/* ..
114
 * .. Executable Statements ..
115
 */
116
#ifdef HPL_DETAILED_TIMING
117
   HPL_ptimer( HPL_TIMING_UPDATE );
118
#endif
119
   nb = PANEL->nb; jb = PANEL->jb; n = PANEL->nq; lda = PANEL->lda;
120
   if( NN >= 0 ) n = Mmin( NN, n );
121
/*
122
 * There is nothing to update, enforce the panel broadcast.
123
 */
124
   if( ( n <= 0 ) || ( jb <= 0 ) )
125
   {
126
      if( PBCST != NULL )
127
      {
128
         do { (void) HPL_bcast( PBCST, IFLAG ); }
129
         while( *IFLAG != HPL_SUCCESS );
130
      }
131
#ifdef HPL_DETAILED_TIMING
132
      HPL_ptimer( HPL_TIMING_UPDATE );
133
#endif
134
      return;
135
   }
136
/*
137
 * Enable/disable the column panel probing mechanism
138
 */
139
   (void) HPL_bcast( PBCST, &test );
140
/*
141
 * 1 x Q case
142
 */
143
   if( PANEL->grid->nprow == 1 )
144
   {
145
      Aptr = PANEL->A;       L2ptr = PANEL->L2;   L1ptr = PANEL->L1;
146
      ldl2 = PANEL->ldl2;    dpiv  = PANEL->DPIV; ipiv  = PANEL->IWORK;
147
      mp   = PANEL->mp - jb; iroff = PANEL->ii;   nq0   = 0; 
148
#ifdef HPL_CALL_VSIPL
149
/*
150
 * Admit the blocks
151
 */
152
      (void) vsip_blockadmit_d( PANEL->Ablock,  VSIP_TRUE );
153
      (void) vsip_blockadmit_d( PANEL->L2block, VSIP_TRUE );
154
/*
155
 * Create the matrix views
156
 */
157
      Av0 = vsip_mbind_d( PANEL->Ablock,  0, 1, lda,  lda,  PANEL->pmat->nq );
158
      Lv0 = vsip_mbind_d( PANEL->L2block, 0, 1, ldl2, ldl2,              jb );
159
/*
160
 * Create the matrix subviews
161
 */
162
      Lv1 = vsip_msubview_d( Lv0, 0, 0, mp, jb );
163
#endif
164
      for( i = 0; i < jb; i++ ) { ipiv[i] = (int)(dpiv[i]) - iroff; }
165
/*
166
 * So far we have not updated anything -  test availability of the panel
167
 * to be forwarded - If detected forward it and finish the update in one
168
 * step.
169
 */
170
      while ( test == HPL_KEEP_TESTING )
171
      {
172
         nn = n - nq0; nn = Mmin( nb, nn );
173
/*
174
 * Update nb columns at a time
175
 */
176
#ifdef HPL_DETAILED_TIMING
177
         HPL_ptimer( HPL_TIMING_LASWP );
178
         HPL_dlaswp00N( jb, nn, Aptr, lda, ipiv );
179
         HPL_ptimer( HPL_TIMING_LASWP );
180
#else
181
         HPL_dlaswp00N( jb, nn, Aptr, lda, ipiv );
182
#endif
183
         HPL_dtrsm( HplColumnMajor, HplLeft, HplLower, HplNoTrans,
184
                    HplUnit, jb, nn, HPL_rone, L1ptr, jb, Aptr, lda );
185
#ifdef HPL_CALL_VSIPL
186
/*
187
 * Create the matrix subviews
188
 */
189
         Uv1 = vsip_msubview_d( Av0, PANEL->ii,    PANEL->jj+nq0, jb, nn );
190
         Av1 = vsip_msubview_d( Av0, PANEL->ii+jb, PANEL->jj+nq0, mp, nn );
191

    
192
         vsip_gemp_d( -HPL_rone, Lv1, VSIP_MAT_NTRANS, Uv1, VSIP_MAT_NTRANS,
193
                      HPL_rone, Av1 );
194
/*
195
 * Destroy the matrix subviews
196
 */
197
         (void) vsip_mdestroy_d( Av1 );
198
         (void) vsip_mdestroy_d( Uv1 );
199
#else
200
         HPL_dgemm( HplColumnMajor, HplNoTrans, HplNoTrans, mp, nn,
201
                    jb, -HPL_rone, L2ptr, ldl2, Aptr, lda, HPL_rone,
202
                    Mptr( Aptr, jb, 0, lda ), lda );
203
#endif
204
         Aptr = Mptr( Aptr, 0, nn, lda ); nq0 += nn; 
205

    
206
         (void) HPL_bcast( PBCST, &test ); 
207
      }
208
/*
209
 * The panel has been forwarded at that point, finish the update
210
 */
211
      if( ( nn = n - nq0 ) > 0 )
212
      {
213
#ifdef HPL_DETAILED_TIMING
214
         HPL_ptimer( HPL_TIMING_LASWP );
215
         HPL_dlaswp00N( jb, nn, Aptr, lda, ipiv );
216
         HPL_ptimer( HPL_TIMING_LASWP );
217
#else
218
         HPL_dlaswp00N( jb, nn, Aptr, lda, ipiv );
219
#endif
220
         HPL_dtrsm( HplColumnMajor, HplLeft, HplLower, HplNoTrans,
221
                    HplUnit, jb, nn, HPL_rone, L1ptr, jb, Aptr, lda );
222
#ifdef HPL_CALL_VSIPL
223
/*
224
 * Create the matrix subviews
225
 */
226
         Uv1 = vsip_msubview_d( Av0, PANEL->ii,    PANEL->jj+nq0, jb, nn );
227
         Av1 = vsip_msubview_d( Av0, PANEL->ii+jb, PANEL->jj+nq0, mp, nn );
228

    
229
         vsip_gemp_d( -HPL_rone, Lv1, VSIP_MAT_NTRANS, Uv1, VSIP_MAT_NTRANS,
230
                      HPL_rone, Av1 );
231
/*
232
 * Destroy the matrix subviews
233
 */
234
         (void) vsip_mdestroy_d( Av1 );
235
         (void) vsip_mdestroy_d( Uv1 );
236
#else
237
         HPL_dgemm( HplColumnMajor, HplNoTrans, HplNoTrans, mp, nn,
238
                    jb, -HPL_rone, L2ptr, ldl2, Aptr, lda, HPL_rone,
239
                    Mptr( Aptr, jb, 0, lda ), lda );
240
#endif
241
      }
242
#ifdef HPL_CALL_VSIPL
243
/*
244
 * Destroy the matrix subviews
245
 */
246
      (void) vsip_mdestroy_d( Lv1 );
247
/*
248
 * Release the blocks
249
 */
250
      (void) vsip_blockrelease_d( vsip_mgetblock_d( Lv0 ), VSIP_TRUE );
251
      (void) vsip_blockrelease_d( vsip_mgetblock_d( Av0 ), VSIP_TRUE );
252
/*
253
 * Destroy the matrix views
254
 */
255
      (void) vsip_mdestroy_d( Lv0 );
256
      (void) vsip_mdestroy_d( Av0 );
257
#endif
258
   }
259
   else                        /* nprow > 1 ... */
260
   {
261
/*
262
 * Selection of the swapping algorithm - swap:broadcast U.
263
 */
264
      if( fswap == HPL_NO_SWP )
265
      { fswap = PANEL->algo->fswap; tswap = PANEL->algo->fsthr; }
266

    
267
      if( (   fswap == HPL_SWAP01 ) ||
268
          ( ( fswap == HPL_SW_MIX ) && ( n > tswap ) ) )
269
      { HPL_pdlaswp01N( PBCST, &test, PANEL, n ); }
270
      else
271
      { HPL_pdlaswp00N( PBCST, &test, PANEL, n ); }
272
/*
273
 * Compute redundantly row block of U and update trailing submatrix
274
 */
275
      nq0 = 0; curr = ( PANEL->grid->myrow == PANEL->prow ? 1 : 0 );
276
      Aptr = PANEL->A; L2ptr = PANEL->L2;  L1ptr = PANEL->L1;
277
      Uptr = PANEL->U; ldl2 = PANEL->ldl2;
278
      mp   = PANEL->mp - ( curr != 0 ? jb : 0 );
279
#ifdef HPL_CALL_VSIPL
280
/*
281
 * Admit the blocks
282
 */
283
      (void) vsip_blockadmit_d( PANEL->Ablock,  VSIP_TRUE );
284
      (void) vsip_blockadmit_d( PANEL->L2block, VSIP_TRUE );
285
      (void) vsip_blockadmit_d( PANEL->Ublock,  VSIP_TRUE );
286
/*
287
 * Create the matrix views
288
 */
289
      Av0 = vsip_mbind_d( PANEL->Ablock,  0, 1, lda,  lda,  PANEL->pmat->nq );
290
      Lv0 = vsip_mbind_d( PANEL->L2block, 0, 1, ldl2, ldl2,              jb );
291
      Uv0 = vsip_mbind_d( PANEL->Ublock,  0, 1, LDU,  LDU,                n );
292
/*
293
 * Create the matrix subviews
294
 */
295
      Lv1 = vsip_msubview_d( Lv0, 0, 0, mp, jb );
296
#endif
297
/*
298
 * Broadcast has not occured yet, spliting the computational part
299
 */
300
      while ( test == HPL_KEEP_TESTING )
301
      {
302
         nn = n - nq0; nn = Mmin( nb, nn );
303

    
304
         HPL_dtrsm( HplColumnMajor, HplLeft,  HplLower, HplNoTrans,
305
                    HplUnit, jb, nn, HPL_rone, L1ptr, jb, Uptr, LDU );
306
         if( curr != 0 )
307
         {
308
#ifdef HPL_CALL_VSIPL
309
/*
310
 * Create the matrix subviews
311
 */
312
            Uv1 = vsip_msubview_d( Uv0, 0,            nq0,           jb, nn );
313
            Av1 = vsip_msubview_d( Av0, PANEL->ii+jb, PANEL->jj+nq0, mp, nn );
314

    
315
            vsip_gemp_d( -HPL_rone, Lv1, VSIP_MAT_NTRANS, Uv1, VSIP_MAT_NTRANS,
316
                         HPL_rone, Av1 );
317
/*
318
 * Destroy the matrix subviews
319
 */
320
            (void) vsip_mdestroy_d( Av1 );
321
            (void) vsip_mdestroy_d( Uv1 );
322
#else
323
            HPL_dgemm( HplColumnMajor, HplNoTrans, HplNoTrans, mp, nn,
324
                       jb, -HPL_rone, L2ptr, ldl2, Uptr, LDU, HPL_rone,
325
                       Mptr( Aptr, jb, 0, lda ), lda );
326
#endif
327
            HPL_dlacpy( jb, nn, Uptr, LDU, Aptr, lda );
328
         }
329
         else
330
         {
331
#ifdef HPL_CALL_VSIPL
332
/*
333
 * Create the matrix subviews
334
 */
335
            Uv1 = vsip_msubview_d( Uv0, 0,            nq0,           jb, nn );
336
            Av1 = vsip_msubview_d( Av0, PANEL->ii,    PANEL->jj+nq0, mp, nn );
337

    
338
            vsip_gemp_d( -HPL_rone, Lv1, VSIP_MAT_NTRANS, Uv1, VSIP_MAT_NTRANS,
339
                         HPL_rone, Av1 );
340
/*
341
 * Destroy the matrix subviews
342
 */
343
            (void) vsip_mdestroy_d( Av1 );
344
            (void) vsip_mdestroy_d( Uv1 );
345
#else
346
            HPL_dgemm( HplColumnMajor, HplNoTrans, HplNoTrans, mp, nn,
347
                       jb, -HPL_rone, L2ptr, ldl2, Uptr, LDU, HPL_rone,
348
                       Aptr, lda );
349
#endif
350
         }
351
         Uptr = Mptr( Uptr, 0, nn, LDU );
352
         Aptr = Mptr( Aptr, 0, nn, lda ); nq0 += nn;
353

    
354
         (void) HPL_bcast( PBCST, &test ); 
355
      }
356
/*
357
 * The panel has been forwarded at that point, finish the update
358
 */
359
      if( ( nn = n - nq0 ) > 0 )
360
      {
361
         HPL_dtrsm( HplColumnMajor, HplLeft,  HplLower, HplNoTrans,
362
                    HplUnit, jb, nn, HPL_rone, L1ptr, jb, Uptr, LDU );
363

    
364
         if( curr != 0 )
365
         {
366
#ifdef HPL_CALL_VSIPL
367
/*
368
 * Create the matrix subviews
369
 */
370
            Uv1 = vsip_msubview_d( Uv0, 0,            nq0,           jb, nn );
371
            Av1 = vsip_msubview_d( Av0, PANEL->ii+jb, PANEL->jj+nq0, mp, nn );
372

    
373
            vsip_gemp_d( -HPL_rone, Lv1, VSIP_MAT_NTRANS, Uv1, VSIP_MAT_NTRANS,
374
                         HPL_rone, Av1 );
375
/*
376
 * Destroy the matrix subviews
377
 */
378
            (void) vsip_mdestroy_d( Av1 );
379
            (void) vsip_mdestroy_d( Uv1 );
380
#else
381
            HPL_dgemm( HplColumnMajor, HplNoTrans, HplNoTrans, mp, nn,
382
                       jb, -HPL_rone, L2ptr, ldl2, Uptr, LDU, HPL_rone,
383
                       Mptr( Aptr, jb, 0, lda ), lda );
384
#endif
385
            HPL_dlacpy( jb, nn, Uptr, LDU, Aptr, lda );
386
         }
387
         else
388
         {
389
#ifdef HPL_CALL_VSIPL
390
/*
391
 * Create the matrix subviews
392
 */
393
            Uv1 = vsip_msubview_d( Uv0, 0,            nq0,           jb, nn );
394
            Av1 = vsip_msubview_d( Av0, PANEL->ii,    PANEL->jj+nq0, mp, nn );
395

    
396
            vsip_gemp_d( -HPL_rone, Lv1, VSIP_MAT_NTRANS, Uv1, VSIP_MAT_NTRANS,
397
                         HPL_rone, Av1 );
398
/*
399
 * Destroy the matrix subviews
400
 */
401
            (void) vsip_mdestroy_d( Av1 );
402
            (void) vsip_mdestroy_d( Uv1 );
403
#else
404
            HPL_dgemm( HplColumnMajor, HplNoTrans, HplNoTrans, mp, nn,
405
                       jb, -HPL_rone, L2ptr, ldl2, Uptr, LDU, HPL_rone,
406
                       Aptr, lda );
407
#endif
408
         }
409
      }
410
#ifdef HPL_CALL_VSIPL
411
/*
412
 * Destroy the matrix subviews
413
 */
414
      (void) vsip_mdestroy_d( Lv1 );
415
/*
416
 * Release the blocks
417
 */
418
      (void) vsip_blockrelease_d( vsip_mgetblock_d( Uv0 ), VSIP_TRUE );
419
      (void) vsip_blockrelease_d( vsip_mgetblock_d( Lv0 ), VSIP_TRUE );
420
      (void) vsip_blockrelease_d( vsip_mgetblock_d( Av0 ), VSIP_TRUE );
421
/*
422
 * Destroy the matrix views
423
 */
424
      (void) vsip_mdestroy_d( Uv0 );
425
      (void) vsip_mdestroy_d( Lv0 );
426
      (void) vsip_mdestroy_d( Av0 );
427
#endif
428
   }
429

    
430
   PANEL->A = Mptr( PANEL->A, 0, n, lda ); PANEL->nq -= n; PANEL->jj += n;
431
/*
432
 * return the outcome of the probe  (should always be  HPL_SUCCESS,  the
433
 * panel broadcast is enforced in that routine).
434
 */
435
   if( PBCST != NULL ) *IFLAG = test;
436
#ifdef HPL_DETAILED_TIMING
437
   HPL_ptimer( HPL_TIMING_UPDATE );
438
#endif
439
/*
440
 * End of HPL_pdupdateNN
441
 */
442
}