Statistiques
| Révision :

root / src / pgesv / HPL_pdgesvK1.c @ 9

Historique | Voir | Annoter | Télécharger (7,94 ko)

1 1 equemene
/*
2 1 equemene
 * -- High Performance Computing Linpack Benchmark (HPL)
3 1 equemene
 *    HPL - 2.0 - September 10, 2008
4 1 equemene
 *    Antoine P. Petitet
5 1 equemene
 *    University of Tennessee, Knoxville
6 1 equemene
 *    Innovative Computing Laboratory
7 1 equemene
 *    (C) Copyright 2000-2008 All Rights Reserved
8 1 equemene
 *
9 1 equemene
 * -- Copyright notice and Licensing terms:
10 1 equemene
 *
11 1 equemene
 * Redistribution  and  use in  source and binary forms, with or without
12 1 equemene
 * modification, are  permitted provided  that the following  conditions
13 1 equemene
 * are met:
14 1 equemene
 *
15 1 equemene
 * 1. Redistributions  of  source  code  must retain the above copyright
16 1 equemene
 * notice, this list of conditions and the following disclaimer.
17 1 equemene
 *
18 1 equemene
 * 2. Redistributions in binary form must reproduce  the above copyright
19 1 equemene
 * notice, this list of conditions,  and the following disclaimer in the
20 1 equemene
 * documentation and/or other materials provided with the distribution.
21 1 equemene
 *
22 1 equemene
 * 3. All  advertising  materials  mentioning  features  or  use of this
23 1 equemene
 * software must display the following acknowledgement:
24 1 equemene
 * This  product  includes  software  developed  at  the  University  of
25 1 equemene
 * Tennessee, Knoxville, Innovative Computing Laboratory.
26 1 equemene
 *
27 1 equemene
 * 4. The name of the  University,  the name of the  Laboratory,  or the
28 1 equemene
 * names  of  its  contributors  may  not  be used to endorse or promote
29 1 equemene
 * products  derived   from   this  software  without  specific  written
30 1 equemene
 * permission.
31 1 equemene
 *
32 1 equemene
 * -- Disclaimer:
33 1 equemene
 *
34 1 equemene
 * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
35 1 equemene
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
36 1 equemene
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
37 1 equemene
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
38 1 equemene
 * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
39 1 equemene
 * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
40 1 equemene
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
41 1 equemene
 * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
42 1 equemene
 * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
43 1 equemene
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
44 1 equemene
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
45 1 equemene
 * ---------------------------------------------------------------------
46 1 equemene
 */
47 1 equemene
/*
48 1 equemene
 * Include files
49 1 equemene
 */
50 1 equemene
#include "hpl.h"
51 1 equemene
52 1 equemene
#ifdef STDC_HEADERS
53 1 equemene
void HPL_pdgesvK1
54 1 equemene
(
55 1 equemene
   HPL_T_grid *                     GRID,
56 1 equemene
   HPL_T_palg *                     ALGO,
57 1 equemene
   HPL_T_pmat *                     A
58 1 equemene
)
59 1 equemene
#else
60 1 equemene
void HPL_pdgesvK1
61 1 equemene
( GRID, ALGO, A )
62 1 equemene
   HPL_T_grid *                     GRID;
63 1 equemene
   HPL_T_palg *                     ALGO;
64 1 equemene
   HPL_T_pmat *                     A;
65 1 equemene
#endif
66 1 equemene
{
67 1 equemene
/*
68 1 equemene
 * Purpose
69 1 equemene
 * =======
70 1 equemene
 *
71 1 equemene
 * HPL_pdgesvK1 factors a N+1-by-N matrix using LU factorization with row
72 1 equemene
 * partial pivoting.  The main algorithm  is the "right looking" variant
73 1 equemene
 * with look-ahead.  The  lower  triangular factor is left unpivoted and
74 1 equemene
 * the pivots are not returned. The right hand side is the N+1 column of
75 1 equemene
 * the coefficient matrix.
76 1 equemene
 *
77 1 equemene
 * Arguments
78 1 equemene
 * =========
79 1 equemene
 *
80 1 equemene
 * GRID    (local input)                 HPL_T_grid *
81 1 equemene
 *         On entry,  GRID  points  to the data structure containing the
82 1 equemene
 *         process grid information.
83 1 equemene
 *
84 1 equemene
 * ALGO    (global input)                HPL_T_palg *
85 1 equemene
 *         On entry,  ALGO  points to  the data structure containing the
86 1 equemene
 *         algorithmic parameters.
87 1 equemene
 *
88 1 equemene
 * A       (local input/output)          HPL_T_pmat *
89 1 equemene
 *         On entry, A points to the data structure containing the local
90 1 equemene
 *         array information.
91 1 equemene
 *
92 1 equemene
 * ---------------------------------------------------------------------
93 1 equemene
 */
94 1 equemene
/*
95 1 equemene
 * .. Local Variables ..
96 1 equemene
 */
97 1 equemene
   HPL_T_panel                * * panel = NULL;
98 1 equemene
   HPL_T_UPD_FUN              HPL_pdupdate;
99 1 equemene
   int                        N, depth, icurcol=0, j, jb, jj=0, jstart,
100 1 equemene
                              k, mycol, n, nb, nn, npcol, nq,
101 1 equemene
                              tag=MSGID_BEGIN_FACT, test=HPL_KEEP_TESTING;
102 1 equemene
/* ..
103 1 equemene
 * .. Executable Statements ..
104 1 equemene
 */
105 1 equemene
   mycol = GRID->mycol; npcol        = GRID->npcol;
106 1 equemene
   depth = ALGO->depth; HPL_pdupdate = ALGO->upfun;
107 1 equemene
   N     = A->n;        nb           = A->nb;
108 1 equemene
109 1 equemene
   if( N <= 0 ) return;
110 1 equemene
/*
111 1 equemene
 * Allocate a panel list of length depth + 1 (depth >= 1)
112 1 equemene
 */
113 1 equemene
   panel = (HPL_T_panel **)malloc( (size_t)(depth+1)*sizeof( HPL_T_panel *) );
114 1 equemene
   if( panel == NULL )
115 1 equemene
   { HPL_pabort( __LINE__, "HPL_pdgesvK1", "Memory allocation failed" ); }
116 1 equemene
/*
117 1 equemene
 * Create and initialize the first depth panels
118 1 equemene
 */
119 1 equemene
   nq = HPL_numroc( N+1, nb, nb, mycol, 0, npcol ); nn = N; jstart = 0;
120 1 equemene
121 1 equemene
   for( k = 0; k < depth; k++ )
122 1 equemene
   {
123 1 equemene
      jb = Mmin( nn, nb );
124 1 equemene
      HPL_pdpanel_new( GRID, ALGO, nn, nn+1, jb, A, jstart, jstart,
125 1 equemene
                       tag, &panel[k] );
126 1 equemene
      nn -= jb; jstart += jb;
127 1 equemene
      if( mycol == icurcol ) { jj += jb; nq -= jb; }
128 1 equemene
      icurcol = MModAdd1( icurcol, npcol );
129 1 equemene
      tag     = MNxtMgid( tag, MSGID_BEGIN_FACT, MSGID_END_FACT );
130 1 equemene
   }
131 1 equemene
/*
132 1 equemene
 * Initialize the lookahead - Factor jstart columns: panel[0..depth-1]
133 1 equemene
 */
134 1 equemene
   for( k = 0, j = 0; k < depth; k++ )
135 1 equemene
   {
136 1 equemene
      jb = jstart - j; jb = Mmin( jb, nb ); j += jb;
137 1 equemene
/*
138 1 equemene
 * Factor and broadcast k-th panel - use long topology for those
139 1 equemene
 */
140 1 equemene
      HPL_pdfact(         panel[k] );
141 1 equemene
      (void) HPL_binit(   panel[k] );
142 1 equemene
      do
143 1 equemene
      { (void) HPL_bcast( panel[k], &test ); }
144 1 equemene
      while( test != HPL_SUCCESS );
145 1 equemene
      (void) HPL_bwait(   panel[k] );
146 1 equemene
/*
147 1 equemene
 * Partial update of the depth-1-k panels in front of me
148 1 equemene
 */
149 1 equemene
      if( k < depth - 1 )
150 1 equemene
      {
151 1 equemene
         nn = HPL_numrocI( jstart-j, j, nb, nb, mycol, 0, npcol );
152 1 equemene
         HPL_pdupdate( NULL, NULL, panel[k], nn );
153 1 equemene
      }
154 1 equemene
   }
155 1 equemene
/*
156 1 equemene
 * Main loop over the remaining columns of A
157 1 equemene
 */
158 1 equemene
   for( j = jstart; j < N; j += nb )
159 1 equemene
   {
160 1 equemene
      n = N - j; jb = Mmin( n, nb );
161 1 equemene
/*
162 1 equemene
 * Allocate current panel resources - Finish latest update - Factor and
163 1 equemene
 * broadcast current panel
164 1 equemene
 */
165 1 equemene
      HPL_pdpanel_new( GRID, ALGO, n, n+1, jb, A, j, j, tag, &panel[depth] );
166 1 equemene
167 1 equemene
      if( mycol == icurcol )
168 1 equemene
      {
169 1 equemene
         nn = HPL_numrocI( jb, j, nb, nb, mycol, 0, npcol );
170 1 equemene
         for( k = 0; k < depth; k++ )   /* partial updates 0..depth-1 */
171 1 equemene
            HPL_pdupdate( NULL, NULL, panel[k], nn );
172 1 equemene
         HPL_pdfact(    panel[depth] );       /* factor current panel */
173 1 equemene
      }
174 1 equemene
      else { nn = 0; }
175 1 equemene
          /* Finish the latest update and broadcast the current panel */
176 1 equemene
      (void) HPL_binit( panel[depth] );
177 1 equemene
      HPL_pdupdate(     panel[depth], &test, panel[0], nq-nn );
178 1 equemene
      (void) HPL_bwait( panel[depth] );
179 1 equemene
/*
180 1 equemene
 * Release latest panel resources - circular  of the panel pointers
181 1 equemene
 * Go to the next process row and column -  update  the message ids  for
182 1 equemene
 * broadcast
183 1 equemene
 */
184 1 equemene
      (void) HPL_pdpanel_disp( &panel[0] );
185 1 equemene
      for( k = 0; k < depth; k++ ) panel[k] = panel[k+1];
186 1 equemene
187 1 equemene
      if( mycol == icurcol ) { jj += jb; nq -= jb; }
188 1 equemene
      icurcol = MModAdd1( icurcol, npcol );
189 1 equemene
      tag     = MNxtMgid( tag, MSGID_BEGIN_FACT, MSGID_END_FACT );
190 1 equemene
   }
191 1 equemene
/*
192 1 equemene
 * Clean-up: Finish updates - release panels and panel list
193 1 equemene
 */
194 1 equemene
   nn = HPL_numrocI( 1, N, nb, nb, mycol, 0, npcol );
195 1 equemene
   for( k = 0; k < depth; k++ )
196 1 equemene
   {
197 1 equemene
      HPL_pdupdate( NULL, NULL, panel[k], nn );
198 1 equemene
      (void) HPL_pdpanel_disp( &panel[k] );
199 1 equemene
   }
200 1 equemene
201 1 equemene
   if( panel ) free( panel );
202 1 equemene
/*
203 1 equemene
 * End of HPL_pdgesvK1
204 1 equemene
 */
205 1 equemene
}