Statistiques
| Révision :

root / src / pgesv / HPL_pdlaswp01N.c @ 8

Historique | Voir | Annoter | Télécharger (8,82 ko)

1 1 equemene
/*
2 1 equemene
 * -- High Performance Computing Linpack Benchmark (HPL)
3 1 equemene
 *    HPL - 2.0 - September 10, 2008
4 1 equemene
 *    Antoine P. Petitet
5 1 equemene
 *    University of Tennessee, Knoxville
6 1 equemene
 *    Innovative Computing Laboratory
7 1 equemene
 *    (C) Copyright 2000-2008 All Rights Reserved
8 1 equemene
 *
9 1 equemene
 * -- Copyright notice and Licensing terms:
10 1 equemene
 *
11 1 equemene
 * Redistribution  and  use in  source and binary forms, with or without
12 1 equemene
 * modification, are  permitted provided  that the following  conditions
13 1 equemene
 * are met:
14 1 equemene
 *
15 1 equemene
 * 1. Redistributions  of  source  code  must retain the above copyright
16 1 equemene
 * notice, this list of conditions and the following disclaimer.
17 1 equemene
 *
18 1 equemene
 * 2. Redistributions in binary form must reproduce  the above copyright
19 1 equemene
 * notice, this list of conditions,  and the following disclaimer in the
20 1 equemene
 * documentation and/or other materials provided with the distribution.
21 1 equemene
 *
22 1 equemene
 * 3. All  advertising  materials  mentioning  features  or  use of this
23 1 equemene
 * software must display the following acknowledgement:
24 1 equemene
 * This  product  includes  software  developed  at  the  University  of
25 1 equemene
 * Tennessee, Knoxville, Innovative Computing Laboratory.
26 1 equemene
 *
27 1 equemene
 * 4. The name of the  University,  the name of the  Laboratory,  or the
28 1 equemene
 * names  of  its  contributors  may  not  be used to endorse or promote
29 1 equemene
 * products  derived   from   this  software  without  specific  written
30 1 equemene
 * permission.
31 1 equemene
 *
32 1 equemene
 * -- Disclaimer:
33 1 equemene
 *
34 1 equemene
 * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
35 1 equemene
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
36 1 equemene
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
37 1 equemene
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
38 1 equemene
 * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
39 1 equemene
 * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
40 1 equemene
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
41 1 equemene
 * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
42 1 equemene
 * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
43 1 equemene
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
44 1 equemene
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
45 1 equemene
 * ---------------------------------------------------------------------
46 1 equemene
 */
47 1 equemene
/*
48 1 equemene
 * Include files
49 1 equemene
 */
50 1 equemene
#include "hpl.h"
51 1 equemene
52 1 equemene
#ifdef STDC_HEADERS
53 1 equemene
void HPL_pdlaswp01N
54 1 equemene
(
55 1 equemene
   HPL_T_panel *                    PBCST,
56 1 equemene
   int *                            IFLAG,
57 1 equemene
   HPL_T_panel *                    PANEL,
58 1 equemene
   const int                        NN
59 1 equemene
)
60 1 equemene
#else
61 1 equemene
void HPL_pdlaswp01N
62 1 equemene
( PBCST, IFLAG, PANEL, NN )
63 1 equemene
   HPL_T_panel *                    PBCST;
64 1 equemene
   int *                            IFLAG;
65 1 equemene
   HPL_T_panel *                    PANEL;
66 1 equemene
   const int                        NN;
67 1 equemene
#endif
68 1 equemene
{
69 1 equemene
/*
70 1 equemene
 * Purpose
71 1 equemene
 * =======
72 1 equemene
 *
73 1 equemene
 * HPL_pdlaswp01N applies the  NB  row interchanges to  NN columns of the
74 1 equemene
 * trailing submatrix and broadcast a column panel.
75 1 equemene
 *
76 1 equemene
 * A "Spread then roll" algorithm performs  the swap :: broadcast  of the
77 1 equemene
 * row panel U at once,  resulting in a minimal communication volume  and
78 1 equemene
 * a "very good"  use of the connectivity if available.  With  P  process
79 1 equemene
 * rows  and  assuming  bi-directional links,  the  running time  of this
80 1 equemene
 * function can be approximated by:
81 1 equemene
 *
82 1 equemene
 *    (log_2(P)+(P-1)) * lat +   K * NB * LocQ(N) / bdwth
83 1 equemene
 *
84 1 equemene
 * where  NB  is the number of rows of the row panel U,  N is the global
85 1 equemene
 * number of columns being updated,  lat and bdwth  are the latency  and
86 1 equemene
 * bandwidth  of  the  network  for  double  precision real words.  K is
87 1 equemene
 * a constant in (2,3] that depends on the achieved bandwidth  during  a
88 1 equemene
 * simultaneous  message exchange  between two processes.  An  empirical
89 1 equemene
 * optimistic value of K is typically 2.4.
90 1 equemene
 *
91 1 equemene
 * Arguments
92 1 equemene
 * =========
93 1 equemene
 *
94 1 equemene
 * PBCST   (local input/output)          HPL_T_panel *
95 1 equemene
 *         On entry,  PBCST  points to the data structure containing the
96 1 equemene
 *         panel (to be broadcast) information.
97 1 equemene
 *
98 1 equemene
 * IFLAG   (local input/output)          int *
99 1 equemene
 *         On entry, IFLAG  indicates  whether or not  the broadcast has
100 1 equemene
 *         already been completed.  If not,  probing will occur, and the
101 1 equemene
 *         outcome will be contained in IFLAG on exit.
102 1 equemene
 *
103 1 equemene
 * PANEL   (local input/output)          HPL_T_panel *
104 1 equemene
 *         On entry,  PANEL  points to the data structure containing the
105 1 equemene
 *         panel information.
106 1 equemene
 *
107 1 equemene
 * NN      (local input)                 const int
108 1 equemene
 *         On entry, NN specifies  the  local  number  of columns of the
109 1 equemene
 *         trailing  submatrix  to  be swapped and broadcast starting at
110 1 equemene
 *         the current position. NN must be at least zero.
111 1 equemene
 *
112 1 equemene
 * ---------------------------------------------------------------------
113 1 equemene
 */
114 1 equemene
/*
115 1 equemene
 * .. Local Variables ..
116 1 equemene
 */
117 1 equemene
   double                    * A, * U;
118 1 equemene
   int                       * ipID, * iplen, * ipmap, * ipmapm1,
119 1 equemene
                             * iwork, * lindxA = NULL, * lindxAU,
120 1 equemene
                             * permU;
121 1 equemene
   static int                equil=-1;
122 1 equemene
   int                       icurrow, * iflag, * ipA, * ipl, jb, k,
123 1 equemene
                             lda, myrow, n, nprow;
124 1 equemene
#define LDU                  jb
125 1 equemene
/* ..
126 1 equemene
 * .. Executable Statements ..
127 1 equemene
 */
128 1 equemene
   n = PANEL->n; n = Mmin( NN, n ); jb = PANEL->jb;
129 1 equemene
/*
130 1 equemene
 * Quick return if there is nothing to do
131 1 equemene
 */
132 1 equemene
   if( ( n <= 0 ) || ( jb <= 0 ) ) return;
133 1 equemene
#ifdef HPL_DETAILED_TIMING
134 1 equemene
   HPL_ptimer( HPL_TIMING_LASWP );
135 1 equemene
#endif
136 1 equemene
/*
137 1 equemene
 * Decide whether equilibration should be performed or not
138 1 equemene
 */
139 1 equemene
   if( equil == -1 ) equil = PANEL->algo->equil;
140 1 equemene
/*
141 1 equemene
 * Retrieve parameters from the PANEL data structure
142 1 equemene
 */
143 1 equemene
   nprow = PANEL->grid->nprow; myrow = PANEL->grid->myrow;
144 1 equemene
   A     = PANEL->A;   U       = PANEL->U;     iflag  = PANEL->IWORK;
145 1 equemene
   lda   = PANEL->lda; icurrow = PANEL->prow;
146 1 equemene
/*
147 1 equemene
 * Compute ipID (if not already done for this panel). lindxA and lindxAU
148 1 equemene
 * are of length at most 2*jb - iplen is of size nprow+1, ipmap, ipmapm1
149 1 equemene
 * are of size nprow,  permU is of length jb, and  this function needs a
150 1 equemene
 * workspace of size max( 2 * jb (plindx1), nprow+1(equil)):
151 1 equemene
 * 1(iflag) + 1(ipl) + 1(ipA) + 9*jb + 3*nprow + 1 + MAX(2*jb,nprow+1)
152 1 equemene
 * i.e. 4 + 9*jb + 3*nprow + max(2*jb, nprow+1);
153 1 equemene
 */
154 1 equemene
   k = (int)((unsigned int)(jb) << 1);  ipl = iflag + 1; ipID = ipl + 1;
155 1 equemene
   ipA     = ipID + ((unsigned int)(k) << 1); lindxA = ipA + 1;
156 1 equemene
   lindxAU = lindxA + k; iplen = lindxAU + k; ipmap = iplen + nprow + 1;
157 1 equemene
   ipmapm1 = ipmap + nprow; permU = ipmapm1 + nprow; iwork = permU + jb;
158 1 equemene
159 1 equemene
   if( *iflag == -1 )    /* no index arrays have been computed so far */
160 1 equemene
   {
161 1 equemene
      HPL_pipid(   PANEL,  ipl, ipID );
162 1 equemene
      HPL_plindx1( PANEL, *ipl, ipID, ipA, lindxA, lindxAU, iplen,
163 1 equemene
                   ipmap, ipmapm1, permU, iwork );
164 1 equemene
      *iflag = 1;
165 1 equemene
   }
166 1 equemene
   else if( *iflag == 0 ) /* HPL_pdlaswp00N called before: reuse ipID */
167 1 equemene
   {
168 1 equemene
      HPL_plindx1( PANEL, *ipl, ipID, ipA, lindxA, lindxAU, iplen,
169 1 equemene
                   ipmap, ipmapm1, permU, iwork );
170 1 equemene
      *iflag = 1;
171 1 equemene
   }
172 1 equemene
   else if( ( *iflag == 1 ) && ( equil != 0 ) )
173 1 equemene
   {   /* HPL_pdlaswp01N was call before only re-compute IPLEN, IPMAP */
174 1 equemene
      HPL_plindx10( PANEL, *ipl, ipID, iplen, ipmap, ipmapm1 );
175 1 equemene
      *iflag = 1;
176 1 equemene
   }
177 1 equemene
/*
178 1 equemene
 * Copy into U the rows to be spread (local to icurrow)
179 1 equemene
 */
180 1 equemene
   if( myrow == icurrow )
181 1 equemene
   { HPL_dlaswp01N( *ipA, n, A, lda, U, LDU, lindxA, lindxAU ); }
182 1 equemene
/*
183 1 equemene
 * Spread U - optionally probe for column panel
184 1 equemene
 */
185 1 equemene
   HPL_spreadN( PBCST, IFLAG, PANEL, HplRight, n, U, LDU, 0, iplen,
186 1 equemene
                ipmap, ipmapm1 );
187 1 equemene
/*
188 1 equemene
 * Local exchange (everywhere but in process row icurrow)
189 1 equemene
 */
190 1 equemene
   if( myrow != icurrow )
191 1 equemene
   {
192 1 equemene
      k = ipmapm1[myrow];
193 1 equemene
      HPL_dlaswp06N( iplen[k+1]-iplen[k], n, A, lda, Mptr( U, iplen[k],
194 1 equemene
                     0, LDU ), LDU, lindxA );
195 1 equemene
   }
196 1 equemene
/*
197 1 equemene
 * Equilibration
198 1 equemene
 */
199 1 equemene
   if( equil != 0 )
200 1 equemene
      HPL_equil( PBCST, IFLAG, PANEL, HplNoTrans, n, U, LDU, iplen,
201 1 equemene
                 ipmap, ipmapm1, iwork );
202 1 equemene
/*
203 1 equemene
 * Rolling phase
204 1 equemene
 */
205 1 equemene
   HPL_rollN( PBCST, IFLAG, PANEL, n, U, LDU, iplen, ipmap, ipmapm1 );
206 1 equemene
/*
207 1 equemene
 * Permute U in every process row
208 1 equemene
 */
209 1 equemene
   HPL_dlaswp00N( jb, n, U, LDU, permU );
210 1 equemene
211 1 equemene
#ifdef HPL_DETAILED_TIMING
212 1 equemene
   HPL_ptimer( HPL_TIMING_LASWP );
213 1 equemene
#endif
214 1 equemene
/*
215 1 equemene
 * End of HPL_pdlaswp01N
216 1 equemene
 */
217 1 equemene
}