Statistiques
| Révision :

root / src / pgesv / HPL_rollT.c @ 8

Historique | Voir | Annoter | Télécharger (10,03 ko)

1 1 equemene
/*
2 1 equemene
 * -- High Performance Computing Linpack Benchmark (HPL)
3 1 equemene
 *    HPL - 2.0 - September 10, 2008
4 1 equemene
 *    Antoine P. Petitet
5 1 equemene
 *    University of Tennessee, Knoxville
6 1 equemene
 *    Innovative Computing Laboratory
7 1 equemene
 *    (C) Copyright 2000-2008 All Rights Reserved
8 1 equemene
 *
9 1 equemene
 * -- Copyright notice and Licensing terms:
10 1 equemene
 *
11 1 equemene
 * Redistribution  and  use in  source and binary forms, with or without
12 1 equemene
 * modification, are  permitted provided  that the following  conditions
13 1 equemene
 * are met:
14 1 equemene
 *
15 1 equemene
 * 1. Redistributions  of  source  code  must retain the above copyright
16 1 equemene
 * notice, this list of conditions and the following disclaimer.
17 1 equemene
 *
18 1 equemene
 * 2. Redistributions in binary form must reproduce  the above copyright
19 1 equemene
 * notice, this list of conditions,  and the following disclaimer in the
20 1 equemene
 * documentation and/or other materials provided with the distribution.
21 1 equemene
 *
22 1 equemene
 * 3. All  advertising  materials  mentioning  features  or  use of this
23 1 equemene
 * software must display the following acknowledgement:
24 1 equemene
 * This  product  includes  software  developed  at  the  University  of
25 1 equemene
 * Tennessee, Knoxville, Innovative Computing Laboratory.
26 1 equemene
 *
27 1 equemene
 * 4. The name of the  University,  the name of the  Laboratory,  or the
28 1 equemene
 * names  of  its  contributors  may  not  be used to endorse or promote
29 1 equemene
 * products  derived   from   this  software  without  specific  written
30 1 equemene
 * permission.
31 1 equemene
 *
32 1 equemene
 * -- Disclaimer:
33 1 equemene
 *
34 1 equemene
 * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
35 1 equemene
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
36 1 equemene
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
37 1 equemene
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
38 1 equemene
 * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
39 1 equemene
 * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
40 1 equemene
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
41 1 equemene
 * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
42 1 equemene
 * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
43 1 equemene
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
44 1 equemene
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
45 1 equemene
 * ---------------------------------------------------------------------
46 1 equemene
 */
47 1 equemene
/*
48 1 equemene
 * Include files
49 1 equemene
 */
50 1 equemene
#include "hpl.h"
51 1 equemene
52 1 equemene
#define   I_SEND    0
53 1 equemene
#define   I_RECV    1
54 1 equemene
55 1 equemene
#ifdef STDC_HEADERS
56 1 equemene
void HPL_rollT
57 1 equemene
(
58 1 equemene
   HPL_T_panel *                    PBCST,
59 1 equemene
   int *                            IFLAG,
60 1 equemene
   HPL_T_panel *                    PANEL,
61 1 equemene
   const int                        N,
62 1 equemene
   double *                         U,
63 1 equemene
   const int                        LDU,
64 1 equemene
   const int *                      IPLEN,
65 1 equemene
   const int *                      IPMAP,
66 1 equemene
   const int *                      IPMAPM1
67 1 equemene
)
68 1 equemene
#else
69 1 equemene
void HPL_rollT
70 1 equemene
( PBCST, IFLAG, PANEL, N, U, LDU, IPLEN, IPMAP, IPMAPM1 )
71 1 equemene
   HPL_T_panel *                    PBCST;
72 1 equemene
   int *                            IFLAG;
73 1 equemene
   HPL_T_panel *                    PANEL;
74 1 equemene
   const int                        N;
75 1 equemene
   double *                         U;
76 1 equemene
   const int                        LDU;
77 1 equemene
   const int *                      IPLEN;
78 1 equemene
   const int *                      IPMAP;
79 1 equemene
   const int *                      IPMAPM1;
80 1 equemene
#endif
81 1 equemene
{
82 1 equemene
/*
83 1 equemene
 * Purpose
84 1 equemene
 * =======
85 1 equemene
 *
86 1 equemene
 * HPL_rollT rolls the local arrays containing the local pieces of U, so
87 1 equemene
 * that on exit to this function  U  is replicated in every process row.
88 1 equemene
 * In addition, this function probe for the presence of the column panel
89 1 equemene
 * and forwards it when available.
90 1 equemene
 *
91 1 equemene
 * Arguments
92 1 equemene
 * =========
93 1 equemene
 *
94 1 equemene
 * PBCST   (local input/output)          HPL_T_panel *
95 1 equemene
 *         On entry,  PBCST  points to the data structure containing the
96 1 equemene
 *         panel (to be broadcast) information.
97 1 equemene
 *
98 1 equemene
 * IFLAG   (local input/output)          int *
99 1 equemene
 *         On entry, IFLAG  indicates  whether or not  the broadcast has
100 1 equemene
 *         already been completed.  If not,  probing will occur, and the
101 1 equemene
 *         outcome will be contained in IFLAG on exit.
102 1 equemene
 *
103 1 equemene
 * PANEL   (local input/output)          HPL_T_panel *
104 1 equemene
 *         On entry,  PANEL  points to the data structure containing the
105 1 equemene
 *         panel (to be rolled) information.
106 1 equemene
 *
107 1 equemene
 * N       (local input)                 const int
108 1 equemene
 *         On entry, N specifies the local number of rows of  U.  N must
109 1 equemene
 *         be at least zero.
110 1 equemene
 *
111 1 equemene
 * U       (local input/output)          double *
112 1 equemene
 *         On entry,  U  is an array of dimension (LDU,*) containing the
113 1 equemene
 *         local pieces of U in each process row.
114 1 equemene
 *
115 1 equemene
 * LDU     (local input)                 const int
116 1 equemene
 *         On entry, LDU specifies the local leading dimension of U. LDU
117 1 equemene
 *         should be at least  MAX(1,N).
118 1 equemene
 *
119 1 equemene
 * IPLEN   (global input)                const int *
120 1 equemene
 *         On entry, IPLEN is an array of dimension NPROW+1.  This array
121 1 equemene
 *         is such that IPLEN[i+1] - IPLEN[i] is the number of rows of U
122 1 equemene
 *         in each process row.
123 1 equemene
 *
124 1 equemene
 * IPMAP   (global input)                const int *
125 1 equemene
 *         On entry, IMAP  is an array of dimension  NPROW.  This  array
126 1 equemene
 *         contains  the  logarithmic mapping of the processes. In other
127 1 equemene
 *         words,  IMAP[myrow]  is the absolute coordinate of the sorted
128 1 equemene
 *         process.
129 1 equemene
 *
130 1 equemene
 * IPMAPM1 (global input)                const int *
131 1 equemene
 *         On entry,  IMAPM1  is an array of dimension NPROW. This array
132 1 equemene
 *         contains  the inverse of the logarithmic mapping contained in
133 1 equemene
 *         IMAP: For i in [0.. NPROW) IMAPM1[IMAP[i]] = i.
134 1 equemene
 *
135 1 equemene
 * ---------------------------------------------------------------------
136 1 equemene
 */
137 1 equemene
/*
138 1 equemene
 * .. Local Variables ..
139 1 equemene
 */
140 1 equemene
#if 0
141 1 equemene
   MPI_Datatype               type[2];
142 1 equemene
#endif
143 1 equemene
   MPI_Status                 status;
144 1 equemene
   MPI_Request                request;
145 1 equemene
   MPI_Comm                   comm;
146 1 equemene
   int                        Cmsgid=MSGID_BEGIN_PFACT, ibufR, ibufS,
147 1 equemene
                              ierr=MPI_SUCCESS, il, k, l, lengthR,
148 1 equemene
                              lengthS, mydist, myrow, next, npm1, nprow,
149 1 equemene
                              partner, prev;
150 1 equemene
/* ..
151 1 equemene
 * .. Executable Statements ..
152 1 equemene
 */
153 1 equemene
   if( N <= 0 ) return;
154 1 equemene
155 1 equemene
   npm1 = ( nprow = PANEL->grid->nprow ) - 1; myrow = PANEL->grid->myrow;
156 1 equemene
   comm = PANEL->grid->col_comm;
157 1 equemene
/*
158 1 equemene
 * Rolling phase
159 1 equemene
 */
160 1 equemene
   mydist = IPMAPM1[myrow];
161 1 equemene
   prev   = IPMAP[MModSub1( mydist, nprow )];
162 1 equemene
   next   = IPMAP[MModAdd1( mydist, nprow )];
163 1 equemene
164 1 equemene
   for( k = 0; k < npm1; k++ )
165 1 equemene
   {
166 1 equemene
      l = (int)( (unsigned int)(k) >> 1 );
167 1 equemene
168 1 equemene
      if( ( ( mydist + k ) & 1 ) != 0 )
169 1 equemene
      {
170 1 equemene
         il      = MModAdd( mydist, l,   nprow );
171 1 equemene
         lengthS = IPLEN[il+1] - ( ibufS = IPLEN[il] );
172 1 equemene
         il    = MModSub( mydist, l+1, nprow );
173 1 equemene
         lengthR = IPLEN[il+1] - ( ibufR = IPLEN[il] ); partner = prev;
174 1 equemene
      }
175 1 equemene
      else
176 1 equemene
      {
177 1 equemene
         il    = MModSub( mydist, l,   nprow );
178 1 equemene
         lengthS = IPLEN[il+1] - ( ibufS = IPLEN[il] );
179 1 equemene
         il    = MModAdd( mydist, l+1, nprow );
180 1 equemene
         lengthR = IPLEN[il+1] - ( ibufR = IPLEN[il] ); partner = next;
181 1 equemene
      }
182 1 equemene
183 1 equemene
      if( lengthR > 0 )
184 1 equemene
      {
185 1 equemene
#if 0
186 1 equemene
         if( ierr == MPI_SUCCESS )
187 1 equemene
         {
188 1 equemene
            if( LDU == N )
189 1 equemene
               ierr = MPI_Type_contiguous( lengthR * LDU, MPI_DOUBLE,
190 1 equemene
                                           &type[I_RECV] );
191 1 equemene
            else
192 1 equemene
               ierr = MPI_Type_vector( lengthR, N, LDU, MPI_DOUBLE,
193 1 equemene
                                       &type[I_RECV] );
194 1 equemene
         }
195 1 equemene
         if( ierr == MPI_SUCCESS )
196 1 equemene
            ierr =   MPI_Type_commit( &type[I_RECV] );
197 1 equemene
         if( ierr == MPI_SUCCESS )
198 1 equemene
            ierr =   MPI_Irecv( Mptr( U, 0, ibufR, LDU ), 1, type[I_RECV],
199 1 equemene
                                partner, Cmsgid, comm, &request );
200 1 equemene
#else
201 1 equemene
/*
202 1 equemene
 * In our case, LDU is N - Do not use the MPI datatype.
203 1 equemene
 */
204 1 equemene
         if( ierr == MPI_SUCCESS )
205 1 equemene
            ierr =   MPI_Irecv( Mptr( U, 0, ibufR, LDU ), lengthR*LDU,
206 1 equemene
                                MPI_DOUBLE, partner, Cmsgid, comm, &request );
207 1 equemene
#endif
208 1 equemene
      }
209 1 equemene
210 1 equemene
      if( lengthS > 0 )
211 1 equemene
      {
212 1 equemene
#if 0
213 1 equemene
         if( ierr == MPI_SUCCESS )
214 1 equemene
         {
215 1 equemene
            if( LDU == N )
216 1 equemene
               ierr =   MPI_Type_contiguous( lengthS*LDU, MPI_DOUBLE,
217 1 equemene
                                             &type[I_SEND] );
218 1 equemene
            else
219 1 equemene
               ierr =   MPI_Type_vector( lengthS, N, LDU, MPI_DOUBLE,
220 1 equemene
                                         &type[I_SEND] );
221 1 equemene
         }
222 1 equemene
         if( ierr == MPI_SUCCESS )
223 1 equemene
            ierr =   MPI_Type_commit( &type[I_SEND] );
224 1 equemene
         if( ierr == MPI_SUCCESS )
225 1 equemene
            ierr =   MPI_Send( Mptr( U, 0, ibufS, LDU ), 1, type[I_SEND],
226 1 equemene
                               partner, Cmsgid, comm );
227 1 equemene
         if( ierr == MPI_SUCCESS )
228 1 equemene
            ierr =   MPI_Type_free( &type[I_SEND] );
229 1 equemene
#else
230 1 equemene
/*
231 1 equemene
 * In our case, LDU is N - Do not use the MPI datatype.
232 1 equemene
 */
233 1 equemene
         if( ierr == MPI_SUCCESS )
234 1 equemene
            ierr =   MPI_Send( Mptr( U, 0, ibufS, LDU ), lengthS*LDU,
235 1 equemene
                               MPI_DOUBLE, partner, Cmsgid, comm );
236 1 equemene
#endif
237 1 equemene
      }
238 1 equemene
239 1 equemene
      if( lengthR > 0 )
240 1 equemene
      {
241 1 equemene
         if( ierr == MPI_SUCCESS )
242 1 equemene
            ierr =   MPI_Wait( &request, &status );
243 1 equemene
#if 0
244 1 equemene
         if( ierr == MPI_SUCCESS )
245 1 equemene
            ierr =   MPI_Type_free( &type[I_RECV] );
246 1 equemene
#endif
247 1 equemene
      }
248 1 equemene
/*
249 1 equemene
 * Probe for column panel - forward it when available
250 1 equemene
 */
251 1 equemene
      if( *IFLAG == HPL_KEEP_TESTING ) (void) HPL_bcast( PBCST, IFLAG );
252 1 equemene
   }
253 1 equemene
254 1 equemene
   if( ierr != MPI_SUCCESS )
255 1 equemene
   { HPL_pabort( __LINE__, "HPL_rollT", "MPI call failed" ); }
256 1 equemene
/*
257 1 equemene
 * End of HPL_rollT
258 1 equemene
 */
259 1 equemene
}