Statistiques
| Révision :

root / src / panel / HPL_pdpanel_init.c @ 9

Historique | Voir | Annoter | Télécharger (14,53 ko)

1 1 equemene
/*
2 1 equemene
 * -- High Performance Computing Linpack Benchmark (HPL)
3 1 equemene
 *    HPL - 2.0 - September 10, 2008
4 1 equemene
 *    Antoine P. Petitet
5 1 equemene
 *    University of Tennessee, Knoxville
6 1 equemene
 *    Innovative Computing Laboratory
7 1 equemene
 *    (C) Copyright 2000-2008 All Rights Reserved
8 1 equemene
 *
9 1 equemene
 * -- Copyright notice and Licensing terms:
10 1 equemene
 *
11 1 equemene
 * Redistribution  and  use in  source and binary forms, with or without
12 1 equemene
 * modification, are  permitted provided  that the following  conditions
13 1 equemene
 * are met:
14 1 equemene
 *
15 1 equemene
 * 1. Redistributions  of  source  code  must retain the above copyright
16 1 equemene
 * notice, this list of conditions and the following disclaimer.
17 1 equemene
 *
18 1 equemene
 * 2. Redistributions in binary form must reproduce  the above copyright
19 1 equemene
 * notice, this list of conditions,  and the following disclaimer in the
20 1 equemene
 * documentation and/or other materials provided with the distribution.
21 1 equemene
 *
22 1 equemene
 * 3. All  advertising  materials  mentioning  features  or  use of this
23 1 equemene
 * software must display the following acknowledgement:
24 1 equemene
 * This  product  includes  software  developed  at  the  University  of
25 1 equemene
 * Tennessee, Knoxville, Innovative Computing Laboratory.
26 1 equemene
 *
27 1 equemene
 * 4. The name of the  University,  the name of the  Laboratory,  or the
28 1 equemene
 * names  of  its  contributors  may  not  be used to endorse or promote
29 1 equemene
 * products  derived   from   this  software  without  specific  written
30 1 equemene
 * permission.
31 1 equemene
 *
32 1 equemene
 * -- Disclaimer:
33 1 equemene
 *
34 1 equemene
 * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
35 1 equemene
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
36 1 equemene
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
37 1 equemene
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
38 1 equemene
 * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
39 1 equemene
 * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
40 1 equemene
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
41 1 equemene
 * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
42 1 equemene
 * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
43 1 equemene
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
44 1 equemene
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
45 1 equemene
 * ---------------------------------------------------------------------
46 1 equemene
 */
47 1 equemene
/*
48 1 equemene
 * Include files
49 1 equemene
 */
50 1 equemene
#include "hpl.h"
51 1 equemene
52 1 equemene
#ifdef HPL_NO_MPI_DATATYPE  /* The user insists to not use MPI types */
53 1 equemene
#ifndef HPL_COPY_L       /* and also want to avoid the copy of L ... */
54 1 equemene
#define HPL_COPY_L   /* well, sorry, can not do that: force the copy */
55 1 equemene
#endif
56 1 equemene
#endif
57 1 equemene
58 1 equemene
#ifdef STDC_HEADERS
59 1 equemene
void HPL_pdpanel_init
60 1 equemene
(
61 1 equemene
   HPL_T_grid *                     GRID,
62 1 equemene
   HPL_T_palg *                     ALGO,
63 1 equemene
   const int                        M,
64 1 equemene
   const int                        N,
65 1 equemene
   const int                        JB,
66 1 equemene
   HPL_T_pmat *                     A,
67 1 equemene
   const int                        IA,
68 1 equemene
   const int                        JA,
69 1 equemene
   const int                        TAG,
70 1 equemene
   HPL_T_panel *                    PANEL
71 1 equemene
)
72 1 equemene
#else
73 1 equemene
void HPL_pdpanel_init
74 1 equemene
( GRID, ALGO, M, N, JB, A, IA, JA, TAG, PANEL )
75 1 equemene
   HPL_T_grid *                     GRID;
76 1 equemene
   HPL_T_palg *                     ALGO;
77 1 equemene
   const int                        M;
78 1 equemene
   const int                        N;
79 1 equemene
   const int                        JB;
80 1 equemene
   HPL_T_pmat *                     A;
81 1 equemene
   const int                        IA;
82 1 equemene
   const int                        JA;
83 1 equemene
   const int                        TAG;
84 1 equemene
   HPL_T_panel *                    PANEL;
85 1 equemene
#endif
86 1 equemene
{
87 1 equemene
/*
88 1 equemene
 * Purpose
89 1 equemene
 * =======
90 1 equemene
 *
91 1 equemene
 * HPL_pdpanel_init initializes a panel data structure.
92 1 equemene
 *
93 1 equemene
 *
94 1 equemene
 * Arguments
95 1 equemene
 * =========
96 1 equemene
 *
97 1 equemene
 * GRID    (local input)                 HPL_T_grid *
98 1 equemene
 *         On entry,  GRID  points  to the data structure containing the
99 1 equemene
 *         process grid information.
100 1 equemene
 *
101 1 equemene
 * ALGO    (global input)                HPL_T_palg *
102 1 equemene
 *         On entry,  ALGO  points to  the data structure containing the
103 1 equemene
 *         algorithmic parameters.
104 1 equemene
 *
105 1 equemene
 * M       (local input)                 const int
106 1 equemene
 *         On entry, M specifies the global number of rows of the panel.
107 1 equemene
 *         M must be at least zero.
108 1 equemene
 *
109 1 equemene
 * N       (local input)                 const int
110 1 equemene
 *         On entry,  N  specifies  the  global number of columns of the
111 1 equemene
 *         panel and trailing submatrix. N must be at least zero.
112 1 equemene
 *
113 1 equemene
 * JB      (global input)                const int
114 1 equemene
 *         On entry, JB specifies is the number of columns of the panel.
115 1 equemene
 *         JB must be at least zero.
116 1 equemene
 *
117 1 equemene
 * A       (local input/output)          HPL_T_pmat *
118 1 equemene
 *         On entry, A points to the data structure containing the local
119 1 equemene
 *         array information.
120 1 equemene
 *
121 1 equemene
 * IA      (global input)                const int
122 1 equemene
 *         On entry,  IA  is  the global row index identifying the panel
123 1 equemene
 *         and trailing submatrix. IA must be at least zero.
124 1 equemene
 *
125 1 equemene
 * JA      (global input)                const int
126 1 equemene
 *         On entry, JA is the global column index identifying the panel
127 1 equemene
 *         and trailing submatrix. JA must be at least zero.
128 1 equemene
 *
129 1 equemene
 * TAG     (global input)                const int
130 1 equemene
 *         On entry, TAG is the row broadcast message id.
131 1 equemene
 *
132 1 equemene
 * PANEL   (local input/output)          HPL_T_panel *
133 1 equemene
 *         On entry,  PANEL  points to the data structure containing the
134 1 equemene
 *         panel information.
135 1 equemene
 *
136 1 equemene
 * ---------------------------------------------------------------------
137 1 equemene
 */
138 1 equemene
/*
139 1 equemene
 * .. Local Variables ..
140 1 equemene
 */
141 1 equemene
   size_t                     dalign;
142 1 equemene
   int                        icurcol, icurrow, ii, itmp1, jj, lwork,
143 1 equemene
                              ml2, mp, mycol, myrow, nb, npcol, nprow,
144 1 equemene
                              nq, nu;
145 1 equemene
/* ..
146 1 equemene
 * .. Executable Statements ..
147 1 equemene
 */
148 1 equemene
   PANEL->grid    = GRID;                  /* ptr to the process grid */
149 1 equemene
   PANEL->algo    = ALGO;               /* ptr to the algo parameters */
150 1 equemene
   PANEL->pmat    = A;                 /* ptr to the local array info */
151 1 equemene
152 1 equemene
   myrow = GRID->myrow; mycol = GRID->mycol;
153 1 equemene
   nprow = GRID->nprow; npcol = GRID->npcol; nb = A->nb;
154 1 equemene
155 1 equemene
   HPL_infog2l( IA, JA, nb, nb, nb, nb, 0, 0, myrow, mycol,
156 1 equemene
                nprow, npcol, &ii, &jj, &icurrow, &icurcol );
157 1 equemene
   mp = HPL_numrocI( M, IA, nb, nb, myrow, 0, nprow );
158 1 equemene
   nq = HPL_numrocI( N, JA, nb, nb, mycol, 0, npcol );
159 1 equemene
                                         /* ptr to trailing part of A */
160 1 equemene
   PANEL->A       = Mptr( (double *)(A->A), ii, jj, A->ld );
161 1 equemene
/*
162 1 equemene
 * Workspace pointers are initialized to NULL.
163 1 equemene
 */
164 1 equemene
   PANEL->WORK    = NULL; PANEL->L2      = NULL; PANEL->L1      = NULL;
165 1 equemene
   PANEL->DPIV    = NULL; PANEL->DINFO   = NULL; PANEL->U       = NULL;
166 1 equemene
   PANEL->IWORK   = NULL;
167 1 equemene
/*
168 1 equemene
 * Local lengths, indexes process coordinates
169 1 equemene
 */
170 1 equemene
   PANEL->nb      = nb;               /* distribution blocking factor */
171 1 equemene
   PANEL->jb      = JB;                                /* panel width */
172 1 equemene
   PANEL->m       = M;      /* global # of rows of trailing part of A */
173 1 equemene
   PANEL->n       = N;      /* global # of cols of trailing part of A */
174 1 equemene
   PANEL->ia      = IA;     /* global row index of trailing part of A */
175 1 equemene
   PANEL->ja      = JA;     /* global col index of trailing part of A */
176 1 equemene
   PANEL->mp      = mp;      /* local # of rows of trailing part of A */
177 1 equemene
   PANEL->nq      = nq;      /* local # of cols of trailing part of A */
178 1 equemene
   PANEL->ii      = ii;      /* local row index of trailing part of A */
179 1 equemene
   PANEL->jj      = jj;      /* local col index of trailing part of A */
180 1 equemene
   PANEL->lda     = A->ld;            /* local leading dim of array A */
181 1 equemene
   PANEL->prow    = icurrow; /* proc row owning 1st row of trailing A */
182 1 equemene
   PANEL->pcol    = icurcol; /* proc col owning 1st col of trailing A */
183 1 equemene
   PANEL->msgid   = TAG;     /* message id to be used for panel bcast */
184 1 equemene
/*
185 1 equemene
 * Initialize  ldl2 and len to temporary dummy values and Update tag for
186 1 equemene
 * next panel
187 1 equemene
 */
188 1 equemene
   PANEL->ldl2    = 0;               /* local leading dim of array L2 */
189 1 equemene
   PANEL->len     = 0;           /* length of the buffer to broadcast */
190 1 equemene
/*
191 1 equemene
 * Figure out the exact amount of workspace  needed by the factorization
192 1 equemene
 * and the update - Allocate that space - Finish the panel data structu-
193 1 equemene
 * re initialization.
194 1 equemene
 *
195 1 equemene
 * L1:    JB x JB in all processes
196 1 equemene
 * DPIV:  JB      in all processes
197 1 equemene
 * DINFO: 1       in all processes
198 1 equemene
 *
199 1 equemene
 * We make sure that those three arrays are contiguous in memory for the
200 1 equemene
 * later panel broadcast.  We  also  choose  to put this amount of space
201 1 equemene
 * right  after  L2 (when it exist) so that one can receive a contiguous
202 1 equemene
 * buffer.
203 1 equemene
 */
204 1 equemene
   dalign = ALGO->align * sizeof( double );
205 1 equemene
206 1 equemene
   if( npcol == 1 )                             /* P x 1 process grid */
207 1 equemene
   {                                     /* space for L1, DPIV, DINFO */
208 1 equemene
      lwork = ALGO->align + ( PANEL->len = JB * JB + JB + 1 );
209 1 equemene
      if( nprow > 1 )                                 /* space for U */
210 1 equemene
      { nu = nq - JB; lwork += JB * Mmax( 0, nu ); }
211 1 equemene
212 1 equemene
      if( !( PANEL->WORK = (void *)malloc( (size_t)(lwork) *
213 1 equemene
                                           sizeof( double ) ) ) )
214 1 equemene
      {
215 1 equemene
         HPL_pabort( __LINE__, "HPL_pdpanel_init",
216 1 equemene
                     "Memory allocation failed" );
217 1 equemene
      }
218 1 equemene
/*
219 1 equemene
 * Initialize the pointers of the panel structure  -  Always re-use A in
220 1 equemene
 * the only process column
221 1 equemene
 */
222 1 equemene
      PANEL->L2    = PANEL->A + ( myrow == icurrow ? JB : 0 );
223 1 equemene
      PANEL->ldl2  = A->ld;
224 1 equemene
      PANEL->L1    = (double *)HPL_PTR( PANEL->WORK, dalign );
225 1 equemene
      PANEL->DPIV  = PANEL->L1    + JB * JB;
226 1 equemene
      PANEL->DINFO = PANEL->DPIV + JB;       *(PANEL->DINFO) = 0.0;
227 1 equemene
      PANEL->U     = ( nprow > 1 ? PANEL->DINFO + 1: NULL );
228 1 equemene
   }
229 1 equemene
   else
230 1 equemene
   {                                        /* space for L2, L1, DPIV */
231 1 equemene
      ml2 = ( myrow == icurrow ? mp - JB : mp ); ml2 = Mmax( 0, ml2 );
232 1 equemene
      PANEL->len = ml2*JB + ( itmp1 = JB*JB + JB + 1 );
233 1 equemene
#ifdef HPL_COPY_L
234 1 equemene
      lwork = ALGO->align + PANEL->len;
235 1 equemene
#else
236 1 equemene
      lwork = ALGO->align + ( mycol == icurcol ? itmp1 : PANEL->len );
237 1 equemene
#endif
238 1 equemene
      if( nprow > 1 )                                 /* space for U */
239 1 equemene
      {
240 1 equemene
         nu = ( mycol == icurcol ? nq - JB : nq );
241 1 equemene
         lwork += JB * Mmax( 0, nu );
242 1 equemene
      }
243 1 equemene
244 1 equemene
      if( !( PANEL->WORK = (void *)malloc( (size_t)(lwork) *
245 1 equemene
                                           sizeof( double ) ) ) )
246 1 equemene
      {
247 1 equemene
         HPL_pabort( __LINE__, "HPL_pdpanel_init",
248 1 equemene
                     "Memory allocation failed" );
249 1 equemene
      }
250 1 equemene
/*
251 1 equemene
 * Initialize the pointers of the panel structure - Re-use A in the cur-
252 1 equemene
 * rent process column when HPL_COPY_L is not defined.
253 1 equemene
 */
254 1 equemene
#ifdef HPL_COPY_L
255 1 equemene
      PANEL->L2    = (double *)HPL_PTR( PANEL->WORK, dalign );
256 1 equemene
      PANEL->ldl2  = Mmax( 1, ml2 );
257 1 equemene
      PANEL->L1    = PANEL->L2 + ml2 * JB;
258 1 equemene
#else
259 1 equemene
      if( mycol == icurcol )
260 1 equemene
      {
261 1 equemene
         PANEL->L2   = PANEL->A + ( myrow == icurrow ? JB : 0 );
262 1 equemene
         PANEL->ldl2 = A->ld;
263 1 equemene
         PANEL->L1   = (double *)HPL_PTR( PANEL->WORK, dalign );
264 1 equemene
      }
265 1 equemene
      else
266 1 equemene
      {
267 1 equemene
         PANEL->L2   = (double *)HPL_PTR( PANEL->WORK, dalign );
268 1 equemene
         PANEL->ldl2 = Mmax( 1, ml2 );
269 1 equemene
         PANEL->L1   = PANEL->L2 + ml2 * JB;
270 1 equemene
      }
271 1 equemene
#endif
272 1 equemene
      PANEL->DPIV  = PANEL->L1   + JB * JB;
273 1 equemene
      PANEL->DINFO = PANEL->DPIV + JB;     *(PANEL->DINFO) = 0.0;
274 1 equemene
      PANEL->U     = ( nprow > 1 ? PANEL->DINFO + 1 : NULL );
275 1 equemene
   }
276 1 equemene
#ifdef HPL_CALL_VSIPL
277 1 equemene
   PANEL->Ablock  = A->block;
278 1 equemene
/*
279 1 equemene
 * Create blocks and bind them to the data pointers
280 1 equemene
 */
281 1 equemene
   PANEL->L1block = vsip_blockbind_d( (vsip_scalar_d *)(PANEL->L1),
282 1 equemene
                                      (vsip_length)(JB*JB), VSIP_MEM_NONE );
283 1 equemene
   PANEL->L2block = vsip_blockbind_d( (vsip_scalar_d *)(PANEL->L2),
284 1 equemene
                                      (vsip_length)(PANEL->ldl2*JB),
285 1 equemene
                                      VSIP_MEM_NONE );
286 1 equemene
   if( nprow > 1 )
287 1 equemene
   {
288 1 equemene
      nu = ( mycol == icurcol ? nq - JB : nq );
289 1 equemene
      PANEL->Ublock = vsip_blockbind_d( (vsip_scalar_d *)(PANEL->U),
290 1 equemene
                                        (vsip_length)(JB * Mmax( 0, nu )),
291 1 equemene
                                        VSIP_MEM_NONE );
292 1 equemene
   }
293 1 equemene
   else { PANEL->Ublock = A->block; }
294 1 equemene
#endif
295 1 equemene
/*
296 1 equemene
 * If nprow is 1, we just allocate an array of JB integers for the swap.
297 1 equemene
 * When nprow > 1, we allocate the space for the index arrays immediate-
298 1 equemene
 * ly. The exact size of this array depends on the swapping routine that
299 1 equemene
 * will be used, so we allocate the maximum:
300 1 equemene
 *
301 1 equemene
 *    IWORK[0] is of size at most 1      +
302 1 equemene
 *    IPL      is of size at most 1      +
303 1 equemene
 *    IPID     is of size at most 4 * JB +
304 1 equemene
 *
305 1 equemene
 *    For HPL_pdlaswp00:
306 1 equemene
 *       lindxA   is of size at most 2 * JB +
307 1 equemene
 *       lindxAU  is of size at most 2 * JB +
308 1 equemene
 *       llen     is of size at most NPROW  +
309 1 equemene
 *       llen_sv  is of size at most NPROW.
310 1 equemene
 *
311 1 equemene
 *    For HPL_pdlaswp01:
312 1 equemene
 *       ipA      is of size ar most 1      +
313 1 equemene
 *       lindxA   is of size at most 2 * JB +
314 1 equemene
 *       lindxAU  is of size at most 2 * JB +
315 1 equemene
 *       iplen    is of size at most NPROW  + 1 +
316 1 equemene
 *       ipmap    is of size at most NPROW  +
317 1 equemene
 *       ipmapm1  is of size at most NPROW  +
318 1 equemene
 *       permU    is of size at most JB     +
319 1 equemene
 *       iwork    is of size at most MAX( 2*JB, NPROW+1 ).
320 1 equemene
 *
321 1 equemene
 * that is  3 + 8*JB + MAX(2*NPROW, 3*NPROW+1+JB+MAX(2*JB,NPROW+1))
322 1 equemene
 *       =  4 + 9*JB + 3*NPROW + MAX( 2*JB, NPROW+1 ).
323 1 equemene
 *
324 1 equemene
 * We use the fist entry of this to work array  to indicate  whether the
325 1 equemene
 * the  local  index arrays have already been computed,  and if yes,  by
326 1 equemene
 * which function:
327 1 equemene
 *    IWORK[0] = -1: no index arrays have been computed so far;
328 1 equemene
 *    IWORK[0] =  0: HPL_pdlaswp00 already computed those arrays;
329 1 equemene
 *    IWORK[0] =  1: HPL_pdlaswp01 already computed those arrays;
330 1 equemene
 * This allows to save some redundant and useless computations.
331 1 equemene
 */
332 1 equemene
   if( nprow == 1 ) { lwork = JB; }
333 1 equemene
   else
334 1 equemene
   {
335 1 equemene
      itmp1 = (JB << 1); lwork = nprow + 1; itmp1 = Mmax( itmp1, lwork );
336 1 equemene
      lwork = 4 + (9 * JB) + (3 * nprow) + itmp1;
337 1 equemene
   }
338 1 equemene
339 1 equemene
   PANEL->IWORK = (int *)malloc( (size_t)(lwork) * sizeof( int ) );
340 1 equemene
341 1 equemene
   if( PANEL->IWORK == NULL )
342 1 equemene
   { HPL_pabort( __LINE__, "HPL_pdpanel_init", "Memory allocation failed" ); }
343 1 equemene
                       /* Initialize the first entry of the workarray */
344 1 equemene
   *(PANEL->IWORK) = -1;
345 1 equemene
/*
346 1 equemene
 * End of HPL_pdpanel_init
347 1 equemene
 */
348 1 equemene
}