root / src / panel / HPL_pdpanel_init.c @ 9
Historique | Voir | Annoter | Télécharger (14,53 ko)
1 |
/*
|
---|---|
2 |
* -- High Performance Computing Linpack Benchmark (HPL)
|
3 |
* HPL - 2.0 - September 10, 2008
|
4 |
* Antoine P. Petitet
|
5 |
* University of Tennessee, Knoxville
|
6 |
* Innovative Computing Laboratory
|
7 |
* (C) Copyright 2000-2008 All Rights Reserved
|
8 |
*
|
9 |
* -- Copyright notice and Licensing terms:
|
10 |
*
|
11 |
* Redistribution and use in source and binary forms, with or without
|
12 |
* modification, are permitted provided that the following conditions
|
13 |
* are met:
|
14 |
*
|
15 |
* 1. Redistributions of source code must retain the above copyright
|
16 |
* notice, this list of conditions and the following disclaimer.
|
17 |
*
|
18 |
* 2. Redistributions in binary form must reproduce the above copyright
|
19 |
* notice, this list of conditions, and the following disclaimer in the
|
20 |
* documentation and/or other materials provided with the distribution.
|
21 |
*
|
22 |
* 3. All advertising materials mentioning features or use of this
|
23 |
* software must display the following acknowledgement:
|
24 |
* This product includes software developed at the University of
|
25 |
* Tennessee, Knoxville, Innovative Computing Laboratory.
|
26 |
*
|
27 |
* 4. The name of the University, the name of the Laboratory, or the
|
28 |
* names of its contributors may not be used to endorse or promote
|
29 |
* products derived from this software without specific written
|
30 |
* permission.
|
31 |
*
|
32 |
* -- Disclaimer:
|
33 |
*
|
34 |
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
35 |
* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
36 |
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
37 |
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
|
38 |
* OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
39 |
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
40 |
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
41 |
* DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
42 |
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
43 |
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
44 |
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
45 |
* ---------------------------------------------------------------------
|
46 |
*/
|
47 |
/*
|
48 |
* Include files
|
49 |
*/
|
50 |
#include "hpl.h" |
51 |
|
52 |
#ifdef HPL_NO_MPI_DATATYPE /* The user insists to not use MPI types */ |
53 |
#ifndef HPL_COPY_L /* and also want to avoid the copy of L ... */ |
54 |
#define HPL_COPY_L /* well, sorry, can not do that: force the copy */ |
55 |
#endif
|
56 |
#endif
|
57 |
|
58 |
#ifdef STDC_HEADERS
|
59 |
void HPL_pdpanel_init
|
60 |
( |
61 |
HPL_T_grid * GRID, |
62 |
HPL_T_palg * ALGO, |
63 |
const int M, |
64 |
const int N, |
65 |
const int JB, |
66 |
HPL_T_pmat * A, |
67 |
const int IA, |
68 |
const int JA, |
69 |
const int TAG, |
70 |
HPL_T_panel * PANEL |
71 |
) |
72 |
#else
|
73 |
void HPL_pdpanel_init
|
74 |
( GRID, ALGO, M, N, JB, A, IA, JA, TAG, PANEL ) |
75 |
HPL_T_grid * GRID; |
76 |
HPL_T_palg * ALGO; |
77 |
const int M; |
78 |
const int N; |
79 |
const int JB; |
80 |
HPL_T_pmat * A; |
81 |
const int IA; |
82 |
const int JA; |
83 |
const int TAG; |
84 |
HPL_T_panel * PANEL; |
85 |
#endif
|
86 |
{ |
87 |
/*
|
88 |
* Purpose
|
89 |
* =======
|
90 |
*
|
91 |
* HPL_pdpanel_init initializes a panel data structure.
|
92 |
*
|
93 |
*
|
94 |
* Arguments
|
95 |
* =========
|
96 |
*
|
97 |
* GRID (local input) HPL_T_grid *
|
98 |
* On entry, GRID points to the data structure containing the
|
99 |
* process grid information.
|
100 |
*
|
101 |
* ALGO (global input) HPL_T_palg *
|
102 |
* On entry, ALGO points to the data structure containing the
|
103 |
* algorithmic parameters.
|
104 |
*
|
105 |
* M (local input) const int
|
106 |
* On entry, M specifies the global number of rows of the panel.
|
107 |
* M must be at least zero.
|
108 |
*
|
109 |
* N (local input) const int
|
110 |
* On entry, N specifies the global number of columns of the
|
111 |
* panel and trailing submatrix. N must be at least zero.
|
112 |
*
|
113 |
* JB (global input) const int
|
114 |
* On entry, JB specifies is the number of columns of the panel.
|
115 |
* JB must be at least zero.
|
116 |
*
|
117 |
* A (local input/output) HPL_T_pmat *
|
118 |
* On entry, A points to the data structure containing the local
|
119 |
* array information.
|
120 |
*
|
121 |
* IA (global input) const int
|
122 |
* On entry, IA is the global row index identifying the panel
|
123 |
* and trailing submatrix. IA must be at least zero.
|
124 |
*
|
125 |
* JA (global input) const int
|
126 |
* On entry, JA is the global column index identifying the panel
|
127 |
* and trailing submatrix. JA must be at least zero.
|
128 |
*
|
129 |
* TAG (global input) const int
|
130 |
* On entry, TAG is the row broadcast message id.
|
131 |
*
|
132 |
* PANEL (local input/output) HPL_T_panel *
|
133 |
* On entry, PANEL points to the data structure containing the
|
134 |
* panel information.
|
135 |
*
|
136 |
* ---------------------------------------------------------------------
|
137 |
*/
|
138 |
/*
|
139 |
* .. Local Variables ..
|
140 |
*/
|
141 |
size_t dalign; |
142 |
int icurcol, icurrow, ii, itmp1, jj, lwork,
|
143 |
ml2, mp, mycol, myrow, nb, npcol, nprow, |
144 |
nq, nu; |
145 |
/* ..
|
146 |
* .. Executable Statements ..
|
147 |
*/
|
148 |
PANEL->grid = GRID; /* ptr to the process grid */
|
149 |
PANEL->algo = ALGO; /* ptr to the algo parameters */
|
150 |
PANEL->pmat = A; /* ptr to the local array info */
|
151 |
|
152 |
myrow = GRID->myrow; mycol = GRID->mycol; |
153 |
nprow = GRID->nprow; npcol = GRID->npcol; nb = A->nb; |
154 |
|
155 |
HPL_infog2l( IA, JA, nb, nb, nb, nb, 0, 0, myrow, mycol, |
156 |
nprow, npcol, &ii, &jj, &icurrow, &icurcol ); |
157 |
mp = HPL_numrocI( M, IA, nb, nb, myrow, 0, nprow );
|
158 |
nq = HPL_numrocI( N, JA, nb, nb, mycol, 0, npcol );
|
159 |
/* ptr to trailing part of A */
|
160 |
PANEL->A = Mptr( (double *)(A->A), ii, jj, A->ld );
|
161 |
/*
|
162 |
* Workspace pointers are initialized to NULL.
|
163 |
*/
|
164 |
PANEL->WORK = NULL; PANEL->L2 = NULL; PANEL->L1 = NULL; |
165 |
PANEL->DPIV = NULL; PANEL->DINFO = NULL; PANEL->U = NULL; |
166 |
PANEL->IWORK = NULL;
|
167 |
/*
|
168 |
* Local lengths, indexes process coordinates
|
169 |
*/
|
170 |
PANEL->nb = nb; /* distribution blocking factor */
|
171 |
PANEL->jb = JB; /* panel width */
|
172 |
PANEL->m = M; /* global # of rows of trailing part of A */
|
173 |
PANEL->n = N; /* global # of cols of trailing part of A */
|
174 |
PANEL->ia = IA; /* global row index of trailing part of A */
|
175 |
PANEL->ja = JA; /* global col index of trailing part of A */
|
176 |
PANEL->mp = mp; /* local # of rows of trailing part of A */
|
177 |
PANEL->nq = nq; /* local # of cols of trailing part of A */
|
178 |
PANEL->ii = ii; /* local row index of trailing part of A */
|
179 |
PANEL->jj = jj; /* local col index of trailing part of A */
|
180 |
PANEL->lda = A->ld; /* local leading dim of array A */
|
181 |
PANEL->prow = icurrow; /* proc row owning 1st row of trailing A */
|
182 |
PANEL->pcol = icurcol; /* proc col owning 1st col of trailing A */
|
183 |
PANEL->msgid = TAG; /* message id to be used for panel bcast */
|
184 |
/*
|
185 |
* Initialize ldl2 and len to temporary dummy values and Update tag for
|
186 |
* next panel
|
187 |
*/
|
188 |
PANEL->ldl2 = 0; /* local leading dim of array L2 */ |
189 |
PANEL->len = 0; /* length of the buffer to broadcast */ |
190 |
/*
|
191 |
* Figure out the exact amount of workspace needed by the factorization
|
192 |
* and the update - Allocate that space - Finish the panel data structu-
|
193 |
* re initialization.
|
194 |
*
|
195 |
* L1: JB x JB in all processes
|
196 |
* DPIV: JB in all processes
|
197 |
* DINFO: 1 in all processes
|
198 |
*
|
199 |
* We make sure that those three arrays are contiguous in memory for the
|
200 |
* later panel broadcast. We also choose to put this amount of space
|
201 |
* right after L2 (when it exist) so that one can receive a contiguous
|
202 |
* buffer.
|
203 |
*/
|
204 |
dalign = ALGO->align * sizeof( double ); |
205 |
|
206 |
if( npcol == 1 ) /* P x 1 process grid */ |
207 |
{ /* space for L1, DPIV, DINFO */
|
208 |
lwork = ALGO->align + ( PANEL->len = JB * JB + JB + 1 );
|
209 |
if( nprow > 1 ) /* space for U */ |
210 |
{ nu = nq - JB; lwork += JB * Mmax( 0, nu ); }
|
211 |
|
212 |
if( !( PANEL->WORK = (void *)malloc( (size_t)(lwork) * |
213 |
sizeof( double ) ) ) ) |
214 |
{ |
215 |
HPL_pabort( __LINE__, "HPL_pdpanel_init",
|
216 |
"Memory allocation failed" );
|
217 |
} |
218 |
/*
|
219 |
* Initialize the pointers of the panel structure - Always re-use A in
|
220 |
* the only process column
|
221 |
*/
|
222 |
PANEL->L2 = PANEL->A + ( myrow == icurrow ? JB : 0 );
|
223 |
PANEL->ldl2 = A->ld; |
224 |
PANEL->L1 = (double *)HPL_PTR( PANEL->WORK, dalign );
|
225 |
PANEL->DPIV = PANEL->L1 + JB * JB; |
226 |
PANEL->DINFO = PANEL->DPIV + JB; *(PANEL->DINFO) = 0.0; |
227 |
PANEL->U = ( nprow > 1 ? PANEL->DINFO + 1: NULL ); |
228 |
} |
229 |
else
|
230 |
{ /* space for L2, L1, DPIV */
|
231 |
ml2 = ( myrow == icurrow ? mp - JB : mp ); ml2 = Mmax( 0, ml2 );
|
232 |
PANEL->len = ml2*JB + ( itmp1 = JB*JB + JB + 1 );
|
233 |
#ifdef HPL_COPY_L
|
234 |
lwork = ALGO->align + PANEL->len; |
235 |
#else
|
236 |
lwork = ALGO->align + ( mycol == icurcol ? itmp1 : PANEL->len ); |
237 |
#endif
|
238 |
if( nprow > 1 ) /* space for U */ |
239 |
{ |
240 |
nu = ( mycol == icurcol ? nq - JB : nq ); |
241 |
lwork += JB * Mmax( 0, nu );
|
242 |
} |
243 |
|
244 |
if( !( PANEL->WORK = (void *)malloc( (size_t)(lwork) * |
245 |
sizeof( double ) ) ) ) |
246 |
{ |
247 |
HPL_pabort( __LINE__, "HPL_pdpanel_init",
|
248 |
"Memory allocation failed" );
|
249 |
} |
250 |
/*
|
251 |
* Initialize the pointers of the panel structure - Re-use A in the cur-
|
252 |
* rent process column when HPL_COPY_L is not defined.
|
253 |
*/
|
254 |
#ifdef HPL_COPY_L
|
255 |
PANEL->L2 = (double *)HPL_PTR( PANEL->WORK, dalign );
|
256 |
PANEL->ldl2 = Mmax( 1, ml2 );
|
257 |
PANEL->L1 = PANEL->L2 + ml2 * JB; |
258 |
#else
|
259 |
if( mycol == icurcol )
|
260 |
{ |
261 |
PANEL->L2 = PANEL->A + ( myrow == icurrow ? JB : 0 );
|
262 |
PANEL->ldl2 = A->ld; |
263 |
PANEL->L1 = (double *)HPL_PTR( PANEL->WORK, dalign );
|
264 |
} |
265 |
else
|
266 |
{ |
267 |
PANEL->L2 = (double *)HPL_PTR( PANEL->WORK, dalign );
|
268 |
PANEL->ldl2 = Mmax( 1, ml2 );
|
269 |
PANEL->L1 = PANEL->L2 + ml2 * JB; |
270 |
} |
271 |
#endif
|
272 |
PANEL->DPIV = PANEL->L1 + JB * JB; |
273 |
PANEL->DINFO = PANEL->DPIV + JB; *(PANEL->DINFO) = 0.0; |
274 |
PANEL->U = ( nprow > 1 ? PANEL->DINFO + 1 : NULL ); |
275 |
} |
276 |
#ifdef HPL_CALL_VSIPL
|
277 |
PANEL->Ablock = A->block; |
278 |
/*
|
279 |
* Create blocks and bind them to the data pointers
|
280 |
*/
|
281 |
PANEL->L1block = vsip_blockbind_d( (vsip_scalar_d *)(PANEL->L1), |
282 |
(vsip_length)(JB*JB), VSIP_MEM_NONE ); |
283 |
PANEL->L2block = vsip_blockbind_d( (vsip_scalar_d *)(PANEL->L2), |
284 |
(vsip_length)(PANEL->ldl2*JB), |
285 |
VSIP_MEM_NONE ); |
286 |
if( nprow > 1 ) |
287 |
{ |
288 |
nu = ( mycol == icurcol ? nq - JB : nq ); |
289 |
PANEL->Ublock = vsip_blockbind_d( (vsip_scalar_d *)(PANEL->U), |
290 |
(vsip_length)(JB * Mmax( 0, nu )),
|
291 |
VSIP_MEM_NONE ); |
292 |
} |
293 |
else { PANEL->Ublock = A->block; }
|
294 |
#endif
|
295 |
/*
|
296 |
* If nprow is 1, we just allocate an array of JB integers for the swap.
|
297 |
* When nprow > 1, we allocate the space for the index arrays immediate-
|
298 |
* ly. The exact size of this array depends on the swapping routine that
|
299 |
* will be used, so we allocate the maximum:
|
300 |
*
|
301 |
* IWORK[0] is of size at most 1 +
|
302 |
* IPL is of size at most 1 +
|
303 |
* IPID is of size at most 4 * JB +
|
304 |
*
|
305 |
* For HPL_pdlaswp00:
|
306 |
* lindxA is of size at most 2 * JB +
|
307 |
* lindxAU is of size at most 2 * JB +
|
308 |
* llen is of size at most NPROW +
|
309 |
* llen_sv is of size at most NPROW.
|
310 |
*
|
311 |
* For HPL_pdlaswp01:
|
312 |
* ipA is of size ar most 1 +
|
313 |
* lindxA is of size at most 2 * JB +
|
314 |
* lindxAU is of size at most 2 * JB +
|
315 |
* iplen is of size at most NPROW + 1 +
|
316 |
* ipmap is of size at most NPROW +
|
317 |
* ipmapm1 is of size at most NPROW +
|
318 |
* permU is of size at most JB +
|
319 |
* iwork is of size at most MAX( 2*JB, NPROW+1 ).
|
320 |
*
|
321 |
* that is 3 + 8*JB + MAX(2*NPROW, 3*NPROW+1+JB+MAX(2*JB,NPROW+1))
|
322 |
* = 4 + 9*JB + 3*NPROW + MAX( 2*JB, NPROW+1 ).
|
323 |
*
|
324 |
* We use the fist entry of this to work array to indicate whether the
|
325 |
* the local index arrays have already been computed, and if yes, by
|
326 |
* which function:
|
327 |
* IWORK[0] = -1: no index arrays have been computed so far;
|
328 |
* IWORK[0] = 0: HPL_pdlaswp00 already computed those arrays;
|
329 |
* IWORK[0] = 1: HPL_pdlaswp01 already computed those arrays;
|
330 |
* This allows to save some redundant and useless computations.
|
331 |
*/
|
332 |
if( nprow == 1 ) { lwork = JB; } |
333 |
else
|
334 |
{ |
335 |
itmp1 = (JB << 1); lwork = nprow + 1; itmp1 = Mmax( itmp1, lwork ); |
336 |
lwork = 4 + (9 * JB) + (3 * nprow) + itmp1; |
337 |
} |
338 |
|
339 |
PANEL->IWORK = (int *)malloc( (size_t)(lwork) * sizeof( int ) ); |
340 |
|
341 |
if( PANEL->IWORK == NULL ) |
342 |
{ HPL_pabort( __LINE__, "HPL_pdpanel_init", "Memory allocation failed" ); } |
343 |
/* Initialize the first entry of the workarray */
|
344 |
*(PANEL->IWORK) = -1;
|
345 |
/*
|
346 |
* End of HPL_pdpanel_init
|
347 |
*/
|
348 |
} |