root / src / panel / HPL_pdpanel_init.c @ 9
Historique | Voir | Annoter | Télécharger (14,53 ko)
1 | 1 | equemene | /*
|
---|---|---|---|
2 | 1 | equemene | * -- High Performance Computing Linpack Benchmark (HPL)
|
3 | 1 | equemene | * HPL - 2.0 - September 10, 2008
|
4 | 1 | equemene | * Antoine P. Petitet
|
5 | 1 | equemene | * University of Tennessee, Knoxville
|
6 | 1 | equemene | * Innovative Computing Laboratory
|
7 | 1 | equemene | * (C) Copyright 2000-2008 All Rights Reserved
|
8 | 1 | equemene | *
|
9 | 1 | equemene | * -- Copyright notice and Licensing terms:
|
10 | 1 | equemene | *
|
11 | 1 | equemene | * Redistribution and use in source and binary forms, with or without
|
12 | 1 | equemene | * modification, are permitted provided that the following conditions
|
13 | 1 | equemene | * are met:
|
14 | 1 | equemene | *
|
15 | 1 | equemene | * 1. Redistributions of source code must retain the above copyright
|
16 | 1 | equemene | * notice, this list of conditions and the following disclaimer.
|
17 | 1 | equemene | *
|
18 | 1 | equemene | * 2. Redistributions in binary form must reproduce the above copyright
|
19 | 1 | equemene | * notice, this list of conditions, and the following disclaimer in the
|
20 | 1 | equemene | * documentation and/or other materials provided with the distribution.
|
21 | 1 | equemene | *
|
22 | 1 | equemene | * 3. All advertising materials mentioning features or use of this
|
23 | 1 | equemene | * software must display the following acknowledgement:
|
24 | 1 | equemene | * This product includes software developed at the University of
|
25 | 1 | equemene | * Tennessee, Knoxville, Innovative Computing Laboratory.
|
26 | 1 | equemene | *
|
27 | 1 | equemene | * 4. The name of the University, the name of the Laboratory, or the
|
28 | 1 | equemene | * names of its contributors may not be used to endorse or promote
|
29 | 1 | equemene | * products derived from this software without specific written
|
30 | 1 | equemene | * permission.
|
31 | 1 | equemene | *
|
32 | 1 | equemene | * -- Disclaimer:
|
33 | 1 | equemene | *
|
34 | 1 | equemene | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
35 | 1 | equemene | * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
36 | 1 | equemene | * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
37 | 1 | equemene | * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
|
38 | 1 | equemene | * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
39 | 1 | equemene | * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
40 | 1 | equemene | * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
41 | 1 | equemene | * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
42 | 1 | equemene | * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
43 | 1 | equemene | * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
44 | 1 | equemene | * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
45 | 1 | equemene | * ---------------------------------------------------------------------
|
46 | 1 | equemene | */
|
47 | 1 | equemene | /*
|
48 | 1 | equemene | * Include files
|
49 | 1 | equemene | */
|
50 | 1 | equemene | #include "hpl.h" |
51 | 1 | equemene | |
52 | 1 | equemene | #ifdef HPL_NO_MPI_DATATYPE /* The user insists to not use MPI types */ |
53 | 1 | equemene | #ifndef HPL_COPY_L /* and also want to avoid the copy of L ... */ |
54 | 1 | equemene | #define HPL_COPY_L /* well, sorry, can not do that: force the copy */ |
55 | 1 | equemene | #endif
|
56 | 1 | equemene | #endif
|
57 | 1 | equemene | |
58 | 1 | equemene | #ifdef STDC_HEADERS
|
59 | 1 | equemene | void HPL_pdpanel_init
|
60 | 1 | equemene | ( |
61 | 1 | equemene | HPL_T_grid * GRID, |
62 | 1 | equemene | HPL_T_palg * ALGO, |
63 | 1 | equemene | const int M, |
64 | 1 | equemene | const int N, |
65 | 1 | equemene | const int JB, |
66 | 1 | equemene | HPL_T_pmat * A, |
67 | 1 | equemene | const int IA, |
68 | 1 | equemene | const int JA, |
69 | 1 | equemene | const int TAG, |
70 | 1 | equemene | HPL_T_panel * PANEL |
71 | 1 | equemene | ) |
72 | 1 | equemene | #else
|
73 | 1 | equemene | void HPL_pdpanel_init
|
74 | 1 | equemene | ( GRID, ALGO, M, N, JB, A, IA, JA, TAG, PANEL ) |
75 | 1 | equemene | HPL_T_grid * GRID; |
76 | 1 | equemene | HPL_T_palg * ALGO; |
77 | 1 | equemene | const int M; |
78 | 1 | equemene | const int N; |
79 | 1 | equemene | const int JB; |
80 | 1 | equemene | HPL_T_pmat * A; |
81 | 1 | equemene | const int IA; |
82 | 1 | equemene | const int JA; |
83 | 1 | equemene | const int TAG; |
84 | 1 | equemene | HPL_T_panel * PANEL; |
85 | 1 | equemene | #endif
|
86 | 1 | equemene | { |
87 | 1 | equemene | /*
|
88 | 1 | equemene | * Purpose
|
89 | 1 | equemene | * =======
|
90 | 1 | equemene | *
|
91 | 1 | equemene | * HPL_pdpanel_init initializes a panel data structure.
|
92 | 1 | equemene | *
|
93 | 1 | equemene | *
|
94 | 1 | equemene | * Arguments
|
95 | 1 | equemene | * =========
|
96 | 1 | equemene | *
|
97 | 1 | equemene | * GRID (local input) HPL_T_grid *
|
98 | 1 | equemene | * On entry, GRID points to the data structure containing the
|
99 | 1 | equemene | * process grid information.
|
100 | 1 | equemene | *
|
101 | 1 | equemene | * ALGO (global input) HPL_T_palg *
|
102 | 1 | equemene | * On entry, ALGO points to the data structure containing the
|
103 | 1 | equemene | * algorithmic parameters.
|
104 | 1 | equemene | *
|
105 | 1 | equemene | * M (local input) const int
|
106 | 1 | equemene | * On entry, M specifies the global number of rows of the panel.
|
107 | 1 | equemene | * M must be at least zero.
|
108 | 1 | equemene | *
|
109 | 1 | equemene | * N (local input) const int
|
110 | 1 | equemene | * On entry, N specifies the global number of columns of the
|
111 | 1 | equemene | * panel and trailing submatrix. N must be at least zero.
|
112 | 1 | equemene | *
|
113 | 1 | equemene | * JB (global input) const int
|
114 | 1 | equemene | * On entry, JB specifies is the number of columns of the panel.
|
115 | 1 | equemene | * JB must be at least zero.
|
116 | 1 | equemene | *
|
117 | 1 | equemene | * A (local input/output) HPL_T_pmat *
|
118 | 1 | equemene | * On entry, A points to the data structure containing the local
|
119 | 1 | equemene | * array information.
|
120 | 1 | equemene | *
|
121 | 1 | equemene | * IA (global input) const int
|
122 | 1 | equemene | * On entry, IA is the global row index identifying the panel
|
123 | 1 | equemene | * and trailing submatrix. IA must be at least zero.
|
124 | 1 | equemene | *
|
125 | 1 | equemene | * JA (global input) const int
|
126 | 1 | equemene | * On entry, JA is the global column index identifying the panel
|
127 | 1 | equemene | * and trailing submatrix. JA must be at least zero.
|
128 | 1 | equemene | *
|
129 | 1 | equemene | * TAG (global input) const int
|
130 | 1 | equemene | * On entry, TAG is the row broadcast message id.
|
131 | 1 | equemene | *
|
132 | 1 | equemene | * PANEL (local input/output) HPL_T_panel *
|
133 | 1 | equemene | * On entry, PANEL points to the data structure containing the
|
134 | 1 | equemene | * panel information.
|
135 | 1 | equemene | *
|
136 | 1 | equemene | * ---------------------------------------------------------------------
|
137 | 1 | equemene | */
|
138 | 1 | equemene | /*
|
139 | 1 | equemene | * .. Local Variables ..
|
140 | 1 | equemene | */
|
141 | 1 | equemene | size_t dalign; |
142 | 1 | equemene | int icurcol, icurrow, ii, itmp1, jj, lwork,
|
143 | 1 | equemene | ml2, mp, mycol, myrow, nb, npcol, nprow, |
144 | 1 | equemene | nq, nu; |
145 | 1 | equemene | /* ..
|
146 | 1 | equemene | * .. Executable Statements ..
|
147 | 1 | equemene | */
|
148 | 1 | equemene | PANEL->grid = GRID; /* ptr to the process grid */
|
149 | 1 | equemene | PANEL->algo = ALGO; /* ptr to the algo parameters */
|
150 | 1 | equemene | PANEL->pmat = A; /* ptr to the local array info */
|
151 | 1 | equemene | |
152 | 1 | equemene | myrow = GRID->myrow; mycol = GRID->mycol; |
153 | 1 | equemene | nprow = GRID->nprow; npcol = GRID->npcol; nb = A->nb; |
154 | 1 | equemene | |
155 | 1 | equemene | HPL_infog2l( IA, JA, nb, nb, nb, nb, 0, 0, myrow, mycol, |
156 | 1 | equemene | nprow, npcol, &ii, &jj, &icurrow, &icurcol ); |
157 | 1 | equemene | mp = HPL_numrocI( M, IA, nb, nb, myrow, 0, nprow );
|
158 | 1 | equemene | nq = HPL_numrocI( N, JA, nb, nb, mycol, 0, npcol );
|
159 | 1 | equemene | /* ptr to trailing part of A */
|
160 | 1 | equemene | PANEL->A = Mptr( (double *)(A->A), ii, jj, A->ld );
|
161 | 1 | equemene | /*
|
162 | 1 | equemene | * Workspace pointers are initialized to NULL.
|
163 | 1 | equemene | */
|
164 | 1 | equemene | PANEL->WORK = NULL; PANEL->L2 = NULL; PANEL->L1 = NULL; |
165 | 1 | equemene | PANEL->DPIV = NULL; PANEL->DINFO = NULL; PANEL->U = NULL; |
166 | 1 | equemene | PANEL->IWORK = NULL;
|
167 | 1 | equemene | /*
|
168 | 1 | equemene | * Local lengths, indexes process coordinates
|
169 | 1 | equemene | */
|
170 | 1 | equemene | PANEL->nb = nb; /* distribution blocking factor */
|
171 | 1 | equemene | PANEL->jb = JB; /* panel width */
|
172 | 1 | equemene | PANEL->m = M; /* global # of rows of trailing part of A */
|
173 | 1 | equemene | PANEL->n = N; /* global # of cols of trailing part of A */
|
174 | 1 | equemene | PANEL->ia = IA; /* global row index of trailing part of A */
|
175 | 1 | equemene | PANEL->ja = JA; /* global col index of trailing part of A */
|
176 | 1 | equemene | PANEL->mp = mp; /* local # of rows of trailing part of A */
|
177 | 1 | equemene | PANEL->nq = nq; /* local # of cols of trailing part of A */
|
178 | 1 | equemene | PANEL->ii = ii; /* local row index of trailing part of A */
|
179 | 1 | equemene | PANEL->jj = jj; /* local col index of trailing part of A */
|
180 | 1 | equemene | PANEL->lda = A->ld; /* local leading dim of array A */
|
181 | 1 | equemene | PANEL->prow = icurrow; /* proc row owning 1st row of trailing A */
|
182 | 1 | equemene | PANEL->pcol = icurcol; /* proc col owning 1st col of trailing A */
|
183 | 1 | equemene | PANEL->msgid = TAG; /* message id to be used for panel bcast */
|
184 | 1 | equemene | /*
|
185 | 1 | equemene | * Initialize ldl2 and len to temporary dummy values and Update tag for
|
186 | 1 | equemene | * next panel
|
187 | 1 | equemene | */
|
188 | 1 | equemene | PANEL->ldl2 = 0; /* local leading dim of array L2 */ |
189 | 1 | equemene | PANEL->len = 0; /* length of the buffer to broadcast */ |
190 | 1 | equemene | /*
|
191 | 1 | equemene | * Figure out the exact amount of workspace needed by the factorization
|
192 | 1 | equemene | * and the update - Allocate that space - Finish the panel data structu-
|
193 | 1 | equemene | * re initialization.
|
194 | 1 | equemene | *
|
195 | 1 | equemene | * L1: JB x JB in all processes
|
196 | 1 | equemene | * DPIV: JB in all processes
|
197 | 1 | equemene | * DINFO: 1 in all processes
|
198 | 1 | equemene | *
|
199 | 1 | equemene | * We make sure that those three arrays are contiguous in memory for the
|
200 | 1 | equemene | * later panel broadcast. We also choose to put this amount of space
|
201 | 1 | equemene | * right after L2 (when it exist) so that one can receive a contiguous
|
202 | 1 | equemene | * buffer.
|
203 | 1 | equemene | */
|
204 | 1 | equemene | dalign = ALGO->align * sizeof( double ); |
205 | 1 | equemene | |
206 | 1 | equemene | if( npcol == 1 ) /* P x 1 process grid */ |
207 | 1 | equemene | { /* space for L1, DPIV, DINFO */
|
208 | 1 | equemene | lwork = ALGO->align + ( PANEL->len = JB * JB + JB + 1 );
|
209 | 1 | equemene | if( nprow > 1 ) /* space for U */ |
210 | 1 | equemene | { nu = nq - JB; lwork += JB * Mmax( 0, nu ); }
|
211 | 1 | equemene | |
212 | 1 | equemene | if( !( PANEL->WORK = (void *)malloc( (size_t)(lwork) * |
213 | 1 | equemene | sizeof( double ) ) ) ) |
214 | 1 | equemene | { |
215 | 1 | equemene | HPL_pabort( __LINE__, "HPL_pdpanel_init",
|
216 | 1 | equemene | "Memory allocation failed" );
|
217 | 1 | equemene | } |
218 | 1 | equemene | /*
|
219 | 1 | equemene | * Initialize the pointers of the panel structure - Always re-use A in
|
220 | 1 | equemene | * the only process column
|
221 | 1 | equemene | */
|
222 | 1 | equemene | PANEL->L2 = PANEL->A + ( myrow == icurrow ? JB : 0 );
|
223 | 1 | equemene | PANEL->ldl2 = A->ld; |
224 | 1 | equemene | PANEL->L1 = (double *)HPL_PTR( PANEL->WORK, dalign );
|
225 | 1 | equemene | PANEL->DPIV = PANEL->L1 + JB * JB; |
226 | 1 | equemene | PANEL->DINFO = PANEL->DPIV + JB; *(PANEL->DINFO) = 0.0; |
227 | 1 | equemene | PANEL->U = ( nprow > 1 ? PANEL->DINFO + 1: NULL ); |
228 | 1 | equemene | } |
229 | 1 | equemene | else
|
230 | 1 | equemene | { /* space for L2, L1, DPIV */
|
231 | 1 | equemene | ml2 = ( myrow == icurrow ? mp - JB : mp ); ml2 = Mmax( 0, ml2 );
|
232 | 1 | equemene | PANEL->len = ml2*JB + ( itmp1 = JB*JB + JB + 1 );
|
233 | 1 | equemene | #ifdef HPL_COPY_L
|
234 | 1 | equemene | lwork = ALGO->align + PANEL->len; |
235 | 1 | equemene | #else
|
236 | 1 | equemene | lwork = ALGO->align + ( mycol == icurcol ? itmp1 : PANEL->len ); |
237 | 1 | equemene | #endif
|
238 | 1 | equemene | if( nprow > 1 ) /* space for U */ |
239 | 1 | equemene | { |
240 | 1 | equemene | nu = ( mycol == icurcol ? nq - JB : nq ); |
241 | 1 | equemene | lwork += JB * Mmax( 0, nu );
|
242 | 1 | equemene | } |
243 | 1 | equemene | |
244 | 1 | equemene | if( !( PANEL->WORK = (void *)malloc( (size_t)(lwork) * |
245 | 1 | equemene | sizeof( double ) ) ) ) |
246 | 1 | equemene | { |
247 | 1 | equemene | HPL_pabort( __LINE__, "HPL_pdpanel_init",
|
248 | 1 | equemene | "Memory allocation failed" );
|
249 | 1 | equemene | } |
250 | 1 | equemene | /*
|
251 | 1 | equemene | * Initialize the pointers of the panel structure - Re-use A in the cur-
|
252 | 1 | equemene | * rent process column when HPL_COPY_L is not defined.
|
253 | 1 | equemene | */
|
254 | 1 | equemene | #ifdef HPL_COPY_L
|
255 | 1 | equemene | PANEL->L2 = (double *)HPL_PTR( PANEL->WORK, dalign );
|
256 | 1 | equemene | PANEL->ldl2 = Mmax( 1, ml2 );
|
257 | 1 | equemene | PANEL->L1 = PANEL->L2 + ml2 * JB; |
258 | 1 | equemene | #else
|
259 | 1 | equemene | if( mycol == icurcol )
|
260 | 1 | equemene | { |
261 | 1 | equemene | PANEL->L2 = PANEL->A + ( myrow == icurrow ? JB : 0 );
|
262 | 1 | equemene | PANEL->ldl2 = A->ld; |
263 | 1 | equemene | PANEL->L1 = (double *)HPL_PTR( PANEL->WORK, dalign );
|
264 | 1 | equemene | } |
265 | 1 | equemene | else
|
266 | 1 | equemene | { |
267 | 1 | equemene | PANEL->L2 = (double *)HPL_PTR( PANEL->WORK, dalign );
|
268 | 1 | equemene | PANEL->ldl2 = Mmax( 1, ml2 );
|
269 | 1 | equemene | PANEL->L1 = PANEL->L2 + ml2 * JB; |
270 | 1 | equemene | } |
271 | 1 | equemene | #endif
|
272 | 1 | equemene | PANEL->DPIV = PANEL->L1 + JB * JB; |
273 | 1 | equemene | PANEL->DINFO = PANEL->DPIV + JB; *(PANEL->DINFO) = 0.0; |
274 | 1 | equemene | PANEL->U = ( nprow > 1 ? PANEL->DINFO + 1 : NULL ); |
275 | 1 | equemene | } |
276 | 1 | equemene | #ifdef HPL_CALL_VSIPL
|
277 | 1 | equemene | PANEL->Ablock = A->block; |
278 | 1 | equemene | /*
|
279 | 1 | equemene | * Create blocks and bind them to the data pointers
|
280 | 1 | equemene | */
|
281 | 1 | equemene | PANEL->L1block = vsip_blockbind_d( (vsip_scalar_d *)(PANEL->L1), |
282 | 1 | equemene | (vsip_length)(JB*JB), VSIP_MEM_NONE ); |
283 | 1 | equemene | PANEL->L2block = vsip_blockbind_d( (vsip_scalar_d *)(PANEL->L2), |
284 | 1 | equemene | (vsip_length)(PANEL->ldl2*JB), |
285 | 1 | equemene | VSIP_MEM_NONE ); |
286 | 1 | equemene | if( nprow > 1 ) |
287 | 1 | equemene | { |
288 | 1 | equemene | nu = ( mycol == icurcol ? nq - JB : nq ); |
289 | 1 | equemene | PANEL->Ublock = vsip_blockbind_d( (vsip_scalar_d *)(PANEL->U), |
290 | 1 | equemene | (vsip_length)(JB * Mmax( 0, nu )),
|
291 | 1 | equemene | VSIP_MEM_NONE ); |
292 | 1 | equemene | } |
293 | 1 | equemene | else { PANEL->Ublock = A->block; }
|
294 | 1 | equemene | #endif
|
295 | 1 | equemene | /*
|
296 | 1 | equemene | * If nprow is 1, we just allocate an array of JB integers for the swap.
|
297 | 1 | equemene | * When nprow > 1, we allocate the space for the index arrays immediate-
|
298 | 1 | equemene | * ly. The exact size of this array depends on the swapping routine that
|
299 | 1 | equemene | * will be used, so we allocate the maximum:
|
300 | 1 | equemene | *
|
301 | 1 | equemene | * IWORK[0] is of size at most 1 +
|
302 | 1 | equemene | * IPL is of size at most 1 +
|
303 | 1 | equemene | * IPID is of size at most 4 * JB +
|
304 | 1 | equemene | *
|
305 | 1 | equemene | * For HPL_pdlaswp00:
|
306 | 1 | equemene | * lindxA is of size at most 2 * JB +
|
307 | 1 | equemene | * lindxAU is of size at most 2 * JB +
|
308 | 1 | equemene | * llen is of size at most NPROW +
|
309 | 1 | equemene | * llen_sv is of size at most NPROW.
|
310 | 1 | equemene | *
|
311 | 1 | equemene | * For HPL_pdlaswp01:
|
312 | 1 | equemene | * ipA is of size ar most 1 +
|
313 | 1 | equemene | * lindxA is of size at most 2 * JB +
|
314 | 1 | equemene | * lindxAU is of size at most 2 * JB +
|
315 | 1 | equemene | * iplen is of size at most NPROW + 1 +
|
316 | 1 | equemene | * ipmap is of size at most NPROW +
|
317 | 1 | equemene | * ipmapm1 is of size at most NPROW +
|
318 | 1 | equemene | * permU is of size at most JB +
|
319 | 1 | equemene | * iwork is of size at most MAX( 2*JB, NPROW+1 ).
|
320 | 1 | equemene | *
|
321 | 1 | equemene | * that is 3 + 8*JB + MAX(2*NPROW, 3*NPROW+1+JB+MAX(2*JB,NPROW+1))
|
322 | 1 | equemene | * = 4 + 9*JB + 3*NPROW + MAX( 2*JB, NPROW+1 ).
|
323 | 1 | equemene | *
|
324 | 1 | equemene | * We use the fist entry of this to work array to indicate whether the
|
325 | 1 | equemene | * the local index arrays have already been computed, and if yes, by
|
326 | 1 | equemene | * which function:
|
327 | 1 | equemene | * IWORK[0] = -1: no index arrays have been computed so far;
|
328 | 1 | equemene | * IWORK[0] = 0: HPL_pdlaswp00 already computed those arrays;
|
329 | 1 | equemene | * IWORK[0] = 1: HPL_pdlaswp01 already computed those arrays;
|
330 | 1 | equemene | * This allows to save some redundant and useless computations.
|
331 | 1 | equemene | */
|
332 | 1 | equemene | if( nprow == 1 ) { lwork = JB; } |
333 | 1 | equemene | else
|
334 | 1 | equemene | { |
335 | 1 | equemene | itmp1 = (JB << 1); lwork = nprow + 1; itmp1 = Mmax( itmp1, lwork ); |
336 | 1 | equemene | lwork = 4 + (9 * JB) + (3 * nprow) + itmp1; |
337 | 1 | equemene | } |
338 | 1 | equemene | |
339 | 1 | equemene | PANEL->IWORK = (int *)malloc( (size_t)(lwork) * sizeof( int ) ); |
340 | 1 | equemene | |
341 | 1 | equemene | if( PANEL->IWORK == NULL ) |
342 | 1 | equemene | { HPL_pabort( __LINE__, "HPL_pdpanel_init", "Memory allocation failed" ); } |
343 | 1 | equemene | /* Initialize the first entry of the workarray */
|
344 | 1 | equemene | *(PANEL->IWORK) = -1;
|
345 | 1 | equemene | /*
|
346 | 1 | equemene | * End of HPL_pdpanel_init
|
347 | 1 | equemene | */
|
348 | 1 | equemene | } |