root / testing / ptest / HPL_pdinfo.c
Historique | Voir | Annoter | Télécharger (43,51 ko)
1 |
/*
|
---|---|
2 |
* -- High Performance Computing Linpack Benchmark (HPL)
|
3 |
* HPL - 2.0 - September 10, 2008
|
4 |
* Antoine P. Petitet
|
5 |
* University of Tennessee, Knoxville
|
6 |
* Innovative Computing Laboratory
|
7 |
* (C) Copyright 2000-2008 All Rights Reserved
|
8 |
*
|
9 |
* -- Copyright notice and Licensing terms:
|
10 |
*
|
11 |
* Redistribution and use in source and binary forms, with or without
|
12 |
* modification, are permitted provided that the following conditions
|
13 |
* are met:
|
14 |
*
|
15 |
* 1. Redistributions of source code must retain the above copyright
|
16 |
* notice, this list of conditions and the following disclaimer.
|
17 |
*
|
18 |
* 2. Redistributions in binary form must reproduce the above copyright
|
19 |
* notice, this list of conditions, and the following disclaimer in the
|
20 |
* documentation and/or other materials provided with the distribution.
|
21 |
*
|
22 |
* 3. All advertising materials mentioning features or use of this
|
23 |
* software must display the following acknowledgement:
|
24 |
* This product includes software developed at the University of
|
25 |
* Tennessee, Knoxville, Innovative Computing Laboratory.
|
26 |
*
|
27 |
* 4. The name of the University, the name of the Laboratory, or the
|
28 |
* names of its contributors may not be used to endorse or promote
|
29 |
* products derived from this software without specific written
|
30 |
* permission.
|
31 |
*
|
32 |
* -- Disclaimer:
|
33 |
*
|
34 |
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
35 |
* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
36 |
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
37 |
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
|
38 |
* OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
39 |
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
40 |
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
41 |
* DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
42 |
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
43 |
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
44 |
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
45 |
* ---------------------------------------------------------------------
|
46 |
*/
|
47 |
/*
|
48 |
* Include files
|
49 |
*/
|
50 |
#include "hpl.h" |
51 |
|
52 |
#ifdef STDC_HEADERS
|
53 |
void HPL_pdinfo
|
54 |
( |
55 |
HPL_T_test * TEST, |
56 |
int * NS,
|
57 |
int * N,
|
58 |
int * NBS,
|
59 |
int * NB,
|
60 |
HPL_T_ORDER * PMAPPIN, |
61 |
int * NPQS,
|
62 |
int * P,
|
63 |
int * Q,
|
64 |
int * NPFS,
|
65 |
HPL_T_FACT * PF, |
66 |
int * NBMS,
|
67 |
int * NBM,
|
68 |
int * NDVS,
|
69 |
int * NDV,
|
70 |
int * NRFS,
|
71 |
HPL_T_FACT * RF, |
72 |
int * NTPS,
|
73 |
HPL_T_TOP * TP, |
74 |
int * NDHS,
|
75 |
int * DH,
|
76 |
HPL_T_SWAP * FSWAP, |
77 |
int * TSWAP,
|
78 |
int * L1NOTRAN,
|
79 |
int * UNOTRAN,
|
80 |
int * EQUIL,
|
81 |
int * ALIGN
|
82 |
) |
83 |
#else
|
84 |
void HPL_pdinfo
|
85 |
( TEST, NS, N, NBS, NB, PMAPPIN, NPQS, P, Q, NPFS, PF, NBMS, NBM, NDVS, NDV, NRFS, RF, NTPS, TP, NDHS, DH, FSWAP, TSWAP, L1NOTRAN, UNOTRAN, EQUIL, ALIGN ) |
86 |
HPL_T_test * TEST; |
87 |
int * NS;
|
88 |
int * N;
|
89 |
int * NBS;
|
90 |
int * NB;
|
91 |
HPL_T_ORDER * PMAPPIN; |
92 |
int * NPQS;
|
93 |
int * P;
|
94 |
int * Q;
|
95 |
int * NPFS;
|
96 |
HPL_T_FACT * PF; |
97 |
int * NBMS;
|
98 |
int * NBM;
|
99 |
int * NDVS;
|
100 |
int * NDV;
|
101 |
int * NRFS;
|
102 |
HPL_T_FACT * RF; |
103 |
int * NTPS;
|
104 |
HPL_T_TOP * TP; |
105 |
int * NDHS;
|
106 |
int * DH;
|
107 |
HPL_T_SWAP * FSWAP; |
108 |
int * TSWAP;
|
109 |
int * L1NOTRAN;
|
110 |
int * UNOTRAN;
|
111 |
int * EQUIL;
|
112 |
int * ALIGN;
|
113 |
#endif
|
114 |
{ |
115 |
/*
|
116 |
* Purpose
|
117 |
* =======
|
118 |
*
|
119 |
* HPL_pdinfo reads the startup information for the various tests and
|
120 |
* transmits it to all processes.
|
121 |
*
|
122 |
* Arguments
|
123 |
* =========
|
124 |
*
|
125 |
* TEST (global output) HPL_T_test *
|
126 |
* On entry, TEST points to a testing data structure. On exit,
|
127 |
* the fields of this data structure are initialized as follows:
|
128 |
* TEST->outfp specifies the output file where the results will
|
129 |
* be printed. It is only defined and used by the process 0 of
|
130 |
* the grid. TEST->thrsh specifies the threshold value for the
|
131 |
* test ratio. TEST->epsil is the relative machine precision of
|
132 |
* the distributed computer. Finally the test counters, kfail,
|
133 |
* kpass, kskip, ktest are initialized to zero.
|
134 |
*
|
135 |
* NS (global output) int *
|
136 |
* On exit, NS specifies the number of different problem sizes
|
137 |
* to be tested. NS is less than or equal to HPL_MAX_PARAM.
|
138 |
*
|
139 |
* N (global output) int *
|
140 |
* On entry, N is an array of dimension HPL_MAX_PARAM. On exit,
|
141 |
* the first NS entries of this array contain the problem sizes
|
142 |
* to run the code with.
|
143 |
*
|
144 |
* NBS (global output) int *
|
145 |
* On exit, NBS specifies the number of different distribution
|
146 |
* blocking factors to be tested. NBS must be less than or equal
|
147 |
* to HPL_MAX_PARAM.
|
148 |
*
|
149 |
* NB (global output) int *
|
150 |
* On entry, NB is an array of dimension HPL_MAX_PARAM. On exit,
|
151 |
* the first NBS entries of this array contain the values of the
|
152 |
* various distribution blocking factors, to run the code with.
|
153 |
*
|
154 |
* PMAPPIN (global output) HPL_T_ORDER *
|
155 |
* On exit, PMAPPIN specifies the process mapping onto the
|
156 |
* nodes of the MPI machine configuration. PMAPPIN defaults to
|
157 |
* row-major ordering.
|
158 |
*
|
159 |
* NPQS (global output) int *
|
160 |
* On exit, NPQS specifies the number of different values that
|
161 |
* can be used for P and Q, i.e., the number of process grids to
|
162 |
* run the code with. NPQS must be less than or equal to
|
163 |
* HPL_MAX_PARAM.
|
164 |
*
|
165 |
* P (global output) int *
|
166 |
* On entry, P is an array of dimension HPL_MAX_PARAM. On exit,
|
167 |
* the first NPQS entries of this array contain the values of P,
|
168 |
* the number of process rows of the NPQS grids to run the code
|
169 |
* with.
|
170 |
*
|
171 |
* Q (global output) int *
|
172 |
* On entry, Q is an array of dimension HPL_MAX_PARAM. On exit,
|
173 |
* the first NPQS entries of this array contain the values of Q,
|
174 |
* the number of process columns of the NPQS grids to run the
|
175 |
* code with.
|
176 |
*
|
177 |
* NPFS (global output) int *
|
178 |
* On exit, NPFS specifies the number of different values that
|
179 |
* can be used for PF : the panel factorization algorithm to run
|
180 |
* the code with. NPFS is less than or equal to HPL_MAX_PARAM.
|
181 |
*
|
182 |
* PF (global output) HPL_T_FACT *
|
183 |
* On entry, PF is an array of dimension HPL_MAX_PARAM. On exit,
|
184 |
* the first NPFS entries of this array contain the various
|
185 |
* panel factorization algorithms to run the code with.
|
186 |
*
|
187 |
* NBMS (global output) int *
|
188 |
* On exit, NBMS specifies the number of various recursive
|
189 |
* stopping criteria to be tested. NBMS must be less than or
|
190 |
* equal to HPL_MAX_PARAM.
|
191 |
*
|
192 |
* NBM (global output) int *
|
193 |
* On entry, NBM is an array of dimension HPL_MAX_PARAM. On
|
194 |
* exit, the first NBMS entries of this array contain the values
|
195 |
* of the various recursive stopping criteria to be tested.
|
196 |
*
|
197 |
* NDVS (global output) int *
|
198 |
* On exit, NDVS specifies the number of various numbers of
|
199 |
* panels in recursion to be tested. NDVS is less than or equal
|
200 |
* to HPL_MAX_PARAM.
|
201 |
*
|
202 |
* NDV (global output) int *
|
203 |
* On entry, NDV is an array of dimension HPL_MAX_PARAM. On
|
204 |
* exit, the first NDVS entries of this array contain the values
|
205 |
* of the various numbers of panels in recursion to be tested.
|
206 |
*
|
207 |
* NRFS (global output) int *
|
208 |
* On exit, NRFS specifies the number of different values that
|
209 |
* can be used for RF : the recursive factorization algorithm to
|
210 |
* be tested. NRFS is less than or equal to HPL_MAX_PARAM.
|
211 |
*
|
212 |
* RF (global output) HPL_T_FACT *
|
213 |
* On entry, RF is an array of dimension HPL_MAX_PARAM. On exit,
|
214 |
* the first NRFS entries of this array contain the various
|
215 |
* recursive factorization algorithms to run the code with.
|
216 |
*
|
217 |
* NTPS (global output) int *
|
218 |
* On exit, NTPS specifies the number of different values that
|
219 |
* can be used for the broadcast topologies to be tested. NTPS
|
220 |
* is less than or equal to HPL_MAX_PARAM.
|
221 |
*
|
222 |
* TP (global output) HPL_T_TOP *
|
223 |
* On entry, TP is an array of dimension HPL_MAX_PARAM. On exit,
|
224 |
* the first NTPS entries of this array contain the various
|
225 |
* broadcast (along rows) topologies to run the code with.
|
226 |
*
|
227 |
* NDHS (global output) int *
|
228 |
* On exit, NDHS specifies the number of different values that
|
229 |
* can be used for the lookahead depths to be tested. NDHS is
|
230 |
* less than or equal to HPL_MAX_PARAM.
|
231 |
*
|
232 |
* DH (global output) int *
|
233 |
* On entry, DH is an array of dimension HPL_MAX_PARAM. On
|
234 |
* exit, the first NDHS entries of this array contain the values
|
235 |
* of lookahead depths to run the code with. Each value is
|
236 |
* greater than or equal to 0 (0 means no lookahead).
|
237 |
*
|
238 |
* FSWAP (global output) HPL_T_SWAP *
|
239 |
* On exit, FSWAP specifies the swapping algorithm to be used in
|
240 |
* all tests.
|
241 |
*
|
242 |
* TSWAP (global output) int *
|
243 |
* On exit, TSWAP specifies the swapping threshold as a number
|
244 |
* of columns when the mixed swapping algorithm was chosen.
|
245 |
*
|
246 |
* L1NOTRAN (global output) int *
|
247 |
* On exit, L1NOTRAN specifies whether the upper triangle of the
|
248 |
* panels of columns should be stored in no-transposed form
|
249 |
* (L1NOTRAN=1) or in transposed form (L1NOTRAN=0).
|
250 |
*
|
251 |
* UNOTRAN (global output) int *
|
252 |
* On exit, UNOTRAN specifies whether the panels of rows should
|
253 |
* be stored in no-transposed form (UNOTRAN=1) or transposed
|
254 |
* form (UNOTRAN=0) during their broadcast.
|
255 |
*
|
256 |
* EQUIL (global output) int *
|
257 |
* On exit, EQUIL specifies whether equilibration during the
|
258 |
* swap-broadcast of the panel of rows should be performed
|
259 |
* (EQUIL=1) or not (EQUIL=0).
|
260 |
*
|
261 |
* ALIGN (global output) int *
|
262 |
* On exit, ALIGN specifies the alignment of the dynamically
|
263 |
* allocated buffers in double precision words. ALIGN is greater
|
264 |
* than zero.
|
265 |
*
|
266 |
* ---------------------------------------------------------------------
|
267 |
*/
|
268 |
/*
|
269 |
* .. Local Variables ..
|
270 |
*/
|
271 |
char file[HPL_LINE_MAX], line[HPL_LINE_MAX],
|
272 |
auth[HPL_LINE_MAX], num [HPL_LINE_MAX]; |
273 |
FILE * infp; |
274 |
int * iwork = NULL; |
275 |
char * lineptr;
|
276 |
int error=0, fid, i, j, lwork, maxp, nprocs, |
277 |
rank, size; |
278 |
/* ..
|
279 |
* .. Executable Statements ..
|
280 |
*/
|
281 |
MPI_Comm_rank( MPI_COMM_WORLD, &rank ); |
282 |
MPI_Comm_size( MPI_COMM_WORLD, &size ); |
283 |
/*
|
284 |
* Initialize the TEST data structure with default values
|
285 |
*/
|
286 |
TEST->outfp = stderr; TEST->epsil = 2.0e-16; TEST->thrsh = 16.0; |
287 |
TEST->kfail = TEST->kpass = TEST->kskip = TEST->ktest = 0;
|
288 |
/*
|
289 |
* Process 0 reads the input data, broadcasts to other processes and
|
290 |
* writes needed information to TEST->outfp.
|
291 |
*/
|
292 |
if( rank == 0 ) |
293 |
{ |
294 |
/*
|
295 |
* Open file and skip data file header
|
296 |
*/
|
297 |
if( ( infp = fopen( "HPL.dat", "r" ) ) == NULL ) |
298 |
{ |
299 |
HPL_pwarn( stderr, __LINE__, "HPL_pdinfo",
|
300 |
"cannot open file HPL.dat" );
|
301 |
error = 1; goto label_error; |
302 |
} |
303 |
|
304 |
(void) fgets( line, HPL_LINE_MAX - 2, infp ); |
305 |
(void) fgets( auth, HPL_LINE_MAX - 2, infp ); |
306 |
/*
|
307 |
* Read name and unit number for summary output file
|
308 |
*/
|
309 |
(void) fgets( line, HPL_LINE_MAX - 2, infp ); |
310 |
(void) sscanf( line, "%s", file ); |
311 |
(void) fgets( line, HPL_LINE_MAX - 2, infp ); |
312 |
(void) sscanf( line, "%s", num ); |
313 |
fid = atoi( num ); |
314 |
if ( fid == 6 ) TEST->outfp = stdout; |
315 |
else if( fid == 7 ) TEST->outfp = stderr; |
316 |
else if( ( TEST->outfp = fopen( file, "w" ) ) == NULL ) |
317 |
{ |
318 |
HPL_pwarn( stderr, __LINE__, "HPL_pdinfo", "cannot open file %s.", |
319 |
file ); |
320 |
error = 1; goto label_error; |
321 |
} |
322 |
/*
|
323 |
* Read and check the parameter values for the tests.
|
324 |
*
|
325 |
* Problem size (>=0) (N)
|
326 |
*/
|
327 |
(void) fgets( line, HPL_LINE_MAX - 2, infp ); |
328 |
(void) sscanf( line, "%s", num ); *NS = atoi( num ); |
329 |
if( ( *NS < 1 ) || ( *NS > HPL_MAX_PARAM ) ) |
330 |
{ |
331 |
HPL_pwarn( stderr, __LINE__, "HPL_pdinfo", "%s %d", |
332 |
"Number of values of N is less than 1 or greater than",
|
333 |
HPL_MAX_PARAM ); |
334 |
error = 1; goto label_error; |
335 |
} |
336 |
|
337 |
(void) fgets( line, HPL_LINE_MAX - 2, infp ); lineptr = line; |
338 |
for( i = 0; i < *NS; i++ ) |
339 |
{ |
340 |
(void) sscanf( lineptr, "%s", num ); lineptr += strlen( num ) + 1; |
341 |
if( ( N[ i ] = atoi( num ) ) < 0 ) |
342 |
{ |
343 |
HPL_pwarn( stderr, __LINE__, "HPL_pdinfo",
|
344 |
"Value of N less than 0" );
|
345 |
error = 1; goto label_error; |
346 |
} |
347 |
} |
348 |
/*
|
349 |
* Block size (>=1) (NB)
|
350 |
*/
|
351 |
(void) fgets( line, HPL_LINE_MAX - 2, infp ); |
352 |
(void) sscanf( line, "%s", num ); *NBS = atoi( num ); |
353 |
if( ( *NBS < 1 ) || ( *NBS > HPL_MAX_PARAM ) ) |
354 |
{ |
355 |
HPL_pwarn( stderr, __LINE__, "HPL_pdinfo", "%s %s %d", |
356 |
"Number of values of NB is less than 1 or",
|
357 |
"greater than", HPL_MAX_PARAM );
|
358 |
error = 1; goto label_error; |
359 |
} |
360 |
|
361 |
(void) fgets( line, HPL_LINE_MAX - 2, infp ); lineptr = line; |
362 |
for( i = 0; i < *NBS; i++ ) |
363 |
{ |
364 |
(void) sscanf( lineptr, "%s", num ); lineptr += strlen( num ) + 1; |
365 |
if( ( NB[ i ] = atoi( num ) ) < 1 ) |
366 |
{ |
367 |
HPL_pwarn( stderr, __LINE__, "HPL_pdinfo",
|
368 |
"Value of NB less than 1" );
|
369 |
error = 1; goto label_error; |
370 |
} |
371 |
} |
372 |
/*
|
373 |
* Process grids, mapping, (>=1) (P, Q)
|
374 |
*/
|
375 |
(void) fgets( line, HPL_LINE_MAX - 2, infp ); |
376 |
(void) sscanf( line, "%s", num ); |
377 |
*PMAPPIN = ( atoi( num ) == 1 ? HPL_COLUMN_MAJOR : HPL_ROW_MAJOR );
|
378 |
|
379 |
(void) fgets( line, HPL_LINE_MAX - 2, infp ); |
380 |
(void) sscanf( line, "%s", num ); *NPQS = atoi( num ); |
381 |
if( ( *NPQS < 1 ) || ( *NPQS > HPL_MAX_PARAM ) ) |
382 |
{ |
383 |
HPL_pwarn( stderr, __LINE__, "HPL_pdinfo", "%s %s %d", |
384 |
"Number of values of grids is less",
|
385 |
"than 1 or greater than", HPL_MAX_PARAM );
|
386 |
error = 1; goto label_error; |
387 |
} |
388 |
|
389 |
(void) fgets( line, HPL_LINE_MAX - 2, infp ); lineptr = line; |
390 |
for( i = 0; i < *NPQS; i++ ) |
391 |
{ |
392 |
(void) sscanf( lineptr, "%s", num ); lineptr += strlen( num ) + 1; |
393 |
if( ( P[ i ] = atoi( num ) ) < 1 ) |
394 |
{ |
395 |
HPL_pwarn( stderr, __LINE__, "HPL_pdinfo",
|
396 |
"Value of P less than 1" );
|
397 |
error = 1; goto label_error; |
398 |
} |
399 |
} |
400 |
(void) fgets( line, HPL_LINE_MAX - 2, infp ); lineptr = line; |
401 |
for( i = 0; i < *NPQS; i++ ) |
402 |
{ |
403 |
(void) sscanf( lineptr, "%s", num ); lineptr += strlen( num ) + 1; |
404 |
if( ( Q[ i ] = atoi( num ) ) < 1 ) |
405 |
{ |
406 |
HPL_pwarn( stderr, __LINE__, "HPL_pdinfo",
|
407 |
"Value of Q less than 1" );
|
408 |
error = 1; goto label_error; |
409 |
} |
410 |
} |
411 |
/*
|
412 |
* Check for enough processes in machine configuration
|
413 |
*/
|
414 |
maxp = 0;
|
415 |
for( i = 0; i < *NPQS; i++ ) |
416 |
{ nprocs = P[i] * Q[i]; maxp = Mmax( maxp, nprocs ); } |
417 |
if( maxp > size )
|
418 |
{ |
419 |
HPL_pwarn( stderr, __LINE__, "HPL_pdinfo",
|
420 |
"Need at least %d processes for these tests", maxp );
|
421 |
error = 1; goto label_error; |
422 |
} |
423 |
/*
|
424 |
* Checking threshold value (TEST->thrsh)
|
425 |
*/
|
426 |
(void) fgets( line, HPL_LINE_MAX - 2, infp ); |
427 |
(void) sscanf( line, "%s", num ); TEST->thrsh = atof( num ); |
428 |
/*
|
429 |
* Panel factorization algorithm (PF)
|
430 |
*/
|
431 |
(void) fgets( line, HPL_LINE_MAX - 2, infp ); |
432 |
(void) sscanf( line, "%s", num ); *NPFS = atoi( num ); |
433 |
if( ( *NPFS < 1 ) || ( *NPFS > HPL_MAX_PARAM ) ) |
434 |
{ |
435 |
HPL_pwarn( stderr, __LINE__, "HPL_pdinfo", "%s %s %d", |
436 |
"number of values of PFACT",
|
437 |
"is less than 1 or greater than", HPL_MAX_PARAM );
|
438 |
error = 1; goto label_error; |
439 |
} |
440 |
(void) fgets( line, HPL_LINE_MAX - 2, infp ); lineptr = line; |
441 |
for( i = 0; i < *NPFS; i++ ) |
442 |
{ |
443 |
(void) sscanf( lineptr, "%s", num ); lineptr += strlen( num ) + 1; |
444 |
j = atoi( num ); |
445 |
if( j == 0 ) PF[ i ] = HPL_LEFT_LOOKING; |
446 |
else if( j == 1 ) PF[ i ] = HPL_CROUT; |
447 |
else if( j == 2 ) PF[ i ] = HPL_RIGHT_LOOKING; |
448 |
else PF[ i ] = HPL_RIGHT_LOOKING;
|
449 |
} |
450 |
/*
|
451 |
* Recursive stopping criterion (>=1) (NBM)
|
452 |
*/
|
453 |
(void) fgets( line, HPL_LINE_MAX - 2, infp ); |
454 |
(void) sscanf( line, "%s", num ); *NBMS = atoi( num ); |
455 |
if( ( *NBMS < 1 ) || ( *NBMS > HPL_MAX_PARAM ) ) |
456 |
{ |
457 |
HPL_pwarn( stderr, __LINE__, "HPL_pdinfo", "%s %s %d", |
458 |
"Number of values of NBMIN",
|
459 |
"is less than 1 or greater than", HPL_MAX_PARAM );
|
460 |
error = 1; goto label_error; |
461 |
} |
462 |
(void) fgets( line, HPL_LINE_MAX - 2, infp ); lineptr = line; |
463 |
for( i = 0; i < *NBMS; i++ ) |
464 |
{ |
465 |
(void) sscanf( lineptr, "%s", num ); lineptr += strlen( num ) + 1; |
466 |
if( ( NBM[ i ] = atoi( num ) ) < 1 ) |
467 |
{ |
468 |
HPL_pwarn( stderr, __LINE__, "HPL_pdinfo",
|
469 |
"Value of NBMIN less than 1" );
|
470 |
error = 1; goto label_error; |
471 |
} |
472 |
} |
473 |
/*
|
474 |
* Number of panels in recursion (>=2) (NDV)
|
475 |
*/
|
476 |
(void) fgets( line, HPL_LINE_MAX - 2, infp ); |
477 |
(void) sscanf( line, "%s", num ); *NDVS = atoi( num ); |
478 |
if( ( *NDVS < 1 ) || ( *NDVS > HPL_MAX_PARAM ) ) |
479 |
{ |
480 |
HPL_pwarn( stderr, __LINE__, "HPL_pdinfo", "%s %s %d", |
481 |
"Number of values of NDIV",
|
482 |
"is less than 1 or greater than", HPL_MAX_PARAM );
|
483 |
error = 1; goto label_error; |
484 |
} |
485 |
(void) fgets( line, HPL_LINE_MAX - 2, infp ); lineptr = line; |
486 |
for( i = 0; i < *NDVS; i++ ) |
487 |
{ |
488 |
(void) sscanf( lineptr, "%s", num ); lineptr += strlen( num ) + 1; |
489 |
if( ( NDV[ i ] = atoi( num ) ) < 2 ) |
490 |
{ |
491 |
HPL_pwarn( stderr, __LINE__, "HPL_pdinfo",
|
492 |
"Value of NDIV less than 2" );
|
493 |
error = 1; goto label_error; |
494 |
} |
495 |
} |
496 |
/*
|
497 |
* Recursive panel factorization (RF)
|
498 |
*/
|
499 |
(void) fgets( line, HPL_LINE_MAX - 2, infp ); |
500 |
(void) sscanf( line, "%s", num ); *NRFS = atoi( num ); |
501 |
if( ( *NRFS < 1 ) || ( *NRFS > HPL_MAX_PARAM ) ) |
502 |
{ |
503 |
HPL_pwarn( stderr, __LINE__, "HPL_pdinfo", "%s %s %d", |
504 |
"Number of values of RFACT",
|
505 |
"is less than 1 or greater than", HPL_MAX_PARAM );
|
506 |
error = 1; goto label_error; |
507 |
} |
508 |
(void) fgets( line, HPL_LINE_MAX - 2, infp ); lineptr = line; |
509 |
for( i = 0; i < *NRFS; i++ ) |
510 |
{ |
511 |
(void) sscanf( lineptr, "%s", num ); lineptr += strlen( num ) + 1; |
512 |
j = atoi( num ); |
513 |
if( j == 0 ) RF[ i ] = HPL_LEFT_LOOKING; |
514 |
else if( j == 1 ) RF[ i ] = HPL_CROUT; |
515 |
else if( j == 2 ) RF[ i ] = HPL_RIGHT_LOOKING; |
516 |
else RF[ i ] = HPL_RIGHT_LOOKING;
|
517 |
} |
518 |
/*
|
519 |
* Broadcast topology (TP) (0=1rg, 1=1rgM, 2=2rg, 3=2rgM, 4=L, 5=LM); any other value selects 1rgM
|
520 |
*/
|
521 |
(void) fgets( line, HPL_LINE_MAX - 2, infp ); |
522 |
(void) sscanf( line, "%s", num ); *NTPS = atoi( num ); |
523 |
if( ( *NTPS < 1 ) || ( *NTPS > HPL_MAX_PARAM ) ) |
524 |
{ |
525 |
HPL_pwarn( stderr, __LINE__, "HPL_pdinfo", "%s %s %d", |
526 |
"Number of values of BCAST",
|
527 |
"is less than 1 or greater than", HPL_MAX_PARAM );
|
528 |
error = 1; goto label_error; |
529 |
} |
530 |
(void) fgets( line, HPL_LINE_MAX - 2, infp ); lineptr = line; |
531 |
for( i = 0; i < *NTPS; i++ ) |
532 |
{ |
533 |
(void) sscanf( lineptr, "%s", num ); lineptr += strlen( num ) + 1; |
534 |
j = atoi( num ); |
535 |
if( j == 0 ) TP[ i ] = HPL_1RING; |
536 |
else if( j == 1 ) TP[ i ] = HPL_1RING_M; |
537 |
else if( j == 2 ) TP[ i ] = HPL_2RING; |
538 |
else if( j == 3 ) TP[ i ] = HPL_2RING_M; |
539 |
else if( j == 4 ) TP[ i ] = HPL_BLONG; |
540 |
else if( j == 5 ) TP[ i ] = HPL_BLONG_M; |
541 |
else TP[ i ] = HPL_1RING_M;
|
542 |
} |
543 |
/*
|
544 |
* Lookahead depth (>=0) (DH)
|
545 |
*/
|
546 |
(void) fgets( line, HPL_LINE_MAX - 2, infp ); |
547 |
(void) sscanf( line, "%s", num ); *NDHS = atoi( num ); |
548 |
if( ( *NDHS < 1 ) || ( *NDHS > HPL_MAX_PARAM ) ) |
549 |
{ |
550 |
HPL_pwarn( stderr, __LINE__, "HPL_pdinfo", "%s %s %d", |
551 |
"Number of values of DEPTH",
|
552 |
"is less than 1 or greater than", HPL_MAX_PARAM );
|
553 |
error = 1; goto label_error; |
554 |
} |
555 |
(void) fgets( line, HPL_LINE_MAX - 2, infp ); lineptr = line; |
556 |
for( i = 0; i < *NDHS; i++ ) |
557 |
{ |
558 |
(void) sscanf( lineptr, "%s", num ); |
559 |
lineptr += strlen( num ) + 1;
|
560 |
if( ( DH[ i ] = atoi( num ) ) < 0 ) |
561 |
{ |
562 |
HPL_pwarn( stderr, __LINE__, "HPL_pdinfo",
|
563 |
"Value of DEPTH less than 0" );
|
564 |
error = 1; goto label_error; |
565 |
} |
566 |
} |
567 |
/*
|
568 |
* Swapping algorithm (0,1 or 2) (FSWAP)
|
569 |
*/
|
570 |
(void) fgets( line, HPL_LINE_MAX - 2, infp ); |
571 |
(void) sscanf( line, "%s", num ); j = atoi( num ); |
572 |
if( j == 0 ) *FSWAP = HPL_SWAP00; |
573 |
else if( j == 1 ) *FSWAP = HPL_SWAP01; |
574 |
else if( j == 2 ) *FSWAP = HPL_SW_MIX; |
575 |
else *FSWAP = HPL_SWAP01;
|
576 |
/*
|
577 |
* Swapping threshold (>=0) (TSWAP)
|
578 |
*/
|
579 |
(void) fgets( line, HPL_LINE_MAX - 2, infp ); |
580 |
(void) sscanf( line, "%s", num ); *TSWAP = atoi( num ); |
581 |
if( *TSWAP <= 0 ) *TSWAP = 0; |
582 |
/*
|
583 |
* L1 in (no-)transposed form (0 or 1)
|
584 |
*/
|
585 |
(void) fgets( line, HPL_LINE_MAX - 2, infp ); |
586 |
(void) sscanf( line, "%s", num ); *L1NOTRAN = atoi( num ); |
587 |
if( ( *L1NOTRAN != 0 ) && ( *L1NOTRAN != 1 ) ) *L1NOTRAN = 0; |
588 |
/*
|
589 |
* U in (no-)transposed form (0 or 1)
|
590 |
*/
|
591 |
(void) fgets( line, HPL_LINE_MAX - 2, infp ); |
592 |
(void) sscanf( line, "%s", num ); *UNOTRAN = atoi( num ); |
593 |
if( ( *UNOTRAN != 0 ) && ( *UNOTRAN != 1 ) ) *UNOTRAN = 0; |
594 |
/*
|
595 |
* Equilibration (0=no, 1=yes)
|
596 |
*/
|
597 |
(void) fgets( line, HPL_LINE_MAX - 2, infp ); |
598 |
(void) sscanf( line, "%s", num ); *EQUIL = atoi( num ); |
599 |
if( ( *EQUIL != 0 ) && ( *EQUIL != 1 ) ) *EQUIL = 1; |
600 |
/*
|
601 |
* Memory alignment in double precision words (> 0) (ALIGN)
|
602 |
*/
|
603 |
(void) fgets( line, HPL_LINE_MAX - 2, infp ); |
604 |
(void) sscanf( line, "%s", num ); *ALIGN = atoi( num ); |
605 |
if( *ALIGN <= 0 ) *ALIGN = 4; |
606 |
/*
|
607 |
* Close input file
|
608 |
*/
|
609 |
label_error:
|
610 |
(void) fclose( infp );
|
611 |
} |
612 |
else { TEST->outfp = NULL; } |
613 |
/*
|
614 |
* Check for error on reading input file
|
615 |
*/
|
616 |
(void) HPL_all_reduce( (void *)(&error), 1, HPL_INT, HPL_max, |
617 |
MPI_COMM_WORLD ); |
618 |
if( error )
|
619 |
{ |
620 |
if( rank == 0 ) |
621 |
HPL_pwarn( stderr, __LINE__, "HPL_pdinfo",
|
622 |
"Illegal input in file HPL.dat. Exiting ..." );
|
623 |
MPI_Finalize(); |
624 |
#ifdef HPL_CALL_VSIPL
|
625 |
(void) vsip_finalize( NULL ); |
626 |
#endif
|
627 |
exit( 1 );
|
628 |
} |
629 |
/*
|
630 |
* Compute and broadcast machine epsilon
|
631 |
*/
|
632 |
TEST->epsil = HPL_pdlamch( MPI_COMM_WORLD, HPL_MACH_EPS ); |
633 |
/*
|
634 |
* Broadcast the threshold value read by process 0
|
635 |
*/
|
636 |
(void) HPL_broadcast( (void *)(&(TEST->thrsh)), 1, HPL_DOUBLE, 0, |
637 |
MPI_COMM_WORLD ); |
638 |
/*
|
639 |
* Broadcast array sizes
|
640 |
*/
|
641 |
iwork = (int *)malloc( (size_t)(15) * sizeof( int ) ); |
642 |
if( rank == 0 ) |
643 |
{ |
644 |
iwork[ 0] = *NS; iwork[ 1] = *NBS; |
645 |
iwork[ 2] = ( *PMAPPIN == HPL_ROW_MAJOR ? 0 : 1 ); |
646 |
iwork[ 3] = *NPQS; iwork[ 4] = *NPFS; iwork[ 5] = *NBMS; |
647 |
iwork[ 6] = *NDVS; iwork[ 7] = *NRFS; iwork[ 8] = *NTPS; |
648 |
iwork[ 9] = *NDHS; iwork[10] = *TSWAP; iwork[11] = *L1NOTRAN; |
649 |
iwork[12] = *UNOTRAN; iwork[13] = *EQUIL; iwork[14] = *ALIGN; |
650 |
} |
651 |
(void) HPL_broadcast( (void *)iwork, 15, HPL_INT, 0, MPI_COMM_WORLD ); |
652 |
if( rank != 0 ) |
653 |
{ |
654 |
*NS = iwork[ 0]; *NBS = iwork[ 1]; |
655 |
*PMAPPIN = ( iwork[ 2] == 0 ? HPL_ROW_MAJOR : HPL_COLUMN_MAJOR ); |
656 |
*NPQS = iwork[ 3]; *NPFS = iwork[ 4]; *NBMS = iwork[ 5]; |
657 |
*NDVS = iwork[ 6]; *NRFS = iwork[ 7]; *NTPS = iwork[ 8]; |
658 |
*NDHS = iwork[ 9]; *TSWAP = iwork[10]; *L1NOTRAN = iwork[11]; |
659 |
*UNOTRAN = iwork[12]; *EQUIL = iwork[13]; *ALIGN = iwork[14]; |
660 |
} |
661 |
if( iwork ) free( iwork );
|
662 |
/*
|
663 |
* Pack information arrays and broadcast
|
664 |
*/
|
665 |
lwork = (*NS) + (*NBS) + 2 * (*NPQS) + (*NPFS) + (*NBMS) +
|
666 |
(*NDVS) + (*NRFS) + (*NTPS) + (*NDHS) + 1;
|
667 |
iwork = (int *)malloc( (size_t)(lwork) * sizeof( int ) ); |
668 |
if( rank == 0 ) |
669 |
{ |
670 |
j = 0;
|
671 |
for( i = 0; i < *NS; i++ ) { iwork[j] = N [i]; j++; } |
672 |
for( i = 0; i < *NBS; i++ ) { iwork[j] = NB[i]; j++; } |
673 |
for( i = 0; i < *NPQS; i++ ) { iwork[j] = P [i]; j++; } |
674 |
for( i = 0; i < *NPQS; i++ ) { iwork[j] = Q [i]; j++; } |
675 |
for( i = 0; i < *NPFS; i++ ) |
676 |
{ |
677 |
if( PF[i] == HPL_LEFT_LOOKING ) iwork[j] = 0; |
678 |
else if( PF[i] == HPL_CROUT ) iwork[j] = 1; |
679 |
else if( PF[i] == HPL_RIGHT_LOOKING ) iwork[j] = 2; |
680 |
j++; |
681 |
} |
682 |
for( i = 0; i < *NBMS; i++ ) { iwork[j] = NBM[i]; j++; } |
683 |
for( i = 0; i < *NDVS; i++ ) { iwork[j] = NDV[i]; j++; } |
684 |
for( i = 0; i < *NRFS; i++ ) |
685 |
{ |
686 |
if( RF[i] == HPL_LEFT_LOOKING ) iwork[j] = 0; |
687 |
else if( RF[i] == HPL_CROUT ) iwork[j] = 1; |
688 |
else if( RF[i] == HPL_RIGHT_LOOKING ) iwork[j] = 2; |
689 |
j++; |
690 |
} |
691 |
for( i = 0; i < *NTPS; i++ ) |
692 |
{ |
693 |
if( TP[i] == HPL_1RING ) iwork[j] = 0; |
694 |
else if( TP[i] == HPL_1RING_M ) iwork[j] = 1; |
695 |
else if( TP[i] == HPL_2RING ) iwork[j] = 2; |
696 |
else if( TP[i] == HPL_2RING_M ) iwork[j] = 3; |
697 |
else if( TP[i] == HPL_BLONG ) iwork[j] = 4; |
698 |
else if( TP[i] == HPL_BLONG_M ) iwork[j] = 5; |
699 |
j++; |
700 |
} |
701 |
for( i = 0; i < *NDHS; i++ ) { iwork[j] = DH[i]; j++; } |
702 |
|
703 |
if( *FSWAP == HPL_SWAP00 ) iwork[j] = 0; |
704 |
else if( *FSWAP == HPL_SWAP01 ) iwork[j] = 1; |
705 |
else if( *FSWAP == HPL_SW_MIX ) iwork[j] = 2; |
706 |
j++; |
707 |
} |
708 |
(void) HPL_broadcast( (void*)iwork, lwork, HPL_INT, 0, |
709 |
MPI_COMM_WORLD ); |
710 |
if( rank != 0 ) |
711 |
{ |
712 |
j = 0;
|
713 |
for( i = 0; i < *NS; i++ ) { N [i] = iwork[j]; j++; } |
714 |
for( i = 0; i < *NBS; i++ ) { NB[i] = iwork[j]; j++; } |
715 |
for( i = 0; i < *NPQS; i++ ) { P [i] = iwork[j]; j++; } |
716 |
for( i = 0; i < *NPQS; i++ ) { Q [i] = iwork[j]; j++; } |
717 |
|
718 |
for( i = 0; i < *NPFS; i++ ) |
719 |
{ |
720 |
if( iwork[j] == 0 ) PF[i] = HPL_LEFT_LOOKING; |
721 |
else if( iwork[j] == 1 ) PF[i] = HPL_CROUT; |
722 |
else if( iwork[j] == 2 ) PF[i] = HPL_RIGHT_LOOKING; |
723 |
j++; |
724 |
} |
725 |
for( i = 0; i < *NBMS; i++ ) { NBM[i] = iwork[j]; j++; } |
726 |
for( i = 0; i < *NDVS; i++ ) { NDV[i] = iwork[j]; j++; } |
727 |
for( i = 0; i < *NRFS; i++ ) |
728 |
{ |
729 |
if( iwork[j] == 0 ) RF[i] = HPL_LEFT_LOOKING; |
730 |
else if( iwork[j] == 1 ) RF[i] = HPL_CROUT; |
731 |
else if( iwork[j] == 2 ) RF[i] = HPL_RIGHT_LOOKING; |
732 |
j++; |
733 |
} |
734 |
for( i = 0; i < *NTPS; i++ ) |
735 |
{ |
736 |
if( iwork[j] == 0 ) TP[i] = HPL_1RING; |
737 |
else if( iwork[j] == 1 ) TP[i] = HPL_1RING_M; |
738 |
else if( iwork[j] == 2 ) TP[i] = HPL_2RING; |
739 |
else if( iwork[j] == 3 ) TP[i] = HPL_2RING_M; |
740 |
else if( iwork[j] == 4 ) TP[i] = HPL_BLONG; |
741 |
else if( iwork[j] == 5 ) TP[i] = HPL_BLONG_M; |
742 |
j++; |
743 |
} |
744 |
for( i = 0; i < *NDHS; i++ ) { DH[i] = iwork[j]; j++; } |
745 |
|
746 |
if( iwork[j] == 0 ) *FSWAP = HPL_SWAP00; |
747 |
else if( iwork[j] == 1 ) *FSWAP = HPL_SWAP01; |
748 |
else if( iwork[j] == 2 ) *FSWAP = HPL_SW_MIX; |
749 |
j++; |
750 |
} |
751 |
if( iwork ) free( iwork );
|
752 |
/*
|
753 |
* regurgitate input
|
754 |
*/
|
755 |
if( rank == 0 ) |
756 |
{ |
757 |
HPL_fprintf( TEST->outfp, "%s%s\n",
|
758 |
"========================================",
|
759 |
"========================================" );
|
760 |
HPL_fprintf( TEST->outfp, "%s%s\n",
|
761 |
"HPLinpack 2.0 -- High-Performance Linpack benchmark -- ",
|
762 |
" September 10, 2008" );
|
763 |
HPL_fprintf( TEST->outfp, "%s%s\n",
|
764 |
"Written by A. Petitet and R. Clint Whaley, ",
|
765 |
"Innovative Computing Laboratory, UTK" );
|
766 |
HPL_fprintf( TEST->outfp, "%s%s\n",
|
767 |
"Modified by Piotr Luszczek, ",
|
768 |
"Innovative Computing Laboratory, UTK" );
|
769 |
HPL_fprintf( TEST->outfp, "%s%s\n",
|
770 |
"Modified by Julien Langou, ",
|
771 |
"University of Colorado Denver");
|
772 |
HPL_fprintf( TEST->outfp, "%s%s\n",
|
773 |
"========================================",
|
774 |
"========================================" );
|
775 |
|
776 |
HPL_fprintf( TEST->outfp, "\n%s\n",
|
777 |
"An explanation of the input/output parameters follows:" );
|
778 |
HPL_fprintf( TEST->outfp, "%s\n",
|
779 |
"T/V : Wall time / encoded variant." );
|
780 |
HPL_fprintf( TEST->outfp, "%s\n",
|
781 |
"N : The order of the coefficient matrix A." );
|
782 |
HPL_fprintf( TEST->outfp, "%s\n",
|
783 |
"NB : The partitioning blocking factor." );
|
784 |
HPL_fprintf( TEST->outfp, "%s\n",
|
785 |
"P : The number of process rows." );
|
786 |
HPL_fprintf( TEST->outfp, "%s\n",
|
787 |
"Q : The number of process columns." );
|
788 |
HPL_fprintf( TEST->outfp, "%s\n",
|
789 |
"Time : Time in seconds to solve the linear system." );
|
790 |
HPL_fprintf( TEST->outfp, "%s\n\n",
|
791 |
"Gflops : Rate of execution for solving the linear system." );
|
792 |
HPL_fprintf( TEST->outfp, "%s\n",
|
793 |
"The following parameter values will be used:" );
|
794 |
/*
|
795 |
* Problem size
|
796 |
*/
|
797 |
HPL_fprintf( TEST->outfp, "\nN :" );
|
798 |
for( i = 0; i < Mmin( 8, *NS ); i++ ) |
799 |
HPL_fprintf( TEST->outfp, "%8d ", N[i] );
|
800 |
if( *NS > 8 ) |
801 |
{ |
802 |
HPL_fprintf( TEST->outfp, "\n " );
|
803 |
for( i = 8; i < Mmin( 16, *NS ); i++ ) |
804 |
HPL_fprintf( TEST->outfp, "%8d ", N[i] );
|
805 |
if( *NS > 16 ) |
806 |
{ |
807 |
HPL_fprintf( TEST->outfp, "\n " );
|
808 |
for( i = 16; i < *NS; i++ ) |
809 |
HPL_fprintf( TEST->outfp, "%8d ", N[i] );
|
810 |
} |
811 |
} |
812 |
/*
|
813 |
* Distribution blocking factor
|
814 |
*/
|
815 |
HPL_fprintf( TEST->outfp, "\nNB :" );
|
816 |
for( i = 0; i < Mmin( 8, *NBS ); i++ ) |
817 |
HPL_fprintf( TEST->outfp, "%8d ", NB[i] );
|
818 |
if( *NBS > 8 ) |
819 |
{ |
820 |
HPL_fprintf( TEST->outfp, "\n " );
|
821 |
for( i = 8; i < Mmin( 16, *NBS ); i++ ) |
822 |
HPL_fprintf( TEST->outfp, "%8d ", NB[i] );
|
823 |
if( *NBS > 16 ) |
824 |
{ |
825 |
HPL_fprintf( TEST->outfp, "\n " );
|
826 |
for( i = 16; i < *NBS; i++ ) |
827 |
HPL_fprintf( TEST->outfp, "%8d ", NB[i] );
|
828 |
} |
829 |
} |
830 |
/*
|
831 |
* Process mapping
|
832 |
*/
|
833 |
HPL_fprintf( TEST->outfp, "\nPMAP :" );
|
834 |
if( *PMAPPIN == HPL_ROW_MAJOR )
|
835 |
HPL_fprintf( TEST->outfp, " Row-major process mapping" );
|
836 |
else if( *PMAPPIN == HPL_COLUMN_MAJOR ) |
837 |
HPL_fprintf( TEST->outfp, " Column-major process mapping" );
|
838 |
/*
|
839 |
* Process grid
|
840 |
*/
|
841 |
HPL_fprintf( TEST->outfp, "\nP :" );
|
842 |
for( i = 0; i < Mmin( 8, *NPQS ); i++ ) |
843 |
HPL_fprintf( TEST->outfp, "%8d ", P[i] );
|
844 |
if( *NPQS > 8 ) |
845 |
{ |
846 |
HPL_fprintf( TEST->outfp, "\n " );
|
847 |
for( i = 8; i < Mmin( 16, *NPQS ); i++ ) |
848 |
HPL_fprintf( TEST->outfp, "%8d ", P[i] );
|
849 |
if( *NPQS > 16 ) |
850 |
{ |
851 |
HPL_fprintf( TEST->outfp, "\n " );
|
852 |
for( i = 16; i < *NPQS; i++ ) |
853 |
HPL_fprintf( TEST->outfp, "%8d ", P[i] );
|
854 |
} |
855 |
} |
856 |
HPL_fprintf( TEST->outfp, "\nQ :" );
|
857 |
for( i = 0; i < Mmin( 8, *NPQS ); i++ ) |
858 |
HPL_fprintf( TEST->outfp, "%8d ", Q[i] );
|
859 |
if( *NPQS > 8 ) |
860 |
{ |
861 |
HPL_fprintf( TEST->outfp, "\n " );
|
862 |
for( i = 8; i < Mmin( 16, *NPQS ); i++ ) |
863 |
HPL_fprintf( TEST->outfp, "%8d ", Q[i] );
|
864 |
if( *NPQS > 16 ) |
865 |
{ |
866 |
HPL_fprintf( TEST->outfp, "\n " );
|
867 |
for( i = 16; i < *NPQS; i++ ) |
868 |
HPL_fprintf( TEST->outfp, "%8d ", Q[i] );
|
869 |
} |
870 |
} |
871 |
/*
|
872 |
* Panel Factorization
|
873 |
*/
|
874 |
HPL_fprintf( TEST->outfp, "\nPFACT :" );
|
875 |
for( i = 0; i < Mmin( 8, *NPFS ); i++ ) |
876 |
{ |
877 |
if( PF[i] == HPL_LEFT_LOOKING )
|
878 |
HPL_fprintf( TEST->outfp, " Left " );
|
879 |
else if( PF[i] == HPL_CROUT ) |
880 |
HPL_fprintf( TEST->outfp, " Crout " );
|
881 |
else if( PF[i] == HPL_RIGHT_LOOKING ) |
882 |
HPL_fprintf( TEST->outfp, " Right " );
|
883 |
} |
884 |
if( *NPFS > 8 ) |
885 |
{ |
886 |
HPL_fprintf( TEST->outfp, "\n " );
|
887 |
for( i = 8; i < Mmin( 16, *NPFS ); i++ ) |
888 |
{ |
889 |
if( PF[i] == HPL_LEFT_LOOKING )
|
890 |
HPL_fprintf( TEST->outfp, " Left " );
|
891 |
else if( PF[i] == HPL_CROUT ) |
892 |
HPL_fprintf( TEST->outfp, " Crout " );
|
893 |
else if( PF[i] == HPL_RIGHT_LOOKING ) |
894 |
HPL_fprintf( TEST->outfp, " Right " );
|
895 |
} |
896 |
if( *NPFS > 16 ) |
897 |
{ |
898 |
HPL_fprintf( TEST->outfp, "\n " );
|
899 |
for( i = 16; i < *NPFS; i++ ) |
900 |
{ |
901 |
if( PF[i] == HPL_LEFT_LOOKING )
|
902 |
HPL_fprintf( TEST->outfp, " Left " );
|
903 |
else if( PF[i] == HPL_CROUT ) |
904 |
HPL_fprintf( TEST->outfp, " Crout " );
|
905 |
else if( PF[i] == HPL_RIGHT_LOOKING ) |
906 |
HPL_fprintf( TEST->outfp, " Right " );
|
907 |
} |
908 |
} |
909 |
} |
910 |
/*
|
911 |
* Recursive stopping criterion
|
912 |
*/
|
913 |
HPL_fprintf( TEST->outfp, "\nNBMIN :" );
|
914 |
for( i = 0; i < Mmin( 8, *NBMS ); i++ ) |
915 |
HPL_fprintf( TEST->outfp, "%8d ", NBM[i] );
|
916 |
if( *NBMS > 8 ) |
917 |
{ |
918 |
HPL_fprintf( TEST->outfp, "\n " );
|
919 |
for( i = 8; i < Mmin( 16, *NBMS ); i++ ) |
920 |
HPL_fprintf( TEST->outfp, "%8d ", NBM[i] );
|
921 |
if( *NBMS > 16 ) |
922 |
{ |
923 |
HPL_fprintf( TEST->outfp, "\n " );
|
924 |
for( i = 16; i < *NBMS; i++ ) |
925 |
HPL_fprintf( TEST->outfp, "%8d ", NBM[i] );
|
926 |
} |
927 |
} |
928 |
/*
|
929 |
* Number of panels in recursion
|
930 |
*/
|
931 |
HPL_fprintf( TEST->outfp, "\nNDIV :" );
|
932 |
for( i = 0; i < Mmin( 8, *NDVS ); i++ ) |
933 |
HPL_fprintf( TEST->outfp, "%8d ", NDV[i] );
|
934 |
if( *NDVS > 8 ) |
935 |
{ |
936 |
HPL_fprintf( TEST->outfp, "\n " );
|
937 |
for( i = 8; i < Mmin( 16, *NDVS ); i++ ) |
938 |
HPL_fprintf( TEST->outfp, "%8d ", NDV[i] );
|
939 |
if( *NDVS > 16 ) |
940 |
{ |
941 |
HPL_fprintf( TEST->outfp, "\n " );
|
942 |
for( i = 16; i < *NDVS; i++ ) |
943 |
HPL_fprintf( TEST->outfp, "%8d ", NDV[i] );
|
944 |
} |
945 |
} |
946 |
/*
|
947 |
* Recursive Factorization
|
948 |
*/
|
949 |
HPL_fprintf( TEST->outfp, "\nRFACT :" );
|
950 |
for( i = 0; i < Mmin( 8, *NRFS ); i++ ) |
951 |
{ |
952 |
if( RF[i] == HPL_LEFT_LOOKING )
|
953 |
HPL_fprintf( TEST->outfp, " Left " );
|
954 |
else if( RF[i] == HPL_CROUT ) |
955 |
HPL_fprintf( TEST->outfp, " Crout " );
|
956 |
else if( RF[i] == HPL_RIGHT_LOOKING ) |
957 |
HPL_fprintf( TEST->outfp, " Right " );
|
958 |
} |
959 |
if( *NRFS > 8 ) |
960 |
{ |
961 |
HPL_fprintf( TEST->outfp, "\n " );
|
962 |
for( i = 8; i < Mmin( 16, *NRFS ); i++ ) |
963 |
{ |
964 |
if( RF[i] == HPL_LEFT_LOOKING )
|
965 |
HPL_fprintf( TEST->outfp, " Left " );
|
966 |
else if( RF[i] == HPL_CROUT ) |
967 |
HPL_fprintf( TEST->outfp, " Crout " );
|
968 |
else if( RF[i] == HPL_RIGHT_LOOKING ) |
969 |
HPL_fprintf( TEST->outfp, " Right " );
|
970 |
} |
971 |
if( *NRFS > 16 ) |
972 |
{ |
973 |
HPL_fprintf( TEST->outfp, "\n " );
|
974 |
for( i = 16; i < *NRFS; i++ ) |
975 |
{ |
976 |
if( RF[i] == HPL_LEFT_LOOKING )
|
977 |
HPL_fprintf( TEST->outfp, " Left " );
|
978 |
else if( RF[i] == HPL_CROUT ) |
979 |
HPL_fprintf( TEST->outfp, " Crout " );
|
980 |
else if( RF[i] == HPL_RIGHT_LOOKING ) |
981 |
HPL_fprintf( TEST->outfp, " Right " );
|
982 |
} |
983 |
} |
984 |
} |
985 |
/*
|
986 |
* Broadcast topology
|
987 |
*/
|
988 |
HPL_fprintf( TEST->outfp, "\nBCAST :" );
|
989 |
for( i = 0; i < Mmin( 8, *NTPS ); i++ ) |
990 |
{ |
991 |
if( TP[i] == HPL_1RING )
|
992 |
HPL_fprintf( TEST->outfp, " 1ring " );
|
993 |
else if( TP[i] == HPL_1RING_M ) |
994 |
HPL_fprintf( TEST->outfp, " 1ringM " );
|
995 |
else if( TP[i] == HPL_2RING ) |
996 |
HPL_fprintf( TEST->outfp, " 2ring " );
|
997 |
else if( TP[i] == HPL_2RING_M ) |
998 |
HPL_fprintf( TEST->outfp, " 2ringM " );
|
999 |
else if( TP[i] == HPL_BLONG ) |
1000 |
HPL_fprintf( TEST->outfp, " Blong " );
|
1001 |
else if( TP[i] == HPL_BLONG_M ) |
1002 |
HPL_fprintf( TEST->outfp, " BlongM " );
|
1003 |
} |
1004 |
if( *NTPS > 8 ) |
1005 |
{ |
1006 |
HPL_fprintf( TEST->outfp, "\n " );
|
1007 |
for( i = 8; i < Mmin( 16, *NTPS ); i++ ) |
1008 |
{ |
1009 |
if( TP[i] == HPL_1RING )
|
1010 |
HPL_fprintf( TEST->outfp, " 1ring " );
|
1011 |
else if( TP[i] == HPL_1RING_M ) |
1012 |
HPL_fprintf( TEST->outfp, " 1ringM " );
|
1013 |
else if( TP[i] == HPL_2RING ) |
1014 |
HPL_fprintf( TEST->outfp, " 2ring " );
|
1015 |
else if( TP[i] == HPL_2RING_M ) |
1016 |
HPL_fprintf( TEST->outfp, " 2ringM " );
|
1017 |
else if( TP[i] == HPL_BLONG ) |
1018 |
HPL_fprintf( TEST->outfp, " Blong " );
|
1019 |
else if( TP[i] == HPL_BLONG_M ) |
1020 |
HPL_fprintf( TEST->outfp, " BlongM " );
|
1021 |
} |
1022 |
if( *NTPS > 16 ) |
1023 |
{ |
1024 |
HPL_fprintf( TEST->outfp, "\n " );
|
1025 |
for( i = 16; i < *NTPS; i++ ) |
1026 |
{ |
1027 |
if( TP[i] == HPL_1RING )
|
1028 |
HPL_fprintf( TEST->outfp, " 1ring " );
|
1029 |
else if( TP[i] == HPL_1RING_M ) |
1030 |
HPL_fprintf( TEST->outfp, " 1ringM " );
|
1031 |
else if( TP[i] == HPL_2RING ) |
1032 |
HPL_fprintf( TEST->outfp, " 2ring " );
|
1033 |
else if( TP[i] == HPL_2RING_M ) |
1034 |
HPL_fprintf( TEST->outfp, " 2ringM " );
|
1035 |
else if( TP[i] == HPL_BLONG ) |
1036 |
HPL_fprintf( TEST->outfp, " Blong " );
|
1037 |
else if( TP[i] == HPL_BLONG_M ) |
1038 |
HPL_fprintf( TEST->outfp, " BlongM " );
|
1039 |
} |
1040 |
} |
1041 |
} |
1042 |
/*
|
1043 |
* Lookahead depths
|
1044 |
*/
|
1045 |
HPL_fprintf( TEST->outfp, "\nDEPTH :" );
|
1046 |
for( i = 0; i < Mmin( 8, *NDHS ); i++ ) |
1047 |
HPL_fprintf( TEST->outfp, "%8d ", DH[i] );
|
1048 |
if( *NDHS > 8 ) |
1049 |
{ |
1050 |
HPL_fprintf( TEST->outfp, "\n " );
|
1051 |
for( i = 8; i < Mmin( 16, *NDHS ); i++ ) |
1052 |
HPL_fprintf( TEST->outfp, "%8d ", DH[i] );
|
1053 |
if( *NDHS > 16 ) |
1054 |
{ |
1055 |
HPL_fprintf( TEST->outfp, "\n " );
|
1056 |
for( i = 16; i < *NDHS; i++ ) |
1057 |
HPL_fprintf( TEST->outfp, "%8d ", DH[i] );
|
1058 |
} |
1059 |
} |
1060 |
/*
|
1061 |
* Swapping algorithm
|
1062 |
*/
|
1063 |
HPL_fprintf( TEST->outfp, "\nSWAP :" );
|
1064 |
if( *FSWAP == HPL_SWAP00 )
|
1065 |
HPL_fprintf( TEST->outfp, " Binary-exchange" );
|
1066 |
else if( *FSWAP == HPL_SWAP01 ) |
1067 |
HPL_fprintf( TEST->outfp, " Spread-roll (long)" );
|
1068 |
else if( *FSWAP == HPL_SW_MIX ) |
1069 |
HPL_fprintf( TEST->outfp, " Mix (threshold = %d)", *TSWAP );
|
1070 |
/*
|
1071 |
* L1 storage form
|
1072 |
*/
|
1073 |
HPL_fprintf( TEST->outfp, "\nL1 :" );
|
1074 |
if( *L1NOTRAN != 0 ) |
1075 |
HPL_fprintf( TEST->outfp, " no-transposed form" );
|
1076 |
else
|
1077 |
HPL_fprintf( TEST->outfp, " transposed form" );
|
1078 |
/*
|
1079 |
* U storage form
|
1080 |
*/
|
1081 |
HPL_fprintf( TEST->outfp, "\nU :" );
|
1082 |
if( *UNOTRAN != 0 ) |
1083 |
HPL_fprintf( TEST->outfp, " no-transposed form" );
|
1084 |
else
|
1085 |
HPL_fprintf( TEST->outfp, " transposed form" );
|
1086 |
/*
|
1087 |
* Equilibration
|
1088 |
*/
|
1089 |
HPL_fprintf( TEST->outfp, "\nEQUIL :" );
|
1090 |
if( *EQUIL != 0 ) |
1091 |
HPL_fprintf( TEST->outfp, " yes" );
|
1092 |
else
|
1093 |
HPL_fprintf( TEST->outfp, " no" );
|
1094 |
/*
|
1095 |
* Alignment
|
1096 |
*/
|
1097 |
HPL_fprintf( TEST->outfp, "\nALIGN : %d double precision words",
|
1098 |
*ALIGN ); |
1099 |
|
1100 |
HPL_fprintf( TEST->outfp, "\n\n" );
|
1101 |
/*
|
1102 |
* For testing only
|
1103 |
*/
|
1104 |
if( TEST->thrsh > HPL_rzero )
|
1105 |
{ |
1106 |
HPL_fprintf( TEST->outfp, "%s%s\n\n",
|
1107 |
"----------------------------------------",
|
1108 |
"----------------------------------------" );
|
1109 |
HPL_fprintf( TEST->outfp, "%s\n",
|
1110 |
"- The matrix A is randomly generated for each test." );
|
1111 |
HPL_fprintf( TEST->outfp, "%s\n",
|
1112 |
"- The following scaled residual check will be computed:" );
|
1113 |
HPL_fprintf( TEST->outfp, "%s\n",
|
1114 |
" ||Ax-b||_oo / ( eps * ( || x ||_oo * || A ||_oo + || b ||_oo ) * N )" );
|
1115 |
HPL_fprintf( TEST->outfp, "%s %21.6e\n",
|
1116 |
"- The relative machine precision (eps) is taken to be ",
|
1117 |
TEST->epsil ); |
1118 |
HPL_fprintf( TEST->outfp, "%s %11.1f\n\n",
|
1119 |
"- Computational tests pass if scaled residuals are less than ",
|
1120 |
TEST->thrsh ); |
1121 |
} |
1122 |
} |
1123 |
/*
|
1124 |
* End of HPL_pdinfo
|
1125 |
*/
|
1126 |
} |