root / include / hpl_pauxil.h @ 9
Historique | Voir | Annoter | Télécharger (19,13 ko)
1 |
/*
|
---|---|
2 |
* -- High Performance Computing Linpack Benchmark (HPL)
|
3 |
* HPL - 2.0 - September 10, 2008
|
4 |
* Antoine P. Petitet
|
5 |
* University of Tennessee, Knoxville
|
6 |
* Innovative Computing Laboratory
|
7 |
* (C) Copyright 2000-2008 All Rights Reserved
|
8 |
*
|
9 |
* -- Copyright notice and Licensing terms:
|
10 |
*
|
11 |
* Redistribution and use in source and binary forms, with or without
|
12 |
* modification, are permitted provided that the following conditions
|
13 |
* are met:
|
14 |
*
|
15 |
* 1. Redistributions of source code must retain the above copyright
|
16 |
* notice, this list of conditions and the following disclaimer.
|
17 |
*
|
18 |
* 2. Redistributions in binary form must reproduce the above copyright
|
19 |
* notice, this list of conditions, and the following disclaimer in the
|
20 |
* documentation and/or other materials provided with the distribution.
|
21 |
*
|
22 |
* 3. All advertising materials mentioning features or use of this
|
23 |
* software must display the following acknowledgement:
|
24 |
* This product includes software developed at the University of
|
25 |
* Tennessee, Knoxville, Innovative Computing Laboratory.
|
26 |
*
|
27 |
* 4. The name of the University, the name of the Laboratory, or the
|
28 |
* names of its contributors may not be used to endorse or promote
|
29 |
* products derived from this software without specific written
|
30 |
* permission.
|
31 |
*
|
32 |
* -- Disclaimer:
|
33 |
*
|
34 |
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
35 |
* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
36 |
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
37 |
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
|
38 |
* OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
39 |
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
40 |
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
41 |
* DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
42 |
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
43 |
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
44 |
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
45 |
*/
|
46 |
#ifndef HPL_PAUXIL_H
|
47 |
#define HPL_PAUXIL_H
|
48 |
/*
|
49 |
* ---------------------------------------------------------------------
|
50 |
* Include files
|
51 |
* ---------------------------------------------------------------------
|
52 |
*/
|
53 |
#include "hpl_misc.h" |
54 |
#include "hpl_blas.h" |
55 |
#include "hpl_auxil.h" |
56 |
|
57 |
#include "hpl_pmisc.h" |
58 |
#include "hpl_grid.h" |
59 |
/*
|
60 |
* ---------------------------------------------------------------------
|
61 |
* #define macros definitions
|
62 |
* ---------------------------------------------------------------------
|
63 |
*/
|
64 |
/*
 * Mindxg2p stores in proc_ the coordinate of the process owning the
 * entry globally indexed by ig_.
 *
 * When ig_ < inb_, src_ is negative, or nprocs_ <= 1, proc_ is simply
 * set to src_. Otherwise proc_ is src_ + 1 + (ig_-inb_)/nb_ reduced
 * modulo nprocs_.
 *
 * The expansion is a brace-enclosed statement block (not an expression)
 * and evaluates its arguments more than once: proc_ must be a side-
 * effect free assignable lvalue, the other arguments side-effect free
 * integer expressions.
 */
#define    Mindxg2p( ig_, inb_, nb_, proc_, src_, nprocs_ )            \
           {                                                           \
              if( ( (ig_) >= (inb_) ) && ( (src_) >= 0 ) &&            \
                  ( (nprocs_) > 1 ) )                                  \
              {                                                        \
                 proc_  = (src_) + 1 + ( (ig_)-(inb_) ) / (nb_);       \
                 /* wrap around: proc_ = proc_ mod nprocs_ */          \
                 proc_ -= ( proc_ / (nprocs_) ) * (nprocs_);           \
              }                                                        \
              else                                                     \
              {                                                        \
                 proc_ = (src_);                                       \
              }                                                        \
           }
81 |
|
82 |
/*
 * Mindxg2l stores in il_ the local index corresponding to the global
 * index ig_, for a distribution described by inb_, nb_, src_ and
 * nprocs_.
 *
 * When ig_ < inb_, src_ is -1, or nprocs_ is 1, il_ is simply ig_.
 * The proc_ argument is accepted but never referenced by the expansion.
 *
 * The expansion is a brace-enclosed statement block that evaluates its
 * arguments more than once and declares the block-local names i__ and
 * j__; arguments must not use those names or have side effects.
 */
#define    Mindxg2l( il_, ig_, inb_, nb_, proc_, src_, nprocs_ )       \
           {                                                           \
              if( ( (ig_) < (inb_) ) || ( (src_) == -1 ) ||            \
                  ( (nprocs_) == 1 ) ) { il_ = (ig_); }                \
              else                                                     \
              {                                                        \
                 int i__, j__;                                         \
                 /* i__: index of the nb_-sized block holding ig_   */ \
                 /* (counted after the first inb_ entries);         */ \
                 /* j__: number of complete rounds of nprocs_ blocks*/ \
                 j__ = ( i__ = ( (ig_)-(inb_) ) / (nb_) ) / (nprocs_); \
                 /* the test is zero only when block i__ is the     */ \
                 /* last one of its round of nprocs_ blocks         */ \
                 il_ = (nb_)*( j__ - i__ ) +                           \
                       ( (i__ + 1 - ( j__ + 1 ) * (nprocs_) ) ?        \
                         (ig_) - (inb_) : (ig_) );                     \
              }                                                        \
           }
95 |
|
96 |
/*
 * Mindxg2lp stores in il_ the local index and in proc_ the process
 * coordinate corresponding to the global index ig_, for a distribution
 * described by inb_, nb_, src_ and nprocs_.
 *
 * When ig_ < inb_, src_ is -1, or nprocs_ is 1, il_ is ig_ and proc_ is
 * src_. Otherwise proc_ is src_ + 1 + (ig_-inb_)/nb_ reduced modulo
 * nprocs_.
 *
 * The expansion is a brace-enclosed statement block that evaluates its
 * arguments more than once and declares the block-local names i__ and
 * j__; il_ and proc_ must be assignable lvalues, and no argument may
 * use those names or have side effects.
 */
#define    Mindxg2lp( il_, proc_, ig_, inb_, nb_, src_, nprocs_ )      \
           {                                                           \
              if( ( (ig_) < (inb_) ) || ( (src_) == -1 ) ||            \
                  ( (nprocs_) == 1 ) )                                 \
              { il_ = (ig_); proc_ = (src_); }                         \
              else                                                     \
              {                                                        \
                 int i__, j__;                                         \
                 /* i__: index of the nb_-sized block holding ig_   */ \
                 /* (counted after the first inb_ entries);         */ \
                 /* j__: number of complete rounds of nprocs_ blocks*/ \
                 j__ = ( i__ = ( (ig_)-(inb_) ) / (nb_) ) / (nprocs_); \
                 /* the test is zero only when block i__ is the     */ \
                 /* last one of its round, i.e. when proc_ below    */ \
                 /* wraps back to src_                              */ \
                 il_ = (nb_)*(j__-i__) +                               \
                       ( ( i__ + 1 - ( j__ + 1 ) * (nprocs_) ) ?       \
                         (ig_) - (inb_) : (ig_) );                     \
                 proc_  = (src_) + 1 + i__;                            \
                 /* wrap around: proc_ = proc_ mod nprocs_ */          \
                 proc_ -= ( proc_ / (nprocs_) ) * (nprocs_);           \
              }                                                        \
           }
112 |
/*
 * Mindxl2g computes the global index ig_ corresponding to the local
 * index il_ in process proc_.
 *
 * When src_ is negative or nprocs_ <= 1, ig_ is simply il_. Otherwise
 * three cases are distinguished: proc_ equal to src_ (whose first inb_
 * local entries map to themselves), proc_ below src_ (the distance from
 * src_ is taken modulo nprocs_ by adding nprocs_), and proc_ above
 * src_.
 *
 * The expansion is a brace-enclosed statement block that evaluates its
 * arguments more than once: ig_ must be an assignable lvalue and the
 * other arguments side-effect free integer expressions.
 */
#define    Mindxl2g( ig_, il_, inb_, nb_, proc_, src_, nprocs_ )       \
           {                                                           \
              if( ( (src_) >= 0 ) && ( (nprocs_) > 1 ) )               \
              {                                                        \
                 if( (proc_) == (src_) )                               \
                 {                                                     \
                    if( (il_) < (inb_) ) ig_ = (il_);                  \
                    else                 ig_ = (il_) +                 \
                       (nb_)*((nprocs_)-1)*(((il_)-(inb_))/(nb_) + 1); \
                 }                                                     \
                 else if( (proc_) < (src_) )                           \
                 {                                                     \
                    ig_ = (il_) + (inb_) +                             \
                          (nb_)*( ((nprocs_)-1)*((il_)/(nb_)) +        \
                                  (proc_)-(src_)-1+(nprocs_) );        \
                 }                                                     \
                 else                                                  \
                 {                                                     \
                    ig_ = (il_) + (inb_) +                             \
                          (nb_)*( ((nprocs_)-1)*((il_)/(nb_)) +        \
                                  (proc_)-(src_)-1 );                  \
                 }                                                     \
              }                                                        \
              else                                                     \
              {                                                        \
                 ig_ = (il_);                                          \
              }                                                        \
           }
144 |
/*
 * MnumrocI computes the # of local indexes np_ residing in the process
 * of coordinate proc_ corresponding to the interval of global indexes
 * i_:i_+n_-1 assuming that the global index 0 resides in the process
 * src_, and that the indexes are distributed from src_ using the para-
 * meters inb_, nb_ and nprocs_.
 *
 * When src_ is negative or nprocs_ <= 1, np_ is simply n_.
 *
 * The expansion is a brace-enclosed statement block that evaluates its
 * arguments more than once and declares block-local names ending in a
 * double underscore (inb__, mydist__, n__, nblk__, quot__, src__): np_
 * must be an assignable lvalue, and no argument may use those names or
 * have side effects.
 */
#define    MnumrocI( np_, n_, i_, inb_, nb_, proc_, src_, nprocs_ )    \
           {                                                           \
              if( ( (src_) >= 0 ) && ( (nprocs_) > 1 ) )               \
              {                                                        \
                 int inb__, mydist__, n__, nblk__, quot__, src__;      \
                 if( ( inb__ = (inb_) - (i_) ) <= 0 )                  \
                 {                                                     \
                    /* i_ lies at or beyond the first block: skip   */ \
                    /* nblk__ blocks, advance the source process    */ \
                    /* accordingly (modulo nprocs_), and make inb__ */ \
                    /* the size left in the block containing i_     */ \
                    nblk__ = (-inb__) / (nb_) + 1;                     \
                    src__  = (src_) + nblk__;                          \
                    src__ -= ( src__ / (nprocs_) ) * (nprocs_);        \
                    inb__ += nblk__*(nb_);                             \
                    if( ( n__ = (n_) - inb__ ) <= 0 )                  \
                    {                                                  \
                       /* interval fits in that single block */        \
                       if( (proc_) == src__ ) np_ = (n_);              \
                       else                   np_ = 0;                 \
                    }                                                  \
                    else                                               \
                    {                                                  \
                       /* mydist__: distance of proc_ from src__,   */ \
                       /* taken modulo nprocs_                      */ \
                       if( ( mydist__ = (proc_) - src__ ) < 0 )        \
                          mydist__ += (nprocs_);                       \
                       nblk__    = n__ / (nb_) + 1;                    \
                       mydist__ -= nblk__ -                            \
                          (quot__ = (nblk__ / (nprocs_))) * (nprocs_); \
                       if( mydist__ < 0 )                              \
                       {                                               \
                          if( (proc_) != src__ )                       \
                             np_ = (nb_) + (nb_) * quot__;             \
                          else                                         \
                             np_ = inb__ + (nb_) * quot__;             \
                       }                                               \
                       else if( mydist__ > 0 )                         \
                       {                                               \
                          np_ = (nb_) * quot__;                        \
                       }                                               \
                       else                                            \
                       {                                               \
                          if( (proc_) != src__ )                       \
                             np_ = n__ +(nb_)+(nb_)*(quot__ - nblk__); \
                          else                                         \
                             np_ = (n_)+ (nb_)*(quot__ - nblk__);      \
                       }                                               \
                    }                                                  \
                 }                                                     \
                 else                                                  \
                 {                                                     \
                    /* i_ lies inside the first block: the source   */ \
                    /* process stays src_ and inb__ entries of that */ \
                    /* block remain from i_ on                      */ \
                    if( ( n__ = (n_) - inb__ ) <= 0 )                  \
                    {                                                  \
                       /* interval fits in that single block */        \
                       if( (proc_) == (src_) ) np_ = (n_);             \
                       else                    np_ = 0;                \
                    }                                                  \
                    else                                               \
                    {                                                  \
                       /* mydist__: distance of proc_ from src_,    */ \
                       /* taken modulo nprocs_                      */ \
                       if( ( mydist__ = (proc_) - (src_) ) < 0 )       \
                          mydist__ += (nprocs_);                       \
                       nblk__    = n__ / (nb_) + 1;                    \
                       mydist__ -= nblk__ -                            \
                          ( quot__ = (nblk__ / (nprocs_)) )*(nprocs_); \
                       if( mydist__ < 0 )                              \
                       {                                               \
                          if( (proc_) != (src_) )                      \
                             np_ = (nb_) + (nb_) * quot__;             \
                          else                                         \
                             np_ = inb__ + (nb_) * quot__;             \
                       }                                               \
                       else if( mydist__ > 0 )                         \
                       {                                               \
                          np_ = (nb_) * quot__;                        \
                       }                                               \
                       else                                            \
                       {                                               \
                          if( (proc_) != (src_) )                      \
                             np_ = n__ +(nb_)+(nb_)*(quot__ - nblk__); \
                          else                                         \
                             np_ = (n_)+ (nb_)*(quot__ - nblk__);      \
                       }                                               \
                    }                                                  \
                 }                                                     \
              }                                                        \
              else                                                     \
              {                                                        \
                 np_ = (n_);                                           \
              }                                                        \
           }
234 |
|
235 |
/*
 * Mnumroc is MnumrocI with the starting global index fixed to 0: it
 * stores in np_ the # of local indexes residing in process proc_ for
 * the interval of global indexes 0:n_-1.
 */
#define    Mnumroc( np_, n_, inb_, nb_, proc_, src_, nprocs_ )         \
           MnumrocI( np_, n_, 0, inb_, nb_, proc_, src_, nprocs_ )
|
237 |
/*
|
238 |
* ---------------------------------------------------------------------
|
239 |
* Function prototypes
|
240 |
* ---------------------------------------------------------------------
|
241 |
*/
|
242 |
void HPL_indxg2lp
|
243 |
STDC_ARGS( ( |
244 |
int *,
|
245 |
int *,
|
246 |
const int, |
247 |
const int, |
248 |
const int, |
249 |
const int, |
250 |
const int |
251 |
) ); |
252 |
int HPL_indxg2l
|
253 |
STDC_ARGS( ( |
254 |
const int, |
255 |
const int, |
256 |
const int, |
257 |
const int, |
258 |
const int |
259 |
) ); |
260 |
int HPL_indxg2p
|
261 |
STDC_ARGS( ( |
262 |
const int, |
263 |
const int, |
264 |
const int, |
265 |
const int, |
266 |
const int |
267 |
) ); |
268 |
int HPL_indxl2g
|
269 |
STDC_ARGS( ( |
270 |
const int, |
271 |
const int, |
272 |
const int, |
273 |
const int, |
274 |
const int, |
275 |
const int |
276 |
) ); |
277 |
void HPL_infog2l
|
278 |
STDC_ARGS( ( |
279 |
int,
|
280 |
int,
|
281 |
const int, |
282 |
const int, |
283 |
const int, |
284 |
const int, |
285 |
const int, |
286 |
const int, |
287 |
const int, |
288 |
const int, |
289 |
const int, |
290 |
const int, |
291 |
int *,
|
292 |
int *,
|
293 |
int *,
|
294 |
int *
|
295 |
) ); |
296 |
int HPL_numroc
|
297 |
STDC_ARGS( ( |
298 |
const int, |
299 |
const int, |
300 |
const int, |
301 |
const int, |
302 |
const int, |
303 |
const int |
304 |
) ); |
305 |
int HPL_numrocI
|
306 |
STDC_ARGS( ( |
307 |
const int, |
308 |
const int, |
309 |
const int, |
310 |
const int, |
311 |
const int, |
312 |
const int, |
313 |
const int |
314 |
) ); |
315 |
|
316 |
void HPL_dlaswp00N
|
317 |
STDC_ARGS( ( |
318 |
const int, |
319 |
const int, |
320 |
double *,
|
321 |
const int, |
322 |
const int * |
323 |
) ); |
324 |
void HPL_dlaswp10N
|
325 |
STDC_ARGS( ( |
326 |
const int, |
327 |
const int, |
328 |
double *,
|
329 |
const int, |
330 |
const int * |
331 |
) ); |
332 |
void HPL_dlaswp01N
|
333 |
STDC_ARGS( ( |
334 |
const int, |
335 |
const int, |
336 |
double *,
|
337 |
const int, |
338 |
double *,
|
339 |
const int, |
340 |
const int *, |
341 |
const int * |
342 |
) ); |
343 |
void HPL_dlaswp01T
|
344 |
STDC_ARGS( ( |
345 |
const int, |
346 |
const int, |
347 |
double *,
|
348 |
const int, |
349 |
double *,
|
350 |
const int, |
351 |
const int *, |
352 |
const int * |
353 |
) ); |
354 |
void HPL_dlaswp02N
|
355 |
STDC_ARGS( ( |
356 |
const int, |
357 |
const int, |
358 |
const double *, |
359 |
const int, |
360 |
double *,
|
361 |
double *,
|
362 |
const int, |
363 |
const int *, |
364 |
const int * |
365 |
) ); |
366 |
void HPL_dlaswp03N
|
367 |
STDC_ARGS( ( |
368 |
const int, |
369 |
const int, |
370 |
double *,
|
371 |
const int, |
372 |
const double *, |
373 |
const double *, |
374 |
const int |
375 |
) ); |
376 |
void HPL_dlaswp03T
|
377 |
STDC_ARGS( ( |
378 |
const int, |
379 |
const int, |
380 |
double *,
|
381 |
const int, |
382 |
const double *, |
383 |
const double *, |
384 |
const int |
385 |
) ); |
386 |
void HPL_dlaswp04N
|
387 |
STDC_ARGS( ( |
388 |
const int, |
389 |
const int, |
390 |
const int, |
391 |
double *,
|
392 |
const int, |
393 |
double *,
|
394 |
const int, |
395 |
const double *, |
396 |
const double *, |
397 |
const int, |
398 |
const int *, |
399 |
const int * |
400 |
) ); |
401 |
void HPL_dlaswp04T
|
402 |
STDC_ARGS( ( |
403 |
const int, |
404 |
const int, |
405 |
const int, |
406 |
double *,
|
407 |
const int, |
408 |
double *,
|
409 |
const int, |
410 |
const double *, |
411 |
const double *, |
412 |
const int, |
413 |
const int *, |
414 |
const int * |
415 |
) ); |
416 |
void HPL_dlaswp05N
|
417 |
STDC_ARGS( ( |
418 |
const int, |
419 |
const int, |
420 |
double *,
|
421 |
const int, |
422 |
const double *, |
423 |
const int, |
424 |
const int *, |
425 |
const int * |
426 |
) ); |
427 |
void HPL_dlaswp05T
|
428 |
STDC_ARGS( ( |
429 |
const int, |
430 |
const int, |
431 |
double *,
|
432 |
const int, |
433 |
const double *, |
434 |
const int, |
435 |
const int *, |
436 |
const int * |
437 |
) ); |
438 |
void HPL_dlaswp06N
|
439 |
STDC_ARGS( ( |
440 |
const int, |
441 |
const int, |
442 |
double *,
|
443 |
const int, |
444 |
double *,
|
445 |
const int, |
446 |
const int * |
447 |
) ); |
448 |
void HPL_dlaswp06T
|
449 |
STDC_ARGS( ( |
450 |
const int, |
451 |
const int, |
452 |
double *,
|
453 |
const int, |
454 |
double *,
|
455 |
const int, |
456 |
const int * |
457 |
) ); |
458 |
|
459 |
void HPL_pabort
|
460 |
STDC_ARGS( ( |
461 |
int,
|
462 |
const char *, |
463 |
const char *, |
464 |
... |
465 |
) ); |
466 |
void HPL_pwarn
|
467 |
STDC_ARGS( ( |
468 |
FILE *, |
469 |
int,
|
470 |
const char *, |
471 |
const char *, |
472 |
... |
473 |
) ); |
474 |
void HPL_pdlaprnt
|
475 |
STDC_ARGS( ( |
476 |
const HPL_T_grid *,
|
477 |
const int, |
478 |
const int, |
479 |
const int, |
480 |
double *,
|
481 |
const int, |
482 |
const int, |
483 |
const int, |
484 |
const char * |
485 |
) ); |
486 |
double HPL_pdlamch
|
487 |
STDC_ARGS( ( |
488 |
MPI_Comm, |
489 |
const HPL_T_MACH
|
490 |
) ); |
491 |
double HPL_pdlange
|
492 |
STDC_ARGS( ( |
493 |
const HPL_T_grid *,
|
494 |
const HPL_T_NORM,
|
495 |
const int, |
496 |
const int, |
497 |
const int, |
498 |
const double *, |
499 |
const int |
500 |
) ); |
501 |
|
502 |
#endif
|
503 |
/*
|
504 |
* End of hpl_pauxil.h
|
505 |
*/
|