/*
 * -- High Performance Computing Linpack Benchmark (HPL)
 *    HPL - 2.0 - September 10, 2008
 *    Antoine P. Petitet
 *    University of Tennessee, Knoxville
 *    Innovative Computing Laboratory
 *    (C) Copyright 2000-2008 All Rights Reserved
 *
 * -- Copyright notice and Licensing terms:
 *
 * Redistribution  and  use in  source and binary forms, with or without
 * modification, are  permitted provided  that the following  conditions
 * are met:
 *
 * 1. Redistributions  of  source  code  must retain the above copyright
 * notice, this list of conditions and the following disclaimer.
 *
 * 2. Redistributions in binary form must reproduce  the above copyright
 * notice, this list of conditions,  and the following disclaimer in the
 * documentation and/or other materials provided with the distribution.
 *
 * 3. All  advertising  materials  mentioning  features  or  use of this
 * software must display the following acknowledgement:
 * This  product  includes  software  developed  at  the  University  of
 * Tennessee, Knoxville, Innovative Computing Laboratory.
 *
 * 4. The name of the  University,  the name of the  Laboratory,  or the
 * names  of  its  contributors  may  not  be used to endorse or promote
 * products  derived   from   this  software  without  specific  written
 * permission.
 *
 * -- Disclaimer:
 *
 * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
 * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
 * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
#ifndef HPL_PAUXIL_H
#define HPL_PAUXIL_H
/*
 * ---------------------------------------------------------------------
 * Include files
 * ---------------------------------------------------------------------
 */
#include "hpl_misc.h"
#include "hpl_blas.h"
#include "hpl_auxil.h"

#include "hpl_pmisc.h"
#include "hpl_grid.h"
/*
 * ---------------------------------------------------------------------
 * #define macros definitions
 * ---------------------------------------------------------------------
 */
/*
 * Mindxg2p  returns in proc_ the process coordinate owning the entry
 * globally indexed by ig_.
 *
 * When  ig_ < inb_,  src_ < 0  or  nprocs_ <= 1,  proc_ is set to src_.
 * Otherwise  proc_  is the remainder of  src_ + 1 + ( ig_ - inb_ ) / nb_
 * divided by nprocs_.
 *
 * proc_ must be a modifiable lvalue.  The expansion is a brace-enclosed
 * block,  and  the arguments may be evaluated more than once,  so  they
 * should be free of side effects.
 */
#define    Mindxg2p( ig_, inb_, nb_, proc_, src_, nprocs_ )            \
           {                                                           \
              if( ( (ig_) >= (inb_) ) && ( (src_) >= 0 ) &&            \
                  ( (nprocs_) > 1 ) )                                  \
              {                                                        \
                 proc_  = (src_) + 1 + ( (ig_)-(inb_) ) / (nb_);       \
                 proc_ -= ( proc_ / (nprocs_) ) * (nprocs_);           \
              }                                                        \
              else                                                     \
              {                                                        \
                 proc_ = (src_);                                       \
              }                                                        \
           }

/*
 * Mindxg2l  computes in il_ the local index corresponding to the global
 * index ig_.
 *
 * When  ig_ < inb_,  src_ == -1  or  nprocs_ == 1,  il_  is set to ig_.
 * Otherwise,  with  i = ( ig_ - inb_ ) / nb_  and  j = i / nprocs_, the
 * result is  nb_ * ( j - i )  plus  ig_  when  i + 1  equals
 * ( j + 1 ) * nprocs_,  and plus  ig_ - inb_  in all other cases.
 *
 * proc_ is accepted but not referenced by the expansion.  il_ must be a
 * modifiable lvalue.  The expansion is a brace-enclosed block,  and the
 * arguments may be evaluated more than once.
 */
#define    Mindxg2l( il_, ig_, inb_, nb_, proc_, src_, nprocs_ )       \
           {                                                           \
              if( ( (ig_) < (inb_) ) || ( (src_) == -1 ) ||            \
                  ( (nprocs_) == 1 ) ) { il_ = (ig_); }                \
              else                                                     \
              {                                                        \
                 int i__, j__;                                         \
                 j__ = ( i__ = ( (ig_)-(inb_) ) / (nb_) ) / (nprocs_); \
                 il_ = (nb_)*( j__ - i__ ) +                           \
                       ( (i__ + 1 - ( j__ + 1 ) * (nprocs_) ) ?        \
                         (ig_) - (inb_) : (ig_) );                     \
              }                                                        \
           }

/*
 * Mindxg2lp computes,  for the global index ig_,  both the local index
 * il_ and the process coordinate proc_.
 *
 * When  ig_ < inb_,  src_ == -1  or  nprocs_ == 1,  il_  is set to ig_
 * and  proc_  to  src_.   Otherwise,  with  i = ( ig_ - inb_ ) / nb_ and
 * j = i / nprocs_,  il_  is  nb_ * ( j - i )  plus  ig_  when  i + 1
 * equals ( j + 1 ) * nprocs_  and plus  ig_ - inb_  in all other cases;
 * proc_ is the remainder of  src_ + 1 + i  divided by nprocs_.
 *
 * il_  and  proc_  must be modifiable lvalues.   The  expansion  is  a
 * brace-enclosed block, and the arguments may be evaluated more than
 * once.
 */
#define    Mindxg2lp( il_, proc_, ig_, inb_, nb_, src_, nprocs_ )      \
           {                                                           \
              if( ( (ig_) < (inb_) ) || ( (src_) == -1 ) ||            \
                  ( (nprocs_) == 1 ) )                                 \
              { il_ = (ig_); proc_ = (src_); }                         \
              else                                                     \
              {                                                        \
                 int i__, j__;                                         \
                 j__ = ( i__ = ( (ig_)-(inb_) ) / (nb_) ) / (nprocs_); \
                 il_ = (nb_)*(j__-i__) +                               \
                       ( ( i__ + 1 - ( j__ + 1 ) * (nprocs_) ) ?       \
                         (ig_) - (inb_) : (ig_) );                     \
                 proc_  = (src_) + 1 + i__;                            \
                 proc_ -= ( proc_ / (nprocs_) ) * (nprocs_);           \
              }                                                        \
           }
/*
 * Mindxl2g computes the global index ig_ corresponding to the local
 * index il_ in process proc_.
 *
 * When  src_ < 0  or  nprocs_ <= 1,  ig_  is set to  il_.  Otherwise the
 * formula depends on the position of proc_ relative to src_:
 *   - proc_ == src_ : il_ itself when  il_ < inb_,  else  il_  shifted
 *     by nb_ * ( nprocs_ - 1 ) for every block boundary crossed;
 *   - proc_ <  src_ : the process distance ( proc_ - src_ - 1 ) is
 *     wrapped around by adding nprocs_;
 *   - proc_ >  src_ : the distance ( proc_ - src_ - 1 ) is used as is.
 *
 * ig_ must be a modifiable lvalue.  The expansion is  a  brace-enclosed
 * block, and the arguments may be evaluated more than once.
 */
#define    Mindxl2g( ig_, il_, inb_, nb_, proc_, src_, nprocs_ )       \
           {                                                           \
              if( ( (src_) >= 0 ) && ( (nprocs_) > 1 ) )               \
              {                                                        \
                 if( (proc_) == (src_) )                               \
                 {                                                     \
                    if( (il_) < (inb_) ) ig_ = (il_);                  \
                    else                 ig_ = (il_) +                 \
                       (nb_)*((nprocs_)-1)*(((il_)-(inb_))/(nb_) + 1); \
                 }                                                     \
                 else if( (proc_) < (src_) )                           \
                 {                                                     \
                    ig_ = (il_) + (inb_) +                             \
                          (nb_)*(  ((nprocs_)-1)*((il_)/(nb_)) +       \
                                   (proc_)-(src_)-1+(nprocs_) );       \
                 }                                                     \
                 else                                                  \
                 {                                                     \
                    ig_ =  (il_) + (inb_) +                            \
                           (nb_)*( ((nprocs_)-1)*((il_)/(nb_)) +       \
                           (proc_)-(src_)-1 );                         \
                 }                                                     \
              }                                                        \
              else                                                     \
              {                                                        \
                 ig_ = (il_);                                          \
              }                                                        \
           }
/*
 * MnumrocI computes the # of local indexes  np_ residing in the process
 * of coordinate  proc_  corresponding to the interval of global indexes
 * i_:i_+n_-1  assuming  that the global index 0 resides in  the process
 * src_,  and that the indexes are distributed from src_ using the para-
 * meters inb_, nb_ and nprocs_.
 *
 * When  src_ < 0  or  nprocs_ <= 1,  np_  is set to  n_.  Otherwise, if
 * i_ >= inb_,  the macro first derives  the process owning  index  i_
 * (src__)  and  the number of indexes  left in its current block
 * (inb__),  then applies  the same counting logic  as for an interval
 * starting at index 0.
 *
 * np_ must be a modifiable lvalue.  The expansion is  a  brace-enclosed
 * block  declaring its own temporaries,  and  the arguments  may  be
 * evaluated more than once.
 */
#define    MnumrocI( np_, n_, i_, inb_, nb_, proc_, src_, nprocs_ )    \
           {                                                           \
              if( ( (src_) >= 0 ) && ( (nprocs_) > 1 ) )               \
              {                                                        \
                 int inb__, mydist__, n__, nblk__, quot__, src__;      \
                 if( ( inb__ = (inb_) - (i_) ) <= 0 )                  \
                 {                                                     \
                    nblk__ = (-inb__) / (nb_) + 1;                     \
                    src__  = (src_) + nblk__;                          \
                    src__ -= ( src__ / (nprocs_) ) * (nprocs_);        \
                    inb__ += nblk__*(nb_);                             \
                    if( ( n__ = (n_) - inb__ ) <= 0 )                  \
                    {                                                  \
                       if( (proc_) == src__ ) np_ = (n_);              \
                       else                   np_ = 0;                 \
                    }                                                  \
                    else                                               \
                    {                                                  \
                       if( ( mydist__ = (proc_) - src__ ) < 0 )        \
                          mydist__ += (nprocs_);                       \
                       nblk__    = n__ / (nb_) + 1;                    \
                       mydist__ -= nblk__ -                            \
                          (quot__ = (nblk__ / (nprocs_))) * (nprocs_); \
                       if( mydist__ < 0 )                              \
                       {                                               \
                          if( (proc_) != src__ )                       \
                             np_ = (nb_) + (nb_) * quot__;             \
                          else                                         \
                             np_ = inb__ + (nb_) * quot__;             \
                       }                                               \
                       else if( mydist__ > 0 )                         \
                       {                                               \
                          np_ = (nb_) * quot__;                        \
                       }                                               \
                       else                                            \
                       {                                               \
                          if( (proc_) != src__ )                       \
                             np_ = n__ +(nb_)+(nb_)*(quot__ - nblk__); \
                          else                                         \
                             np_ = (n_)+      (nb_)*(quot__ - nblk__); \
                       }                                               \
                    }                                                  \
                 }                                                     \
                 else                                                  \
                 {                                                     \
                    if( ( n__ = (n_) - inb__ ) <= 0 )                  \
                    {                                                  \
                       if( (proc_) == (src_) ) np_ = (n_);             \
                       else                    np_ = 0;                \
                    }                                                  \
                    else                                               \
                    {                                                  \
                       if( ( mydist__ = (proc_) - (src_) ) < 0 )       \
                          mydist__ += (nprocs_);                       \
                       nblk__    = n__ / (nb_) + 1;                    \
                       mydist__ -= nblk__ -                            \
                          ( quot__ = (nblk__ / (nprocs_)) )*(nprocs_); \
                       if( mydist__ < 0 )                              \
                       {                                               \
                          if( (proc_) != (src_) )                      \
                             np_ = (nb_) + (nb_) * quot__;             \
                          else                                         \
                             np_ = inb__ + (nb_) * quot__;             \
                       }                                               \
                       else if( mydist__ > 0 )                         \
                       {                                               \
                          np_ = (nb_) * quot__;                        \
                       }                                               \
                       else                                            \
                       {                                               \
                          if( (proc_) != (src_) )                      \
                             np_ = n__ +(nb_)+(nb_)*(quot__ - nblk__); \
                          else                                         \
                             np_ = (n_)+      (nb_)*(quot__ - nblk__); \
                       }                                               \
                    }                                                  \
                 }                                                     \
              }                                                        \
              else                                                     \
              {                                                        \
                 np_ = (n_);                                           \
              }                                                        \
           }

/*
 * Mnumroc  is  MnumrocI  for an interval of  n_  global indexes starting
 * at global index 0:  every argument is forwarded unchanged and  i_  is
 * fixed to 0.
 */
#define    Mnumroc( np_, n_, inb_, nb_, proc_, src_, nprocs_ )         \
           MnumrocI( np_, n_, 0, inb_, nb_, proc_, src_, nprocs_ )
/*
 * ---------------------------------------------------------------------
 * Function prototypes
 * ---------------------------------------------------------------------
 */
void                             HPL_indxg2lp
243
STDC_ARGS( (
244
   int *,
245
   int *,
246
   const int,
247
   const int,
248
   const int,
249
   const int,
250
   const int
251
) );
252
int                              HPL_indxg2l
253
STDC_ARGS( (
254
   const int,
255
   const int,
256
   const int,
257
   const int,
258
   const int
259
) );
260
int                              HPL_indxg2p
261
STDC_ARGS( (
262
   const int,
263
   const int,
264
   const int,
265
   const int,
266
   const int
267
) );
268
int                              HPL_indxl2g
269
STDC_ARGS( (
270
   const int,
271
   const int,
272
   const int,
273
   const int,
274
   const int,
275
   const int
276
) );
277
void                             HPL_infog2l
278
STDC_ARGS( (
279
   int,
280
   int,
281
   const int,
282
   const int,
283
   const int,
284
   const int,
285
   const int,
286
   const int,
287
   const int,
288
   const int,
289
   const int,
290
   const int,
291
   int *,
292
   int *,
293
   int *,
294
   int *
295
) );
296
int                              HPL_numroc
297
STDC_ARGS( (
298
   const int,
299
   const int,
300
   const int,
301
   const int,
302
   const int,
303
   const int
304
) );
305
int                              HPL_numrocI
306
STDC_ARGS( (
307
   const int,
308
   const int,
309
   const int,
310
   const int,
311
   const int,
312
   const int,
313
   const int
314
) );
315

    
316
void                             HPL_dlaswp00N
317
STDC_ARGS( (
318
   const int,
319
   const int,
320
   double *,
321
   const int,
322
   const int *
323
) );
324
void                             HPL_dlaswp10N
325
STDC_ARGS( (
326
   const int,
327
   const int,
328
   double *,
329
   const int,
330
   const int *
331
) );
332
void                             HPL_dlaswp01N
333
STDC_ARGS( (
334
   const int,
335
   const int,
336
   double *,
337
   const int,
338
   double *,
339
   const int,
340
   const int *,
341
   const int *
342
) );
343
void                             HPL_dlaswp01T
344
STDC_ARGS( (
345
   const int,
346
   const int,
347
   double *,
348
   const int,
349
   double *,
350
   const int,
351
   const int *,
352
   const int *
353
) );
354
void                             HPL_dlaswp02N
355
STDC_ARGS( (
356
   const int,
357
   const int,
358
   const double *,
359
   const int,
360
   double *,
361
   double *,
362
   const int,
363
   const int *,
364
   const int *
365
) );
366
void                             HPL_dlaswp03N
367
STDC_ARGS( (
368
   const int,
369
   const int,
370
   double *,
371
   const int,
372
   const double *,
373
   const double *,
374
   const int
375
) );
376
void                             HPL_dlaswp03T
377
STDC_ARGS( (
378
   const int,
379
   const int,
380
   double *,
381
   const int,
382
   const double *,
383
   const double *,
384
   const int
385
) );
386
void                             HPL_dlaswp04N
387
STDC_ARGS( (
388
   const int,
389
   const int,
390
   const int,
391
   double *,
392
   const int,
393
   double *,
394
   const int,
395
   const double *,
396
   const double *,
397
   const int,
398
   const int *,
399
   const int *
400
) );
401
void                             HPL_dlaswp04T
402
STDC_ARGS( (
403
   const int,
404
   const int,
405
   const int,
406
   double *,
407
   const int,
408
   double *,
409
   const int,
410
   const double *,
411
   const double *,
412
   const int,
413
   const int *,
414
   const int *
415
) );
416
void                             HPL_dlaswp05N
417
STDC_ARGS( (
418
   const int,
419
   const int,
420
   double *,
421
   const int,
422
   const double *,
423
   const int,
424
   const int *,
425
   const int *
426
) );
427
void                             HPL_dlaswp05T
428
STDC_ARGS( (
429
   const int,
430
   const int,
431
   double *,
432
   const int,
433
   const double *,
434
   const int,
435
   const int *,
436
   const int *
437
) );
438
void                             HPL_dlaswp06N
439
STDC_ARGS( (
440
   const int,
441
   const int,
442
   double *,
443
   const int,
444
   double *,
445
   const int,
446
   const int *
447
) );
448
void                             HPL_dlaswp06T
449
STDC_ARGS( (
450
   const int,
451
   const int,
452
   double *,
453
   const int,
454
   double *,
455
   const int,
456
   const int *
457
) );
458

    
459
void                             HPL_pabort
460
STDC_ARGS( (
461
   int,
462
   const char *,
463
   const char *,
464
   ...
465
) );
466
void                             HPL_pwarn
467
STDC_ARGS( (
468
   FILE *,
469
   int,
470
   const char *,
471
   const char *,
472
   ...
473
) );
474
void                             HPL_pdlaprnt
475
STDC_ARGS( (
476
   const HPL_T_grid *,
477
   const int,
478
   const int,
479
   const int,
480
   double *,
481
   const int,
482
   const int,
483
   const int,
484
   const char *
485
) );
486
double                           HPL_pdlamch
487
STDC_ARGS( (
488
   MPI_Comm,
489
   const HPL_T_MACH
490
) );
491
double                           HPL_pdlange
492
STDC_ARGS( (
493
   const HPL_T_grid *,
494
   const HPL_T_NORM,
495
   const int,
496
   const int,
497
   const int,
498
   const double *,
499
   const int
500
) );
501

    
502
#endif
503
/*
 * End of hpl_pauxil.h
 */