Statistiques
| Révision :

root / include / hpl_pauxil.h

Historique | Voir | Annoter | Télécharger (19,13 ko)

1 1 equemene
/*
 * -- High Performance Computing Linpack Benchmark (HPL)
 *    HPL - 2.0 - September 10, 2008
 *    Antoine P. Petitet
 *    University of Tennessee, Knoxville
 *    Innovative Computing Laboratory
 *    (C) Copyright 2000-2008 All Rights Reserved
 *
 * -- Copyright notice and Licensing terms:
 *
 * Redistribution  and  use in  source and binary forms, with or without
 * modification, are  permitted provided  that the following  conditions
 * are met:
 *
 * 1. Redistributions  of  source  code  must retain the above copyright
 * notice, this list of conditions and the following disclaimer.
 *
 * 2. Redistributions in binary form must reproduce  the above copyright
 * notice, this list of conditions,  and the following disclaimer in the
 * documentation and/or other materials provided with the distribution.
 *
 * 3. All  advertising  materials  mentioning  features  or  use of this
 * software must display the following acknowledgement:
 * This  product  includes  software  developed  at  the  University  of
 * Tennessee, Knoxville, Innovative Computing Laboratory.
 *
 * 4. The name of the  University,  the name of the  Laboratory,  or the
 * names  of  its  contributors  may  not  be used to endorse or promote
 * products  derived   from   this  software  without  specific  written
 * permission.
 *
 * -- Disclaimer:
 *
 * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
 * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
 * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
46 1 equemene
#ifndef HPL_PAUXIL_H
#define HPL_PAUXIL_H
/*
 * ---------------------------------------------------------------------
 * Include files
 * ---------------------------------------------------------------------
 */
#include "hpl_misc.h"
#include "hpl_blas.h"
#include "hpl_auxil.h"

#include "hpl_pmisc.h"
#include "hpl_grid.h"
59 1 equemene
/*
 * ---------------------------------------------------------------------
 * #define macros definitions
 * ---------------------------------------------------------------------
 */
/*
 * Mindxg2p  stores in  proc_  the process coordinate owning the entry
 * globally indexed by ig_.
 *
 * ig_     (input)   global index of the entry.
 * inb_    (input)   number of leading global indexes owned by src_.
 * nb_     (input)   number of indexes in every following block.
 * proc_   (output)  lvalue receiving the owning process coordinate.
 * src_    (input)   coordinate of the process owning global index 0.
 * nprocs_ (input)   number of processes the blocks are dealt to.
 *
 * When ig_ < inb_, src_ < 0 or nprocs_ <= 1, proc_ is set to src_.
 * Otherwise the blocks of nb_ indexes that follow the first inb_ ones
 * go to src_+1, src_+2, ... and the result is reduced modulo nprocs_.
 *
 * The expansion is a brace-enclosed compound statement that evaluates
 * its arguments several times: pass side-effect free expressions only.
 */
#define    Mindxg2p( ig_, inb_, nb_, proc_, src_, nprocs_ )            \
           {                                                           \
              if( ( (ig_) >= (inb_) ) && ( (src_) >= 0 ) &&            \
                  ( (nprocs_) > 1 ) )                                  \
              {                                                        \
                 proc_  = (src_) + 1 + ( (ig_)-(inb_) ) / (nb_);       \
                 proc_ -= ( proc_ / (nprocs_) ) * (nprocs_);           \
              }                                                        \
              else                                                     \
              {                                                        \
                 proc_ = (src_);                                       \
              }                                                        \
           }
81 1 equemene
82 1 equemene
/*
 * Mindxg2l  stores in  il_  the local index that the global index ig_
 * has in the process owning it.
 *
 * il_     (output)  lvalue receiving the local index.
 * ig_     (input)   global index of the entry.
 * inb_    (input)   number of leading global indexes forming the first
 *                   block.
 * nb_     (input)   number of indexes in every following block.
 * proc_   (unused)  accepted but never referenced by the expansion.
 * src_    (input)   -1 disables the mapping (il_ = ig_); any other
 *                   value is not used further.
 * nprocs_ (input)   number of processes the blocks are dealt to.
 *
 * When ig_ < inb_, src_ == -1 or nprocs_ == 1, il_ is simply ig_.
 * Otherwise, with i = (ig_-inb_)/nb_ and j = i/nprocs_, the test
 * i+1 == (j+1)*nprocs_ selects between two formulas: when it holds
 * il_ = ig_ + nb_*(j-i), otherwise il_ = ig_ - inb_ + nb_*(j-i).
 *
 * The expansion is a brace-enclosed compound statement that evaluates
 * its arguments several times: pass side-effect free expressions only.
 */
#define    Mindxg2l( il_, ig_, inb_, nb_, proc_, src_, nprocs_ )       \
           {                                                           \
              if( ( (ig_) < (inb_) ) || ( (src_) == -1 ) ||            \
                  ( (nprocs_) == 1 ) ) { il_ = (ig_); }                \
              else                                                     \
              {                                                        \
                 int i__, j__;                                         \
                 j__ = ( i__ = ( (ig_)-(inb_) ) / (nb_) ) / (nprocs_); \
                 il_ = (nb_)*( j__ - i__ ) +                           \
                       ( (i__ + 1 - ( j__ + 1 ) * (nprocs_) ) ?        \
                         (ig_) - (inb_) : (ig_) );                     \
              }                                                        \
           }
95 1 equemene
96 1 equemene
/*
 * Mindxg2lp  computes at once, for the global index ig_, the local
 * index il_ and the coordinate proc_ of the process owning it.
 *
 * il_     (output)  lvalue receiving the local index.
 * proc_   (output)  lvalue receiving the owning process coordinate.
 * ig_     (input)   global index of the entry.
 * inb_    (input)   number of leading global indexes owned by src_.
 * nb_     (input)   number of indexes in every following block.
 * src_    (input)   coordinate of the process owning global index 0;
 *                   -1 disables the mapping.
 * nprocs_ (input)   number of processes the blocks are dealt to.
 *
 * When ig_ < inb_, src_ == -1 or nprocs_ == 1 the results are
 * il_ = ig_ and proc_ = src_.  Otherwise, with i = (ig_-inb_)/nb_,
 * proc_ is (src_ + 1 + i) reduced modulo nprocs_, and il_ is
 * ig_ + nb_*(j-i) when i+1 == (j+1)*nprocs_ (j = i/nprocs_) or
 * ig_ - inb_ + nb_*(j-i) otherwise.
 *
 * The expansion is a brace-enclosed compound statement that evaluates
 * its arguments several times: pass side-effect free expressions only.
 */
#define    Mindxg2lp( il_, proc_, ig_, inb_, nb_, src_, nprocs_ )      \
           {                                                           \
              if( ( (ig_) < (inb_) ) || ( (src_) == -1 ) ||            \
                  ( (nprocs_) == 1 ) )                                 \
              { il_ = (ig_); proc_ = (src_); }                         \
              else                                                     \
              {                                                        \
                 int i__, j__;                                         \
                 j__ = ( i__ = ( (ig_)-(inb_) ) / (nb_) ) / (nprocs_); \
                 il_ = (nb_)*(j__-i__) +                               \
                       ( ( i__ + 1 - ( j__ + 1 ) * (nprocs_) ) ?       \
                         (ig_) - (inb_) : (ig_) );                     \
                 proc_  = (src_) + 1 + i__;                            \
                 proc_ -= ( proc_ / (nprocs_) ) * (nprocs_);           \
              }                                                        \
           }
112 1 equemene
/*
 * Mindxl2g computes the global index ig_ corresponding to the local
 * index il_ in process proc_.
 *
 * ig_     (output)  lvalue receiving the global index.
 * il_     (input)   local index in process proc_.
 * inb_    (input)   number of leading global indexes owned by src_.
 * nb_     (input)   number of indexes in every following block.
 * proc_   (input)   coordinate of the process holding il_.
 * src_    (input)   coordinate of the process owning global index 0;
 *                   a negative value yields ig_ = il_.
 * nprocs_ (input)   number of processes the blocks are dealt to;
 *                   a value <= 1 yields ig_ = il_.
 *
 * Three cases are distinguished: proc_ equal to src_ (which also holds
 * the first inb_ indexes), proc_ below src_ (its distance to src_ wraps
 * around, hence the extra nprocs_ term), and proc_ above src_.
 *
 * The expansion is a brace-enclosed compound statement that evaluates
 * its arguments several times: pass side-effect free expressions only.
 */
#define    Mindxl2g( ig_, il_, inb_, nb_, proc_, src_, nprocs_ )       \
           {                                                           \
              if( ( (src_) >= 0 ) && ( (nprocs_) > 1 ) )               \
              {                                                        \
                 if( (proc_) == (src_) )                               \
                 {                                                     \
                    if( (il_) < (inb_) ) ig_ = (il_);                  \
                    else                 ig_ = (il_) +                 \
                       (nb_)*((nprocs_)-1)*(((il_)-(inb_))/(nb_) + 1); \
                 }                                                     \
                 else if( (proc_) < (src_) )                           \
                 {                                                     \
                    ig_ = (il_) + (inb_) +                             \
                          (nb_)*(  ((nprocs_)-1)*((il_)/(nb_)) +       \
                                   (proc_)-(src_)-1+(nprocs_) );       \
                 }                                                     \
                 else                                                  \
                 {                                                     \
                    ig_ =  (il_) + (inb_) +                            \
                           (nb_)*( ((nprocs_)-1)*((il_)/(nb_)) +       \
                           (proc_)-(src_)-1 );                         \
                 }                                                     \
              }                                                        \
              else                                                     \
              {                                                        \
                 ig_ = (il_);                                          \
              }                                                        \
           }
144 1 equemene
/*
 * MnumrocI computes the # of local indexes  np_ residing in the process
 * of coordinate  proc_  corresponding to the interval of global indexes
 * i_:i_+n_-1  assuming  that the global index 0 resides in  the process
 * src_,  and that the indexes are distributed from src_ using the para-
 * meters inb_, nb_ and nprocs_.
 *
 * np_     (output)  lvalue receiving the local count.
 * n_      (input)   number of global indexes in the interval.
 * i_      (input)   first global index of the interval.
 * inb_    (input)   number of leading global indexes owned by src_.
 * nb_     (input)   number of indexes in every following block.
 * proc_   (input)   coordinate of the process being queried.
 * src_    (input)   coordinate of the process owning global index 0;
 *                   a negative value yields np_ = n_.
 * nprocs_ (input)   number of processes the blocks are dealt to;
 *                   a value <= 1 yields np_ = n_.
 *
 * When i_ >= inb_ the macro first moves the origin to the block holding
 * i_:  src__ becomes the process owning i_ and inb__ the number of in-
 * dexes from i_ to the end of that block;  the counting that follows is
 * then the same as for an interval starting at 0.
 *
 * The expansion is a brace-enclosed compound statement that evaluates
 * its arguments several times: pass side-effect free expressions only.
 */
#define    MnumrocI( np_, n_, i_, inb_, nb_, proc_, src_, nprocs_ )    \
           {                                                           \
              if( ( (src_) >= 0 ) && ( (nprocs_) > 1 ) )               \
              {                                                        \
                 int inb__, mydist__, n__, nblk__, quot__, src__;      \
                 if( ( inb__ = (inb_) - (i_) ) <= 0 )                  \
                 {                                                     \
                    nblk__ = (-inb__) / (nb_) + 1;                     \
                    src__  = (src_) + nblk__;                          \
                    src__ -= ( src__ / (nprocs_) ) * (nprocs_);        \
                    inb__ += nblk__*(nb_);                             \
                    if( ( n__ = (n_) - inb__ ) <= 0 )                  \
                    {                                                  \
                       if( (proc_) == src__ ) np_ = (n_);              \
                       else                   np_ = 0;                 \
                    }                                                  \
                    else                                               \
                    {                                                  \
                       if( ( mydist__ = (proc_) - src__ ) < 0 )        \
                          mydist__ += (nprocs_);                       \
                       nblk__    = n__ / (nb_) + 1;                    \
                       mydist__ -= nblk__ -                            \
                          (quot__ = (nblk__ / (nprocs_))) * (nprocs_); \
                       if( mydist__ < 0 )                              \
                       {                                               \
                          if( (proc_) != src__ )                       \
                             np_ = (nb_) + (nb_) * quot__;             \
                          else                                         \
                             np_ = inb__ + (nb_) * quot__;             \
                       }                                               \
                       else if( mydist__ > 0 )                         \
                       {                                               \
                          np_ = (nb_) * quot__;                        \
                       }                                               \
                       else                                            \
                       {                                               \
                          if( (proc_) != src__ )                       \
                             np_ = n__ +(nb_)+(nb_)*(quot__ - nblk__); \
                          else                                         \
                             np_ = (n_)+      (nb_)*(quot__ - nblk__); \
                       }                                               \
                    }                                                  \
                 }                                                     \
                 else                                                  \
                 {                                                     \
                    if( ( n__ = (n_) - inb__ ) <= 0 )                  \
                    {                                                  \
                       if( (proc_) == (src_) ) np_ = (n_);             \
                       else                    np_ = 0;                \
                    }                                                  \
                    else                                               \
                    {                                                  \
                       if( ( mydist__ = (proc_) - (src_) ) < 0 )       \
                          mydist__ += (nprocs_);                       \
                       nblk__    = n__ / (nb_) + 1;                    \
                       mydist__ -= nblk__ -                            \
                          ( quot__ = (nblk__ / (nprocs_)) )*(nprocs_); \
                       if( mydist__ < 0 )                              \
                       {                                               \
                          if( (proc_) != (src_) )                      \
                             np_ = (nb_) + (nb_) * quot__;             \
                          else                                         \
                             np_ = inb__ + (nb_) * quot__;             \
                       }                                               \
                       else if( mydist__ > 0 )                         \
                       {                                               \
                          np_ = (nb_) * quot__;                        \
                       }                                               \
                       else                                            \
                       {                                               \
                          if( (proc_) != (src_) )                      \
                             np_ = n__ +(nb_)+(nb_)*(quot__ - nblk__); \
                          else                                         \
                             np_ = (n_)+      (nb_)*(quot__ - nblk__); \
                       }                                               \
                    }                                                  \
                 }                                                     \
              }                                                        \
              else                                                     \
              {                                                        \
                 np_ = (n_);                                           \
              }                                                        \
           }
234 1 equemene
235 1 equemene
/*
 * Mnumroc  is  MnumrocI  with the first global index of the interval
 * fixed to 0:  np_ receives the # of local indexes residing in process
 * proc_ for the global indexes 0:n_-1.  All other arguments are handed
 * to MnumrocI unchanged.
 */
#define    Mnumroc( np_, n_, inb_, nb_, proc_, src_, nprocs_ )         \
           MnumrocI( np_, n_, 0, inb_, nb_, proc_, src_, nprocs_ )
237 1 equemene
/*
238 1 equemene
 * ---------------------------------------------------------------------
239 1 equemene
 * Function prototypes
240 1 equemene
 * ---------------------------------------------------------------------
241 1 equemene
 */
242 1 equemene
void                             HPL_indxg2lp
243 1 equemene
STDC_ARGS( (
244 1 equemene
   int *,
245 1 equemene
   int *,
246 1 equemene
   const int,
247 1 equemene
   const int,
248 1 equemene
   const int,
249 1 equemene
   const int,
250 1 equemene
   const int
251 1 equemene
) );
252 1 equemene
int                              HPL_indxg2l
253 1 equemene
STDC_ARGS( (
254 1 equemene
   const int,
255 1 equemene
   const int,
256 1 equemene
   const int,
257 1 equemene
   const int,
258 1 equemene
   const int
259 1 equemene
) );
260 1 equemene
int                              HPL_indxg2p
261 1 equemene
STDC_ARGS( (
262 1 equemene
   const int,
263 1 equemene
   const int,
264 1 equemene
   const int,
265 1 equemene
   const int,
266 1 equemene
   const int
267 1 equemene
) );
268 1 equemene
int                              HPL_indxl2g
269 1 equemene
STDC_ARGS( (
270 1 equemene
   const int,
271 1 equemene
   const int,
272 1 equemene
   const int,
273 1 equemene
   const int,
274 1 equemene
   const int,
275 1 equemene
   const int
276 1 equemene
) );
277 1 equemene
void                             HPL_infog2l
278 1 equemene
STDC_ARGS( (
279 1 equemene
   int,
280 1 equemene
   int,
281 1 equemene
   const int,
282 1 equemene
   const int,
283 1 equemene
   const int,
284 1 equemene
   const int,
285 1 equemene
   const int,
286 1 equemene
   const int,
287 1 equemene
   const int,
288 1 equemene
   const int,
289 1 equemene
   const int,
290 1 equemene
   const int,
291 1 equemene
   int *,
292 1 equemene
   int *,
293 1 equemene
   int *,
294 1 equemene
   int *
295 1 equemene
) );
296 1 equemene
int                              HPL_numroc
297 1 equemene
STDC_ARGS( (
298 1 equemene
   const int,
299 1 equemene
   const int,
300 1 equemene
   const int,
301 1 equemene
   const int,
302 1 equemene
   const int,
303 1 equemene
   const int
304 1 equemene
) );
305 1 equemene
int                              HPL_numrocI
306 1 equemene
STDC_ARGS( (
307 1 equemene
   const int,
308 1 equemene
   const int,
309 1 equemene
   const int,
310 1 equemene
   const int,
311 1 equemene
   const int,
312 1 equemene
   const int,
313 1 equemene
   const int
314 1 equemene
) );
315 1 equemene
316 1 equemene
void                             HPL_dlaswp00N
317 1 equemene
STDC_ARGS( (
318 1 equemene
   const int,
319 1 equemene
   const int,
320 1 equemene
   double *,
321 1 equemene
   const int,
322 1 equemene
   const int *
323 1 equemene
) );
324 1 equemene
void                             HPL_dlaswp10N
325 1 equemene
STDC_ARGS( (
326 1 equemene
   const int,
327 1 equemene
   const int,
328 1 equemene
   double *,
329 1 equemene
   const int,
330 1 equemene
   const int *
331 1 equemene
) );
332 1 equemene
void                             HPL_dlaswp01N
333 1 equemene
STDC_ARGS( (
334 1 equemene
   const int,
335 1 equemene
   const int,
336 1 equemene
   double *,
337 1 equemene
   const int,
338 1 equemene
   double *,
339 1 equemene
   const int,
340 1 equemene
   const int *,
341 1 equemene
   const int *
342 1 equemene
) );
343 1 equemene
void                             HPL_dlaswp01T
344 1 equemene
STDC_ARGS( (
345 1 equemene
   const int,
346 1 equemene
   const int,
347 1 equemene
   double *,
348 1 equemene
   const int,
349 1 equemene
   double *,
350 1 equemene
   const int,
351 1 equemene
   const int *,
352 1 equemene
   const int *
353 1 equemene
) );
354 1 equemene
void                             HPL_dlaswp02N
355 1 equemene
STDC_ARGS( (
356 1 equemene
   const int,
357 1 equemene
   const int,
358 1 equemene
   const double *,
359 1 equemene
   const int,
360 1 equemene
   double *,
361 1 equemene
   double *,
362 1 equemene
   const int,
363 1 equemene
   const int *,
364 1 equemene
   const int *
365 1 equemene
) );
366 1 equemene
void                             HPL_dlaswp03N
367 1 equemene
STDC_ARGS( (
368 1 equemene
   const int,
369 1 equemene
   const int,
370 1 equemene
   double *,
371 1 equemene
   const int,
372 1 equemene
   const double *,
373 1 equemene
   const double *,
374 1 equemene
   const int
375 1 equemene
) );
376 1 equemene
void                             HPL_dlaswp03T
377 1 equemene
STDC_ARGS( (
378 1 equemene
   const int,
379 1 equemene
   const int,
380 1 equemene
   double *,
381 1 equemene
   const int,
382 1 equemene
   const double *,
383 1 equemene
   const double *,
384 1 equemene
   const int
385 1 equemene
) );
386 1 equemene
void                             HPL_dlaswp04N
387 1 equemene
STDC_ARGS( (
388 1 equemene
   const int,
389 1 equemene
   const int,
390 1 equemene
   const int,
391 1 equemene
   double *,
392 1 equemene
   const int,
393 1 equemene
   double *,
394 1 equemene
   const int,
395 1 equemene
   const double *,
396 1 equemene
   const double *,
397 1 equemene
   const int,
398 1 equemene
   const int *,
399 1 equemene
   const int *
400 1 equemene
) );
401 1 equemene
void                             HPL_dlaswp04T
402 1 equemene
STDC_ARGS( (
403 1 equemene
   const int,
404 1 equemene
   const int,
405 1 equemene
   const int,
406 1 equemene
   double *,
407 1 equemene
   const int,
408 1 equemene
   double *,
409 1 equemene
   const int,
410 1 equemene
   const double *,
411 1 equemene
   const double *,
412 1 equemene
   const int,
413 1 equemene
   const int *,
414 1 equemene
   const int *
415 1 equemene
) );
416 1 equemene
void                             HPL_dlaswp05N
417 1 equemene
STDC_ARGS( (
418 1 equemene
   const int,
419 1 equemene
   const int,
420 1 equemene
   double *,
421 1 equemene
   const int,
422 1 equemene
   const double *,
423 1 equemene
   const int,
424 1 equemene
   const int *,
425 1 equemene
   const int *
426 1 equemene
) );
427 1 equemene
void                             HPL_dlaswp05T
428 1 equemene
STDC_ARGS( (
429 1 equemene
   const int,
430 1 equemene
   const int,
431 1 equemene
   double *,
432 1 equemene
   const int,
433 1 equemene
   const double *,
434 1 equemene
   const int,
435 1 equemene
   const int *,
436 1 equemene
   const int *
437 1 equemene
) );
438 1 equemene
void                             HPL_dlaswp06N
439 1 equemene
STDC_ARGS( (
440 1 equemene
   const int,
441 1 equemene
   const int,
442 1 equemene
   double *,
443 1 equemene
   const int,
444 1 equemene
   double *,
445 1 equemene
   const int,
446 1 equemene
   const int *
447 1 equemene
) );
448 1 equemene
void                             HPL_dlaswp06T
449 1 equemene
STDC_ARGS( (
450 1 equemene
   const int,
451 1 equemene
   const int,
452 1 equemene
   double *,
453 1 equemene
   const int,
454 1 equemene
   double *,
455 1 equemene
   const int,
456 1 equemene
   const int *
457 1 equemene
) );
458 1 equemene
459 1 equemene
void                             HPL_pabort
460 1 equemene
STDC_ARGS( (
461 1 equemene
   int,
462 1 equemene
   const char *,
463 1 equemene
   const char *,
464 1 equemene
   ...
465 1 equemene
) );
466 1 equemene
void                             HPL_pwarn
467 1 equemene
STDC_ARGS( (
468 1 equemene
   FILE *,
469 1 equemene
   int,
470 1 equemene
   const char *,
471 1 equemene
   const char *,
472 1 equemene
   ...
473 1 equemene
) );
474 1 equemene
void                             HPL_pdlaprnt
475 1 equemene
STDC_ARGS( (
476 1 equemene
   const HPL_T_grid *,
477 1 equemene
   const int,
478 1 equemene
   const int,
479 1 equemene
   const int,
480 1 equemene
   double *,
481 1 equemene
   const int,
482 1 equemene
   const int,
483 1 equemene
   const int,
484 1 equemene
   const char *
485 1 equemene
) );
486 1 equemene
double                           HPL_pdlamch
487 1 equemene
STDC_ARGS( (
488 1 equemene
   MPI_Comm,
489 1 equemene
   const HPL_T_MACH
490 1 equemene
) );
491 1 equemene
double                           HPL_pdlange
492 1 equemene
STDC_ARGS( (
493 1 equemene
   const HPL_T_grid *,
494 1 equemene
   const HPL_T_NORM,
495 1 equemene
   const int,
496 1 equemene
   const int,
497 1 equemene
   const int,
498 1 equemene
   const double *,
499 1 equemene
   const int
500 1 equemene
) );
501 1 equemene
502 1 equemene
#endif
503 1 equemene
/*
 * End of hpl_pauxil.h
 */