Statistiques
| Révision :

root / src / pfact / HPL_pdfact.c @ 7

Historique | Voir | Annoter | Télécharger (6,38 ko)

/*
 * -- High Performance Computing Linpack Benchmark (HPL)
 *    HPL - 2.0 - September 10, 2008
 *    Antoine P. Petitet
 *    University of Tennessee, Knoxville
 *    Innovative Computing Laboratory
 *    (C) Copyright 2000-2008 All Rights Reserved
 *
 * -- Copyright notice and Licensing terms:
 *
 * Redistribution  and  use in  source and binary forms, with or without
 * modification, are  permitted provided  that the following  conditions
 * are met:
 *
 * 1. Redistributions  of  source  code  must retain the above copyright
 * notice, this list of conditions and the following disclaimer.
 *
 * 2. Redistributions in binary form must reproduce  the above copyright
 * notice, this list of conditions,  and the following disclaimer in the
 * documentation and/or other materials provided with the distribution.
 *
 * 3. All  advertising  materials  mentioning  features  or  use of this
 * software must display the following acknowledgement:
 * This  product  includes  software  developed  at  the  University  of
 * Tennessee, Knoxville, Innovative Computing Laboratory.
 *
 * 4. The name of the  University,  the name of the  Laboratory,  or the
 * names  of  its  contributors  may  not  be used to endorse or promote
 * products  derived   from   this  software  without  specific  written
 * permission.
 *
 * -- Disclaimer:
 *
 * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
 * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
 * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 * ---------------------------------------------------------------------
 */
/*
 * Include files
 */
50 1 equemene
#include "hpl.h"
51 1 equemene
52 1 equemene
#ifdef STDC_HEADERS
53 1 equemene
void HPL_pdfact
54 1 equemene
(
55 1 equemene
   HPL_T_panel *                    PANEL
56 1 equemene
)
57 1 equemene
#else
58 1 equemene
void HPL_pdfact
59 1 equemene
( PANEL )
60 1 equemene
   HPL_T_panel *                    PANEL;
61 1 equemene
#endif
62 1 equemene
{
63 1 equemene
/*
64 1 equemene
 * Purpose
65 1 equemene
 * =======
66 1 equemene
 *
67 1 equemene
 * HPL_pdfact recursively factorizes a  1-dimensional  panel of columns.
68 1 equemene
 * The  RPFACT  function pointer specifies the recursive algorithm to be
69 1 equemene
 * used, either Crout, Left- or Right looking.  NBMIN allows to vary the
70 1 equemene
 * recursive stopping criterium in terms of the number of columns in the
71 1 equemene
 * panel, and  NDIV  allow to specify the number of subpanels each panel
72 1 equemene
 * should be divided into. Usuallly a value of 2 will be chosen. Finally
73 1 equemene
 * PFACT is a function pointer specifying the non-recursive algorithm to
74 1 equemene
 * to be used on at most NBMIN columns. One can also choose here between
75 1 equemene
 * Crout, Left- or Right looking.  Empirical tests seem to indicate that
76 1 equemene
 * values of 4 or 8 for NBMIN give the best results.
77 1 equemene
 *
78 1 equemene
 * Bi-directional  exchange  is  used  to  perform  the  swap::broadcast
79 1 equemene
 * operations  at once  for one column in the panel.  This  results in a
80 1 equemene
 * lower number of slightly larger  messages than usual.  On P processes
81 1 equemene
 * and assuming bi-directional links,  the running time of this function
82 1 equemene
 * can be approximated by (when N is equal to N0):
83 1 equemene
 *
84 1 equemene
 *    N0 * log_2( P ) * ( lat + ( 2*N0 + 4 ) / bdwth ) +
85 1 equemene
 *    N0^2 * ( M - N0/3 ) * gam2-3
86 1 equemene
 *
87 1 equemene
 * where M is the local number of rows of  the panel, lat and bdwth  are
88 1 equemene
 * the latency and bandwidth of the network for  double  precision  real
89 1 equemene
 * words, and  gam2-3  is  an estimate of the  Level 2 and Level 3  BLAS
90 1 equemene
 * rate of execution. The  recursive  algorithm  allows indeed to almost
91 1 equemene
 * achieve  Level 3 BLAS  performance  in the panel factorization.  On a
92 1 equemene
 * large  number of modern machines,  this  operation is however latency
93 1 equemene
 * bound,  meaning  that its cost can  be estimated  by only the latency
94 1 equemene
 * portion N0 * log_2(P) * lat.  Mono-directional links will double this
95 1 equemene
 * communication cost.
96 1 equemene
 *
97 1 equemene
 * Arguments
98 1 equemene
 * =========
99 1 equemene
 *
100 1 equemene
 * PANEL   (local input/output)          HPL_T_panel *
101 1 equemene
 *         On entry,  PANEL  points to the data structure containing the
102 1 equemene
 *         panel information.
103 1 equemene
 *
104 1 equemene
 * ---------------------------------------------------------------------
105 1 equemene
 */
106 1 equemene
/*
107 1 equemene
 * .. Local Variables ..
108 1 equemene
 */
109 1 equemene
   void                       * vptr = NULL;
110 1 equemene
   int                        align, jb;
111 1 equemene
/* ..
112 1 equemene
 * .. Executable Statements ..
113 1 equemene
 */
114 1 equemene
   jb = PANEL->jb; PANEL->n -= jb; PANEL->ja += jb;
115 1 equemene
116 1 equemene
   if( ( PANEL->grid->mycol != PANEL->pcol ) || ( jb <= 0 ) ) return;
117 1 equemene
#ifdef HPL_DETAILED_TIMING
118 1 equemene
   HPL_ptimer( HPL_TIMING_RPFACT );
119 1 equemene
#endif
120 1 equemene
   align = PANEL->algo->align;
121 1 equemene
   vptr  = (void *)malloc( ( (size_t)(align) +
122 1 equemene
              (size_t)(((4+((unsigned int)(jb) << 1)) << 1) )) *
123 1 equemene
              sizeof(double) );
124 1 equemene
   if( vptr == NULL )
125 1 equemene
   { HPL_pabort( __LINE__, "HPL_pdfact", "Memory allocation failed" ); }
126 1 equemene
/*
127 1 equemene
 * Factor the panel - Update the panel pointers
128 1 equemene
 */
129 1 equemene
   PANEL->algo->rffun( PANEL, PANEL->mp, jb, 0, (double *)HPL_PTR( vptr,
130 1 equemene
                       ((size_t)(align) * sizeof(double) ) ) );
131 1 equemene
   if( vptr ) free( vptr );
132 1 equemene
133 1 equemene
   PANEL->A   = Mptr( PANEL->A, 0, jb, PANEL->lda );
134 1 equemene
   PANEL->nq -= jb; PANEL->jj += jb;
135 1 equemene
#ifdef HPL_DETAILED_TIMING
136 1 equemene
   HPL_ptimer( HPL_TIMING_RPFACT );
137 1 equemene
#endif
138 1 equemene
/*
139 1 equemene
 * End of HPL_pdfact
140 1 equemene
 */
141 1 equemene
}