root / src / pfact / HPL_pdfact.c @ 7
Historique | Voir | Annoter | Télécharger (6,38 ko)
1 | 1 | equemene | /*
|
---|---|---|---|
2 | 1 | equemene | * -- High Performance Computing Linpack Benchmark (HPL)
|
3 | 1 | equemene | * HPL - 2.0 - September 10, 2008
|
4 | 1 | equemene | * Antoine P. Petitet
|
5 | 1 | equemene | * University of Tennessee, Knoxville
|
6 | 1 | equemene | * Innovative Computing Laboratory
|
7 | 1 | equemene | * (C) Copyright 2000-2008 All Rights Reserved
|
8 | 1 | equemene | *
|
9 | 1 | equemene | * -- Copyright notice and Licensing terms:
|
10 | 1 | equemene | *
|
11 | 1 | equemene | * Redistribution and use in source and binary forms, with or without
|
12 | 1 | equemene | * modification, are permitted provided that the following conditions
|
13 | 1 | equemene | * are met:
|
14 | 1 | equemene | *
|
15 | 1 | equemene | * 1. Redistributions of source code must retain the above copyright
|
16 | 1 | equemene | * notice, this list of conditions and the following disclaimer.
|
17 | 1 | equemene | *
|
18 | 1 | equemene | * 2. Redistributions in binary form must reproduce the above copyright
|
19 | 1 | equemene | * notice, this list of conditions, and the following disclaimer in the
|
20 | 1 | equemene | * documentation and/or other materials provided with the distribution.
|
21 | 1 | equemene | *
|
22 | 1 | equemene | * 3. All advertising materials mentioning features or use of this
|
23 | 1 | equemene | * software must display the following acknowledgement:
|
24 | 1 | equemene | * This product includes software developed at the University of
|
25 | 1 | equemene | * Tennessee, Knoxville, Innovative Computing Laboratory.
|
26 | 1 | equemene | *
|
27 | 1 | equemene | * 4. The name of the University, the name of the Laboratory, or the
|
28 | 1 | equemene | * names of its contributors may not be used to endorse or promote
|
29 | 1 | equemene | * products derived from this software without specific written
|
30 | 1 | equemene | * permission.
|
31 | 1 | equemene | *
|
32 | 1 | equemene | * -- Disclaimer:
|
33 | 1 | equemene | *
|
34 | 1 | equemene | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
35 | 1 | equemene | * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
36 | 1 | equemene | * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
37 | 1 | equemene | * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
|
38 | 1 | equemene | * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
39 | 1 | equemene | * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
40 | 1 | equemene | * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
41 | 1 | equemene | * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
42 | 1 | equemene | * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
43 | 1 | equemene | * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
44 | 1 | equemene | * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
45 | 1 | equemene | * ---------------------------------------------------------------------
|
46 | 1 | equemene | */
|
47 | 1 | equemene | /*
|
48 | 1 | equemene | * Include files
|
49 | 1 | equemene | */
|
50 | 1 | equemene | #include "hpl.h" |
51 | 1 | equemene | |
52 | 1 | equemene | #ifdef STDC_HEADERS
|
53 | 1 | equemene | void HPL_pdfact
|
54 | 1 | equemene | ( |
55 | 1 | equemene | HPL_T_panel * PANEL |
56 | 1 | equemene | ) |
57 | 1 | equemene | #else
|
58 | 1 | equemene | void HPL_pdfact
|
59 | 1 | equemene | ( PANEL ) |
60 | 1 | equemene | HPL_T_panel * PANEL; |
61 | 1 | equemene | #endif
|
62 | 1 | equemene | { |
63 | 1 | equemene | /*
|
64 | 1 | equemene | * Purpose
|
65 | 1 | equemene | * =======
|
66 | 1 | equemene | *
|
67 | 1 | equemene | * HPL_pdfact recursively factorizes a 1-dimensional panel of columns.
|
68 | 1 | equemene | * The RPFACT function pointer specifies the recursive algorithm to be
|
69 | 1 | equemene | * used, either Crout, Left- or Right looking. NBMIN allows to vary the
|
70 | 1 | equemene | * recursive stopping criterium in terms of the number of columns in the
|
71 | 1 | equemene | * panel, and NDIV allow to specify the number of subpanels each panel
|
72 | 1 | equemene | * should be divided into. Usuallly a value of 2 will be chosen. Finally
|
73 | 1 | equemene | * PFACT is a function pointer specifying the non-recursive algorithm to
|
74 | 1 | equemene | * to be used on at most NBMIN columns. One can also choose here between
|
75 | 1 | equemene | * Crout, Left- or Right looking. Empirical tests seem to indicate that
|
76 | 1 | equemene | * values of 4 or 8 for NBMIN give the best results.
|
77 | 1 | equemene | *
|
78 | 1 | equemene | * Bi-directional exchange is used to perform the swap::broadcast
|
79 | 1 | equemene | * operations at once for one column in the panel. This results in a
|
80 | 1 | equemene | * lower number of slightly larger messages than usual. On P processes
|
81 | 1 | equemene | * and assuming bi-directional links, the running time of this function
|
82 | 1 | equemene | * can be approximated by (when N is equal to N0):
|
83 | 1 | equemene | *
|
84 | 1 | equemene | * N0 * log_2( P ) * ( lat + ( 2*N0 + 4 ) / bdwth ) +
|
85 | 1 | equemene | * N0^2 * ( M - N0/3 ) * gam2-3
|
86 | 1 | equemene | *
|
87 | 1 | equemene | * where M is the local number of rows of the panel, lat and bdwth are
|
88 | 1 | equemene | * the latency and bandwidth of the network for double precision real
|
89 | 1 | equemene | * words, and gam2-3 is an estimate of the Level 2 and Level 3 BLAS
|
90 | 1 | equemene | * rate of execution. The recursive algorithm allows indeed to almost
|
91 | 1 | equemene | * achieve Level 3 BLAS performance in the panel factorization. On a
|
92 | 1 | equemene | * large number of modern machines, this operation is however latency
|
93 | 1 | equemene | * bound, meaning that its cost can be estimated by only the latency
|
94 | 1 | equemene | * portion N0 * log_2(P) * lat. Mono-directional links will double this
|
95 | 1 | equemene | * communication cost.
|
96 | 1 | equemene | *
|
97 | 1 | equemene | * Arguments
|
98 | 1 | equemene | * =========
|
99 | 1 | equemene | *
|
100 | 1 | equemene | * PANEL (local input/output) HPL_T_panel *
|
101 | 1 | equemene | * On entry, PANEL points to the data structure containing the
|
102 | 1 | equemene | * panel information.
|
103 | 1 | equemene | *
|
104 | 1 | equemene | * ---------------------------------------------------------------------
|
105 | 1 | equemene | */
|
106 | 1 | equemene | /*
|
107 | 1 | equemene | * .. Local Variables ..
|
108 | 1 | equemene | */
|
109 | 1 | equemene | void * vptr = NULL; |
110 | 1 | equemene | int align, jb;
|
111 | 1 | equemene | /* ..
|
112 | 1 | equemene | * .. Executable Statements ..
|
113 | 1 | equemene | */
|
114 | 1 | equemene | jb = PANEL->jb; PANEL->n -= jb; PANEL->ja += jb; |
115 | 1 | equemene | |
116 | 1 | equemene | if( ( PANEL->grid->mycol != PANEL->pcol ) || ( jb <= 0 ) ) return; |
117 | 1 | equemene | #ifdef HPL_DETAILED_TIMING
|
118 | 1 | equemene | HPL_ptimer( HPL_TIMING_RPFACT ); |
119 | 1 | equemene | #endif
|
120 | 1 | equemene | align = PANEL->algo->align; |
121 | 1 | equemene | vptr = (void *)malloc( ( (size_t)(align) +
|
122 | 1 | equemene | (size_t)(((4+((unsigned int)(jb) << 1)) << 1) )) * |
123 | 1 | equemene | sizeof(double) ); |
124 | 1 | equemene | if( vptr == NULL ) |
125 | 1 | equemene | { HPL_pabort( __LINE__, "HPL_pdfact", "Memory allocation failed" ); } |
126 | 1 | equemene | /*
|
127 | 1 | equemene | * Factor the panel - Update the panel pointers
|
128 | 1 | equemene | */
|
129 | 1 | equemene | PANEL->algo->rffun( PANEL, PANEL->mp, jb, 0, (double *)HPL_PTR( vptr, |
130 | 1 | equemene | ((size_t)(align) * sizeof(double) ) ) ); |
131 | 1 | equemene | if( vptr ) free( vptr );
|
132 | 1 | equemene | |
133 | 1 | equemene | PANEL->A = Mptr( PANEL->A, 0, jb, PANEL->lda );
|
134 | 1 | equemene | PANEL->nq -= jb; PANEL->jj += jb; |
135 | 1 | equemene | #ifdef HPL_DETAILED_TIMING
|
136 | 1 | equemene | HPL_ptimer( HPL_TIMING_RPFACT ); |
137 | 1 | equemene | #endif
|
138 | 1 | equemene | /*
|
139 | 1 | equemene | * End of HPL_pdfact
|
140 | 1 | equemene | */
|
141 | 1 | equemene | } |