root / src / blas / HPL_dtrsm.c @ 9
Historique | Voir | Annoter | Télécharger (32,94 ko)
1 | 1 | equemene | /*
|
---|---|---|---|
2 | 1 | equemene | * -- High Performance Computing Linpack Benchmark (HPL)
|
3 | 1 | equemene | * HPL - 2.0 - September 10, 2008
|
4 | 1 | equemene | * Antoine P. Petitet
|
5 | 1 | equemene | * University of Tennessee, Knoxville
|
6 | 1 | equemene | * Innovative Computing Laboratory
|
7 | 1 | equemene | * (C) Copyright 2000-2008 All Rights Reserved
|
8 | 1 | equemene | *
|
9 | 1 | equemene | * -- Copyright notice and Licensing terms:
|
10 | 1 | equemene | *
|
11 | 1 | equemene | * Redistribution and use in source and binary forms, with or without
|
12 | 1 | equemene | * modification, are permitted provided that the following conditions
|
13 | 1 | equemene | * are met:
|
14 | 1 | equemene | *
|
15 | 1 | equemene | * 1. Redistributions of source code must retain the above copyright
|
16 | 1 | equemene | * notice, this list of conditions and the following disclaimer.
|
17 | 1 | equemene | *
|
18 | 1 | equemene | * 2. Redistributions in binary form must reproduce the above copyright
|
19 | 1 | equemene | * notice, this list of conditions, and the following disclaimer in the
|
20 | 1 | equemene | * documentation and/or other materials provided with the distribution.
|
21 | 1 | equemene | *
|
22 | 1 | equemene | * 3. All advertising materials mentioning features or use of this
|
23 | 1 | equemene | * software must display the following acknowledgement:
|
24 | 1 | equemene | * This product includes software developed at the University of
|
25 | 1 | equemene | * Tennessee, Knoxville, Innovative Computing Laboratory.
|
26 | 1 | equemene | *
|
27 | 1 | equemene | * 4. The name of the University, the name of the Laboratory, or the
|
28 | 1 | equemene | * names of its contributors may not be used to endorse or promote
|
29 | 1 | equemene | * products derived from this software without specific written
|
30 | 1 | equemene | * permission.
|
31 | 1 | equemene | *
|
32 | 1 | equemene | * -- Disclaimer:
|
33 | 1 | equemene | *
|
34 | 1 | equemene | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
35 | 1 | equemene | * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
36 | 1 | equemene | * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
37 | 1 | equemene | * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
|
38 | 1 | equemene | * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
39 | 1 | equemene | * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
40 | 1 | equemene | * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
41 | 1 | equemene | * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
42 | 1 | equemene | * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
43 | 1 | equemene | * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
44 | 1 | equemene | * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
45 | 1 | equemene | * ---------------------------------------------------------------------
|
46 | 1 | equemene | */
|
47 | 1 | equemene | /*
|
48 | 1 | equemene | * Include files
|
49 | 1 | equemene | */
|
50 | 1 | equemene | #include "hpl.h" |
51 | 1 | equemene | |
52 | 1 | equemene | #ifndef HPL_dtrsm
|
53 | 1 | equemene | |
54 | 1 | equemene | #ifdef HPL_CALL_VSIPL
|
55 | 1 | equemene | |
56 | 1 | equemene | #ifdef STDC_HEADERS
|
57 | 1 | equemene | static void HPL_dtrsmLLNN |
58 | 1 | equemene | ( |
59 | 1 | equemene | const int M, |
60 | 1 | equemene | const int N, |
61 | 1 | equemene | const double ALPHA, |
62 | 1 | equemene | const double * A, |
63 | 1 | equemene | const int LDA, |
64 | 1 | equemene | double * B,
|
65 | 1 | equemene | const int LDB |
66 | 1 | equemene | ) |
67 | 1 | equemene | #else
|
68 | 1 | equemene | static void HPL_dtrsmLLNN( M, N, ALPHA, A, LDA, B, LDB ) |
69 | 1 | equemene | const int LDA, LDB, M, N; |
70 | 1 | equemene | const double ALPHA; |
71 | 1 | equemene | const double * A; |
72 | 1 | equemene | double * B;
|
73 | 1 | equemene | #endif
|
74 | 1 | equemene | { |
75 | 1 | equemene | int i, iaik, ibij, ibkj, j, jak, jbj, k;
|
76 | 1 | equemene | |
77 | 1 | equemene | for( j = 0, jbj = 0; j < N; j++, jbj += LDB ) |
78 | 1 | equemene | { |
79 | 1 | equemene | for( i = 0, ibij= jbj; i < M; i++, ibij += 1 ) { B[ibij] *= ALPHA; } |
80 | 1 | equemene | for( k = 0, jak = 0, ibkj = jbj; k < M; k++, jak += LDA, ibkj += 1 ) |
81 | 1 | equemene | { |
82 | 1 | equemene | B[ibkj] /= A[k+jak]; |
83 | 1 | equemene | for( i = k+1, iaik = k+1+jak, ibij = k+1+jbj; |
84 | 1 | equemene | i < M; i++, iaik +=1, ibij += 1 ) |
85 | 1 | equemene | { B[ibij] -= B[ibkj] * A[iaik]; } |
86 | 1 | equemene | } |
87 | 1 | equemene | } |
88 | 1 | equemene | } |
89 | 1 | equemene | |
90 | 1 | equemene | #ifdef STDC_HEADERS
|
91 | 1 | equemene | static void HPL_dtrsmLLNU |
92 | 1 | equemene | ( |
93 | 1 | equemene | const int M, |
94 | 1 | equemene | const int N, |
95 | 1 | equemene | const double ALPHA, |
96 | 1 | equemene | const double * A, |
97 | 1 | equemene | const int LDA, |
98 | 1 | equemene | double * B,
|
99 | 1 | equemene | const int LDB |
100 | 1 | equemene | ) |
101 | 1 | equemene | #else
|
102 | 1 | equemene | static void HPL_dtrsmLLNU( M, N, ALPHA, A, LDA, B, LDB ) |
103 | 1 | equemene | const int LDA, LDB, M, N; |
104 | 1 | equemene | const double ALPHA; |
105 | 1 | equemene | const double * A; |
106 | 1 | equemene | double * B;
|
107 | 1 | equemene | #endif
|
108 | 1 | equemene | { |
109 | 1 | equemene | int i, iaik, ibij, ibkj, j, jak, jbj, k;
|
110 | 1 | equemene | |
111 | 1 | equemene | for( j = 0, jbj = 0; j < N; j++, jbj += LDB ) |
112 | 1 | equemene | { |
113 | 1 | equemene | for( i = 0, ibij= jbj; i < M; i++, ibij += 1 ) { B[ibij] *= ALPHA; } |
114 | 1 | equemene | for( k = 0, jak = 0, ibkj = jbj; k < M; k++, jak += LDA, ibkj += 1 ) |
115 | 1 | equemene | { |
116 | 1 | equemene | for( i = k+1, iaik = k+1+jak, ibij = k+1+jbj; |
117 | 1 | equemene | i < M; i++, iaik +=1, ibij += 1 ) |
118 | 1 | equemene | { B[ibij] -= B[ibkj] * A[iaik]; } |
119 | 1 | equemene | } |
120 | 1 | equemene | } |
121 | 1 | equemene | } |
122 | 1 | equemene | |
123 | 1 | equemene | #ifdef STDC_HEADERS
|
124 | 1 | equemene | static void HPL_dtrsmLLTN |
125 | 1 | equemene | ( |
126 | 1 | equemene | const int M, |
127 | 1 | equemene | const int N, |
128 | 1 | equemene | const double ALPHA, |
129 | 1 | equemene | const double * A, |
130 | 1 | equemene | const int LDA, |
131 | 1 | equemene | double * B,
|
132 | 1 | equemene | const int LDB |
133 | 1 | equemene | ) |
134 | 1 | equemene | #else
|
135 | 1 | equemene | static void HPL_dtrsmLLTN( M, N, ALPHA, A, LDA, B, LDB ) |
136 | 1 | equemene | const int LDA, LDB, M, N; |
137 | 1 | equemene | const double ALPHA; |
138 | 1 | equemene | const double * A; |
139 | 1 | equemene | double * B;
|
140 | 1 | equemene | #endif
|
141 | 1 | equemene | { |
142 | 1 | equemene | register double t0; |
143 | 1 | equemene | int i, iaki, ibij, ibkj, j, jai, jbj, k;
|
144 | 1 | equemene | |
145 | 1 | equemene | for( j = 0, jbj = 0; j < N; j++, jbj += LDB ) |
146 | 1 | equemene | { |
147 | 1 | equemene | for( i = M-1, jai = (M-1)*LDA, ibij = M-1+jbj; |
148 | 1 | equemene | i >= 0; i--, jai -= LDA, ibij -= 1 ) |
149 | 1 | equemene | { |
150 | 1 | equemene | t0 = ALPHA * B[ibij]; |
151 | 1 | equemene | for( k = i+1, iaki = i+1+jai, ibkj = i+1+jbj; |
152 | 1 | equemene | k < M; k++, iaki += 1, ibkj += 1 ) |
153 | 1 | equemene | { t0 -= A[iaki] * B[ibkj]; } |
154 | 1 | equemene | t0 /= A[i+jai]; |
155 | 1 | equemene | B[ibij] = t0; |
156 | 1 | equemene | } |
157 | 1 | equemene | } |
158 | 1 | equemene | } |
159 | 1 | equemene | |
160 | 1 | equemene | #ifdef STDC_HEADERS
|
161 | 1 | equemene | static void HPL_dtrsmLLTU |
162 | 1 | equemene | ( |
163 | 1 | equemene | const int M, |
164 | 1 | equemene | const int N, |
165 | 1 | equemene | const double ALPHA, |
166 | 1 | equemene | const double * A, |
167 | 1 | equemene | const int LDA, |
168 | 1 | equemene | double * B,
|
169 | 1 | equemene | const int LDB |
170 | 1 | equemene | ) |
171 | 1 | equemene | #else
|
172 | 1 | equemene | static void HPL_dtrsmLLTU( M, N, ALPHA, A, LDA, B, LDB ) |
173 | 1 | equemene | const int LDA, LDB, M, N; |
174 | 1 | equemene | const double ALPHA; |
175 | 1 | equemene | const double * A; |
176 | 1 | equemene | double * B;
|
177 | 1 | equemene | #endif
|
178 | 1 | equemene | { |
179 | 1 | equemene | register double t0; |
180 | 1 | equemene | int i, iaki, ibij, ibkj, j, jai, jbj, k;
|
181 | 1 | equemene | |
182 | 1 | equemene | for( j = 0, jbj = 0; j < N; j++, jbj += LDB ) |
183 | 1 | equemene | { |
184 | 1 | equemene | for( i = M-1, jai = (M-1)*LDA, ibij = M-1+jbj; |
185 | 1 | equemene | i >= 0; i--, jai -= LDA, ibij -= 1 ) |
186 | 1 | equemene | { |
187 | 1 | equemene | t0 = ALPHA * B[ibij]; |
188 | 1 | equemene | for( k = i+1, iaki = i+1+jai, ibkj = i+1+jbj; |
189 | 1 | equemene | k < M; k++, iaki += 1, ibkj += 1 ) |
190 | 1 | equemene | { t0 -= A[iaki] * B[ibkj]; } |
191 | 1 | equemene | B[ibij] = t0; |
192 | 1 | equemene | } |
193 | 1 | equemene | } |
194 | 1 | equemene | } |
195 | 1 | equemene | |
196 | 1 | equemene | #ifdef STDC_HEADERS
|
197 | 1 | equemene | static void HPL_dtrsmLUNN |
198 | 1 | equemene | ( |
199 | 1 | equemene | const int M, |
200 | 1 | equemene | const int N, |
201 | 1 | equemene | const double ALPHA, |
202 | 1 | equemene | const double * A, |
203 | 1 | equemene | const int LDA, |
204 | 1 | equemene | double * B,
|
205 | 1 | equemene | const int LDB |
206 | 1 | equemene | ) |
207 | 1 | equemene | #else
|
208 | 1 | equemene | static void HPL_dtrsmLUNN( M, N, ALPHA, A, LDA, B, LDB ) |
209 | 1 | equemene | const int LDA, LDB, M, N; |
210 | 1 | equemene | const double ALPHA; |
211 | 1 | equemene | const double * A; |
212 | 1 | equemene | double * B;
|
213 | 1 | equemene | #endif
|
214 | 1 | equemene | { |
215 | 1 | equemene | int i, iaik, ibij, ibkj, j, jak, jbj, k;
|
216 | 1 | equemene | |
217 | 1 | equemene | for( j = 0, jbj = 0; j < N; j++, jbj += LDB ) |
218 | 1 | equemene | { |
219 | 1 | equemene | for( i = 0, ibij = jbj; i < M; i++, ibij += 1 ) { B[ibij] *= ALPHA; } |
220 | 1 | equemene | for( k = M-1, jak = (M-1)*LDA, ibkj = M-1+jbj; |
221 | 1 | equemene | k >= 0; k--, jak -= LDA, ibkj -= 1 ) |
222 | 1 | equemene | { |
223 | 1 | equemene | B[ibkj] /= A[k+jak]; |
224 | 1 | equemene | for( i = 0, iaik = jak, ibij = jbj; |
225 | 1 | equemene | i < k; i++, iaik += 1, ibij += 1 ) |
226 | 1 | equemene | { B[ibij] -= B[ibkj] * A[iaik]; } |
227 | 1 | equemene | } |
228 | 1 | equemene | } |
229 | 1 | equemene | } |
230 | 1 | equemene | |
231 | 1 | equemene | |
232 | 1 | equemene | #ifdef STDC_HEADERS
|
233 | 1 | equemene | static void HPL_dtrsmLUNU |
234 | 1 | equemene | ( |
235 | 1 | equemene | const int M, |
236 | 1 | equemene | const int N, |
237 | 1 | equemene | const double ALPHA, |
238 | 1 | equemene | const double * A, |
239 | 1 | equemene | const int LDA, |
240 | 1 | equemene | double * B,
|
241 | 1 | equemene | const int LDB |
242 | 1 | equemene | ) |
243 | 1 | equemene | #else
|
244 | 1 | equemene | static void HPL_dtrsmLUNU( M, N, ALPHA, A, LDA, B, LDB ) |
245 | 1 | equemene | const int LDA, LDB, M, N; |
246 | 1 | equemene | const double ALPHA; |
247 | 1 | equemene | const double * A; |
248 | 1 | equemene | double * B;
|
249 | 1 | equemene | #endif
|
250 | 1 | equemene | { |
251 | 1 | equemene | int i, iaik, ibij, ibkj, j, jak, jbj, k;
|
252 | 1 | equemene | |
253 | 1 | equemene | for( j = 0, jbj = 0; j < N; j++, jbj += LDB ) |
254 | 1 | equemene | { |
255 | 1 | equemene | for( i = 0, ibij = jbj; i < M; i++, ibij += 1 ) { B[ibij] *= ALPHA; } |
256 | 1 | equemene | for( k = M-1, jak = (M-1)*LDA, ibkj = M-1+jbj; |
257 | 1 | equemene | k >= 0; k--, jak -= LDA, ibkj -= 1 ) |
258 | 1 | equemene | { |
259 | 1 | equemene | for( i = 0, iaik = jak, ibij = jbj; |
260 | 1 | equemene | i < k; i++, iaik += 1, ibij += 1 ) |
261 | 1 | equemene | { B[ibij] -= B[ibkj] * A[iaik]; } |
262 | 1 | equemene | } |
263 | 1 | equemene | } |
264 | 1 | equemene | } |
265 | 1 | equemene | |
266 | 1 | equemene | |
267 | 1 | equemene | #ifdef STDC_HEADERS
|
268 | 1 | equemene | static void HPL_dtrsmLUTN |
269 | 1 | equemene | ( |
270 | 1 | equemene | const int M, |
271 | 1 | equemene | const int N, |
272 | 1 | equemene | const double ALPHA, |
273 | 1 | equemene | const double * A, |
274 | 1 | equemene | const int LDA, |
275 | 1 | equemene | double * B,
|
276 | 1 | equemene | const int LDB |
277 | 1 | equemene | ) |
278 | 1 | equemene | #else
|
279 | 1 | equemene | static void HPL_dtrsmLUTN( M, N, ALPHA, A, LDA, B, LDB ) |
280 | 1 | equemene | const int LDA, LDB, M, N; |
281 | 1 | equemene | const double ALPHA; |
282 | 1 | equemene | const double * A; |
283 | 1 | equemene | double * B;
|
284 | 1 | equemene | #endif
|
285 | 1 | equemene | { |
286 | 1 | equemene | int i, iaki, ibij, ibkj, j, jai, jbj, k;
|
287 | 1 | equemene | register double t0; |
288 | 1 | equemene | |
289 | 1 | equemene | for( j = 0, jbj = 0; j < N; j++, jbj += LDB ) |
290 | 1 | equemene | { |
291 | 1 | equemene | for( i = 0, jai = 0, ibij = jbj; i < M; i++, jai += LDA, ibij += 1 ) |
292 | 1 | equemene | { |
293 | 1 | equemene | t0 = ALPHA * B[ibij]; |
294 | 1 | equemene | for( k = 0, iaki = jai, ibkj = jbj; k < i; k++, iaki += 1, ibkj += 1 ) |
295 | 1 | equemene | { t0 -= A[iaki] * B[ibkj]; } |
296 | 1 | equemene | t0 /= A[i+jai]; |
297 | 1 | equemene | B[ibij] = t0; |
298 | 1 | equemene | } |
299 | 1 | equemene | } |
300 | 1 | equemene | } |
301 | 1 | equemene | |
302 | 1 | equemene | |
303 | 1 | equemene | #ifdef STDC_HEADERS
|
304 | 1 | equemene | static void HPL_dtrsmLUTU |
305 | 1 | equemene | ( |
306 | 1 | equemene | const int M, |
307 | 1 | equemene | const int N, |
308 | 1 | equemene | const double ALPHA, |
309 | 1 | equemene | const double * A, |
310 | 1 | equemene | const int LDA, |
311 | 1 | equemene | double * B,
|
312 | 1 | equemene | const int LDB |
313 | 1 | equemene | ) |
314 | 1 | equemene | #else
|
315 | 1 | equemene | static void HPL_dtrsmLUTU( M, N, ALPHA, A, LDA, B, LDB ) |
316 | 1 | equemene | const int LDA, LDB, M, N; |
317 | 1 | equemene | const double ALPHA; |
318 | 1 | equemene | const double * A; |
319 | 1 | equemene | double * B;
|
320 | 1 | equemene | #endif
|
321 | 1 | equemene | { |
322 | 1 | equemene | register double t0; |
323 | 1 | equemene | int i, iaki, ibij, ibkj, j, jai, jbj, k;
|
324 | 1 | equemene | |
325 | 1 | equemene | for( j = 0, jbj = 0; j < N; j++, jbj += LDB ) |
326 | 1 | equemene | { |
327 | 1 | equemene | for( i = 0, jai = 0, ibij = jbj; i < M; i++, jai += LDA, ibij += 1 ) |
328 | 1 | equemene | { |
329 | 1 | equemene | t0 = ALPHA * B[ibij]; |
330 | 1 | equemene | for( k = 0, iaki = jai, ibkj = jbj; k < i; k++, iaki += 1, ibkj += 1 ) |
331 | 1 | equemene | { t0 -= A[iaki] * B[ibkj]; } |
332 | 1 | equemene | B[ibij] = t0; |
333 | 1 | equemene | } |
334 | 1 | equemene | } |
335 | 1 | equemene | } |
336 | 1 | equemene | |
337 | 1 | equemene | |
338 | 1 | equemene | #ifdef STDC_HEADERS
|
339 | 1 | equemene | static void HPL_dtrsmRLNN |
340 | 1 | equemene | ( |
341 | 1 | equemene | const int M, |
342 | 1 | equemene | const int N, |
343 | 1 | equemene | const double ALPHA, |
344 | 1 | equemene | const double * A, |
345 | 1 | equemene | const int LDA, |
346 | 1 | equemene | double * B,
|
347 | 1 | equemene | const int LDB |
348 | 1 | equemene | ) |
349 | 1 | equemene | #else
|
350 | 1 | equemene | static void HPL_dtrsmRLNN( M, N, ALPHA, A, LDA, B, LDB ) |
351 | 1 | equemene | const int LDA, LDB, M, N; |
352 | 1 | equemene | const double ALPHA; |
353 | 1 | equemene | const double * A; |
354 | 1 | equemene | double * B;
|
355 | 1 | equemene | #endif
|
356 | 1 | equemene | { |
357 | 1 | equemene | int i, iakj, ibij, ibik, j, jaj, jbj, jbk, k;
|
358 | 1 | equemene | |
359 | 1 | equemene | for( j = N-1, jaj = (N-1)*LDA, jbj = (N-1)*LDB; |
360 | 1 | equemene | j >= 0; j--, jaj -= LDA, jbj -= LDB )
|
361 | 1 | equemene | { |
362 | 1 | equemene | for( i = 0, ibij = jbj; i < M; i++, ibij += 1 ) { B[ibij] *= ALPHA; } |
363 | 1 | equemene | for( k = j+1, iakj = j+1+jaj, jbk = (j+1)*LDB; |
364 | 1 | equemene | k < N; k++, iakj += 1, jbk += LDB )
|
365 | 1 | equemene | { |
366 | 1 | equemene | for( i = 0, ibij = jbj, ibik = jbk; i < M; i++, ibij += 1, ibik += 1 ) |
367 | 1 | equemene | { B[ibij] -= A[iakj] * B[ibik]; } |
368 | 1 | equemene | } |
369 | 1 | equemene | for( i = 0, ibij = jbj; i < M; i++, ibij += 1 ) { B[ibij] /= A[j+jaj]; } |
370 | 1 | equemene | } |
371 | 1 | equemene | } |
372 | 1 | equemene | |
373 | 1 | equemene | |
374 | 1 | equemene | #ifdef STDC_HEADERS
|
375 | 1 | equemene | static void HPL_dtrsmRLNU |
376 | 1 | equemene | ( |
377 | 1 | equemene | const int M, |
378 | 1 | equemene | const int N, |
379 | 1 | equemene | const double ALPHA, |
380 | 1 | equemene | const double * A, |
381 | 1 | equemene | const int LDA, |
382 | 1 | equemene | double * B,
|
383 | 1 | equemene | const int LDB |
384 | 1 | equemene | ) |
385 | 1 | equemene | #else
|
386 | 1 | equemene | static void HPL_dtrsmRLNU( M, N, ALPHA, A, LDA, B, LDB ) |
387 | 1 | equemene | const int LDA, LDB, M, N; |
388 | 1 | equemene | const double ALPHA; |
389 | 1 | equemene | const double * A; |
390 | 1 | equemene | double * B;
|
391 | 1 | equemene | #endif
|
392 | 1 | equemene | { |
393 | 1 | equemene | int i, iakj, ibij, ibik, j, jaj, jbj, jbk, k;
|
394 | 1 | equemene | |
395 | 1 | equemene | for( j = N-1, jaj = (N-1)*LDA, jbj = (N-1)*LDB; |
396 | 1 | equemene | j >= 0; j--, jaj -= LDA, jbj -= LDB )
|
397 | 1 | equemene | { |
398 | 1 | equemene | for( i = 0, ibij = jbj; i < M; i++, ibij += 1 ) { B[ibij] *= ALPHA; } |
399 | 1 | equemene | for( k = j+1, iakj = j+1+jaj, jbk = (j+1)*LDB; |
400 | 1 | equemene | k < N; k++, iakj += 1, jbk += LDB )
|
401 | 1 | equemene | { |
402 | 1 | equemene | for( i = 0, ibij = jbj, ibik = jbk; i < M; i++, ibij += 1, ibik += 1 ) |
403 | 1 | equemene | { B[ibij] -= A[iakj] * B[ibik]; } |
404 | 1 | equemene | } |
405 | 1 | equemene | } |
406 | 1 | equemene | } |
407 | 1 | equemene | |
408 | 1 | equemene | |
409 | 1 | equemene | #ifdef STDC_HEADERS
|
410 | 1 | equemene | static void HPL_dtrsmRLTN |
411 | 1 | equemene | ( |
412 | 1 | equemene | const int M, |
413 | 1 | equemene | const int N, |
414 | 1 | equemene | const double ALPHA, |
415 | 1 | equemene | const double * A, |
416 | 1 | equemene | const int LDA, |
417 | 1 | equemene | double * B,
|
418 | 1 | equemene | const int LDB |
419 | 1 | equemene | ) |
420 | 1 | equemene | #else
|
421 | 1 | equemene | static void HPL_dtrsmRLTN( M, N, ALPHA, A, LDA, B, LDB ) |
422 | 1 | equemene | const int LDA, LDB, M, N; |
423 | 1 | equemene | const double ALPHA; |
424 | 1 | equemene | const double * A; |
425 | 1 | equemene | double * B;
|
426 | 1 | equemene | #endif
|
427 | 1 | equemene | { |
428 | 1 | equemene | register double t0; |
429 | 1 | equemene | int i, iajk, ibij, ibik, j, jak, jbj, jbk, k;
|
430 | 1 | equemene | |
431 | 1 | equemene | for( k = 0, jak = 0, jbk = 0; k < N; k++, jak += LDA, jbk += LDB ) |
432 | 1 | equemene | { |
433 | 1 | equemene | for( i = 0, ibik = jbk; i < M; i++, ibik += 1 ) { B[ibik] /= A[k+jak]; } |
434 | 1 | equemene | for( j = k+1, iajk = (k+1)+jak, jbj = (k+1)*LDB; |
435 | 1 | equemene | j < N; j++, iajk += 1, jbj += LDB )
|
436 | 1 | equemene | { |
437 | 1 | equemene | t0 = A[iajk]; |
438 | 1 | equemene | for( i = 0, ibij = jbj, ibik = jbk; i < M; i++, ibij += 1, ibik += 1 ) |
439 | 1 | equemene | { B[ibij] -= t0 * B[ibik]; } |
440 | 1 | equemene | } |
441 | 1 | equemene | for( i = 0, ibik = jbk; i < M; i++, ibik += 1 ) { B[ibik] *= ALPHA; } |
442 | 1 | equemene | } |
443 | 1 | equemene | } |
444 | 1 | equemene | |
445 | 1 | equemene | |
446 | 1 | equemene | #ifdef STDC_HEADERS
|
447 | 1 | equemene | static void HPL_dtrsmRLTU |
448 | 1 | equemene | ( |
449 | 1 | equemene | const int M, |
450 | 1 | equemene | const int N, |
451 | 1 | equemene | const double ALPHA, |
452 | 1 | equemene | const double * A, |
453 | 1 | equemene | const int LDA, |
454 | 1 | equemene | double * B,
|
455 | 1 | equemene | const int LDB |
456 | 1 | equemene | ) |
457 | 1 | equemene | #else
|
458 | 1 | equemene | static void HPL_dtrsmRLTU( M, N, ALPHA, A, LDA, B, LDB ) |
459 | 1 | equemene | const int LDA, LDB, M, N; |
460 | 1 | equemene | const double ALPHA; |
461 | 1 | equemene | const double * A; |
462 | 1 | equemene | double * B;
|
463 | 1 | equemene | #endif
|
464 | 1 | equemene | { |
465 | 1 | equemene | register double t0; |
466 | 1 | equemene | int i, iajk, ibij, ibik, j, jak, jbj, jbk, k;
|
467 | 1 | equemene | |
468 | 1 | equemene | for( k = 0, jak = 0, jbk = 0; k < N; k++, jak += LDA, jbk += LDB ) |
469 | 1 | equemene | { |
470 | 1 | equemene | for( j = k+1, iajk = (k+1)+jak, jbj = (k+1)*LDB; |
471 | 1 | equemene | j < N; j++, iajk += 1, jbj += LDB )
|
472 | 1 | equemene | { |
473 | 1 | equemene | t0 = A[iajk]; |
474 | 1 | equemene | for( i = 0, ibij = jbj, ibik = jbk; i < M; i++, ibij += 1, ibik += 1 ) |
475 | 1 | equemene | { B[ibij] -= t0 * B[ibik]; } |
476 | 1 | equemene | } |
477 | 1 | equemene | for( i = 0, ibik = jbk; i < M; i++, ibik += 1 ) { B[ibik] *= ALPHA; } |
478 | 1 | equemene | } |
479 | 1 | equemene | } |
480 | 1 | equemene | |
481 | 1 | equemene | |
482 | 1 | equemene | #ifdef STDC_HEADERS
|
483 | 1 | equemene | static void HPL_dtrsmRUNN |
484 | 1 | equemene | ( |
485 | 1 | equemene | const int M, |
486 | 1 | equemene | const int N, |
487 | 1 | equemene | const double ALPHA, |
488 | 1 | equemene | const double * A, |
489 | 1 | equemene | const int LDA, |
490 | 1 | equemene | double * B,
|
491 | 1 | equemene | const int LDB |
492 | 1 | equemene | ) |
493 | 1 | equemene | #else
|
494 | 1 | equemene | static void HPL_dtrsmRUNN( M, N, ALPHA, A, LDA, B, LDB ) |
495 | 1 | equemene | const int LDA, LDB, M, N; |
496 | 1 | equemene | const double ALPHA; |
497 | 1 | equemene | const double * A; |
498 | 1 | equemene | double * B;
|
499 | 1 | equemene | #endif
|
500 | 1 | equemene | { |
501 | 1 | equemene | int i, iakj, ibij, ibik, j, jaj, jbj, jbk, k;
|
502 | 1 | equemene | |
503 | 1 | equemene | for( j = 0, jaj = 0, jbj = 0; j < N; j++, jaj += LDA, jbj += LDB ) |
504 | 1 | equemene | { |
505 | 1 | equemene | for( i = 0, ibij = jbj; i < M; i++, ibij += 1 ) { B[ibij] *= ALPHA; } |
506 | 1 | equemene | for( k = 0, iakj = jaj, jbk = 0; k < j; k++, iakj += 1, jbk += LDB ) |
507 | 1 | equemene | { |
508 | 1 | equemene | for( i = 0, ibij = jbj, ibik = jbk; i < M; i++, ibij += 1, ibik += 1 ) |
509 | 1 | equemene | { B[ibij] -= A[iakj] * B[ibik]; } |
510 | 1 | equemene | } |
511 | 1 | equemene | for( i = 0, ibij = jbj; i < M; i++, ibij += 1 ) { B[ibij] /= A[j+jaj]; } |
512 | 1 | equemene | } |
513 | 1 | equemene | } |
514 | 1 | equemene | |
515 | 1 | equemene | |
516 | 1 | equemene | #ifdef STDC_HEADERS
|
517 | 1 | equemene | static void HPL_dtrsmRUNU |
518 | 1 | equemene | ( |
519 | 1 | equemene | const int M, |
520 | 1 | equemene | const int N, |
521 | 1 | equemene | const double ALPHA, |
522 | 1 | equemene | const double * A, |
523 | 1 | equemene | const int LDA, |
524 | 1 | equemene | double * B,
|
525 | 1 | equemene | const int LDB |
526 | 1 | equemene | ) |
527 | 1 | equemene | #else
|
528 | 1 | equemene | static void HPL_dtrsmRUNU( M, N, ALPHA, A, LDA, B, LDB ) |
529 | 1 | equemene | const int LDA, LDB, M, N; |
530 | 1 | equemene | const double ALPHA; |
531 | 1 | equemene | const double * A; |
532 | 1 | equemene | double * B;
|
533 | 1 | equemene | #endif
|
534 | 1 | equemene | { |
535 | 1 | equemene | int i, iakj, ibij, ibik, j, jaj, jbj, jbk, k;
|
536 | 1 | equemene | |
537 | 1 | equemene | for( j = 0, jaj = 0, jbj = 0; j < N; j++, jaj += LDA, jbj += LDB ) |
538 | 1 | equemene | { |
539 | 1 | equemene | for( i = 0, ibij = jbj; i < M; i++, ibij += 1 ) { B[ibij] *= ALPHA; } |
540 | 1 | equemene | for( k = 0, iakj = jaj, jbk = 0; k < j; k++, iakj += 1, jbk += LDB ) |
541 | 1 | equemene | { |
542 | 1 | equemene | for( i = 0, ibij = jbj, ibik = jbk; i < M; i++, ibij += 1, ibik += 1 ) |
543 | 1 | equemene | { B[ibij] -= A[iakj] * B[ibik]; } |
544 | 1 | equemene | } |
545 | 1 | equemene | } |
546 | 1 | equemene | } |
547 | 1 | equemene | |
548 | 1 | equemene | |
549 | 1 | equemene | #ifdef STDC_HEADERS
|
550 | 1 | equemene | static void HPL_dtrsmRUTN |
551 | 1 | equemene | ( |
552 | 1 | equemene | const int M, |
553 | 1 | equemene | const int N, |
554 | 1 | equemene | const double ALPHA, |
555 | 1 | equemene | const double * A, |
556 | 1 | equemene | const int LDA, |
557 | 1 | equemene | double * B,
|
558 | 1 | equemene | const int LDB |
559 | 1 | equemene | ) |
560 | 1 | equemene | #else
|
561 | 1 | equemene | static void HPL_dtrsmRUTN( M, N, ALPHA, A, LDA, B, LDB ) |
562 | 1 | equemene | const int LDA, LDB, M, N; |
563 | 1 | equemene | const double ALPHA; |
564 | 1 | equemene | const double * A; |
565 | 1 | equemene | double * B;
|
566 | 1 | equemene | #endif
|
567 | 1 | equemene | { |
568 | 1 | equemene | register double t0; |
569 | 1 | equemene | int i, iajk, ibij, ibik, j, jak, jbj, jbk, k;
|
570 | 1 | equemene | |
571 | 1 | equemene | for( k = N-1, jak = (N-1)*LDA, jbk = (N-1)*LDB; |
572 | 1 | equemene | k >= 0; k--, jak -= LDA, jbk -= LDB )
|
573 | 1 | equemene | { |
574 | 1 | equemene | for( i = 0, ibik = jbk; i < M; i++, ibik += 1 ) { B[ibik] /= A[k+jak]; } |
575 | 1 | equemene | for( j = 0, iajk = jak, jbj = 0; j < k; j++, iajk += 1, jbj += LDB ) |
576 | 1 | equemene | { |
577 | 1 | equemene | t0 = A[iajk]; |
578 | 1 | equemene | for( i = 0, ibij = jbj, ibik = jbk; i < M; i++, ibij += 1, ibik += 1 ) |
579 | 1 | equemene | { B[ibij] -= t0 * B[ibik]; } |
580 | 1 | equemene | } |
581 | 1 | equemene | for( i = 0, ibik = jbk; i < M; i++, ibik += 1 ) { B[ibik] *= ALPHA; } |
582 | 1 | equemene | } |
583 | 1 | equemene | } |
584 | 1 | equemene | |
585 | 1 | equemene | #ifdef STDC_HEADERS
|
586 | 1 | equemene | static void HPL_dtrsmRUTU |
587 | 1 | equemene | ( |
588 | 1 | equemene | const int M, |
589 | 1 | equemene | const int N, |
590 | 1 | equemene | const double ALPHA, |
591 | 1 | equemene | const double * A, |
592 | 1 | equemene | const int LDA, |
593 | 1 | equemene | double * B,
|
594 | 1 | equemene | const int LDB |
595 | 1 | equemene | ) |
596 | 1 | equemene | #else
|
597 | 1 | equemene | static void HPL_dtrsmRUTU( M, N, ALPHA, A, LDA, B, LDB ) |
598 | 1 | equemene | const int LDA, LDB, M, N; |
599 | 1 | equemene | const double ALPHA; |
600 | 1 | equemene | const double * A; |
601 | 1 | equemene | double * B;
|
602 | 1 | equemene | #endif
|
603 | 1 | equemene | { |
604 | 1 | equemene | register double t0; |
605 | 1 | equemene | int i, iajk, ibij, ibik, j, jak, jbj, jbk, k;
|
606 | 1 | equemene | |
607 | 1 | equemene | for( k = N-1, jak = (N-1)*LDA, jbk = (N-1)*LDB; |
608 | 1 | equemene | k >= 0; k--, jak -= LDA, jbk -= LDB )
|
609 | 1 | equemene | { |
610 | 1 | equemene | for( j = 0, iajk = jak, jbj = 0; j < k; j++, iajk += 1, jbj += LDB ) |
611 | 1 | equemene | { |
612 | 1 | equemene | t0 = A[iajk]; |
613 | 1 | equemene | for( i = 0, ibij = jbj, ibik = jbk; i < M; i++, ibij += 1, ibik += 1 ) |
614 | 1 | equemene | { B[ibij] -= t0 * B[ibik]; } |
615 | 1 | equemene | } |
616 | 1 | equemene | for( i = 0, ibik = jbk; i < M; i++, ibik += 1 ) { B[ibik] *= ALPHA; } |
617 | 1 | equemene | } |
618 | 1 | equemene | } |
619 | 1 | equemene | |
620 | 1 | equemene | #ifdef STDC_HEADERS
|
621 | 1 | equemene | static void HPL_dtrsm0 |
622 | 1 | equemene | ( |
623 | 1 | equemene | const enum HPL_SIDE SIDE, |
624 | 1 | equemene | const enum HPL_UPLO UPLO, |
625 | 1 | equemene | const enum HPL_TRANS TRANS, |
626 | 1 | equemene | const enum HPL_DIAG DIAG, |
627 | 1 | equemene | const int M, |
628 | 1 | equemene | const int N, |
629 | 1 | equemene | const double ALPHA, |
630 | 1 | equemene | const double * A, |
631 | 1 | equemene | const int LDA, |
632 | 1 | equemene | double * B,
|
633 | 1 | equemene | const int LDB |
634 | 1 | equemene | ) |
635 | 1 | equemene | #else
|
636 | 1 | equemene | static void HPL_dtrsm0( SIDE, UPLO, TRANS, DIAG, M, N, ALPHA, A, LDA, B, LDB ) |
637 | 1 | equemene | const enum HPL_SIDE SIDE; |
638 | 1 | equemene | const enum HPL_UPLO UPLO; |
639 | 1 | equemene | const enum HPL_TRANS TRANS; |
640 | 1 | equemene | const enum HPL_DIAG DIAG; |
641 | 1 | equemene | const int LDA, LDB, M, N; |
642 | 1 | equemene | const double ALPHA; |
643 | 1 | equemene | const double * A; |
644 | 1 | equemene | double * B;
|
645 | 1 | equemene | #endif
|
646 | 1 | equemene | { |
647 | 1 | equemene | int i, j;
|
648 | 1 | equemene | |
649 | 1 | equemene | if( ( M == 0 ) || ( N == 0 ) ) return; |
650 | 1 | equemene | |
651 | 1 | equemene | if( ALPHA == HPL_rzero )
|
652 | 1 | equemene | { |
653 | 1 | equemene | for( j = 0; j < N; j++ ) |
654 | 1 | equemene | { for( i = 0; i < M; i++ ) *(B+i+j*LDB) = HPL_rzero; } |
655 | 1 | equemene | return;
|
656 | 1 | equemene | } |
657 | 1 | equemene | |
658 | 1 | equemene | if( SIDE == HplLeft )
|
659 | 1 | equemene | { |
660 | 1 | equemene | if( UPLO == HplUpper )
|
661 | 1 | equemene | { |
662 | 1 | equemene | if( TRANS == HplNoTrans )
|
663 | 1 | equemene | { |
664 | 1 | equemene | if( DIAG == HplNonUnit )
|
665 | 1 | equemene | { HPL_dtrsmLUNN( M, N, ALPHA, A, LDA, B, LDB ); } |
666 | 1 | equemene | else { HPL_dtrsmLUNU( M, N, ALPHA, A, LDA, B, LDB ); }
|
667 | 1 | equemene | } |
668 | 1 | equemene | else
|
669 | 1 | equemene | { |
670 | 1 | equemene | if( DIAG == HplNonUnit )
|
671 | 1 | equemene | { HPL_dtrsmLUTN( M, N, ALPHA, A, LDA, B, LDB ); } |
672 | 1 | equemene | else { HPL_dtrsmLUTU( M, N, ALPHA, A, LDA, B, LDB ); }
|
673 | 1 | equemene | } |
674 | 1 | equemene | } |
675 | 1 | equemene | else
|
676 | 1 | equemene | { |
677 | 1 | equemene | if( TRANS == HplNoTrans )
|
678 | 1 | equemene | { |
679 | 1 | equemene | if( DIAG == HplNonUnit )
|
680 | 1 | equemene | { HPL_dtrsmLLNN( M, N, ALPHA, A, LDA, B, LDB ); } |
681 | 1 | equemene | else { HPL_dtrsmLLNU( M, N, ALPHA, A, LDA, B, LDB ); }
|
682 | 1 | equemene | } |
683 | 1 | equemene | else
|
684 | 1 | equemene | { |
685 | 1 | equemene | if( DIAG == HplNonUnit )
|
686 | 1 | equemene | { HPL_dtrsmLLTN( M, N, ALPHA, A, LDA, B, LDB ); } |
687 | 1 | equemene | else { HPL_dtrsmLLTU( M, N, ALPHA, A, LDA, B, LDB ); }
|
688 | 1 | equemene | } |
689 | 1 | equemene | } |
690 | 1 | equemene | } |
691 | 1 | equemene | else
|
692 | 1 | equemene | { |
693 | 1 | equemene | if( UPLO == HplUpper )
|
694 | 1 | equemene | { |
695 | 1 | equemene | if( TRANS == HplNoTrans )
|
696 | 1 | equemene | { |
697 | 1 | equemene | if( DIAG == HplNonUnit )
|
698 | 1 | equemene | { HPL_dtrsmRUNN( M, N, ALPHA, A, LDA, B, LDB ); } |
699 | 1 | equemene | else { HPL_dtrsmRUNU( M, N, ALPHA, A, LDA, B, LDB ); }
|
700 | 1 | equemene | } |
701 | 1 | equemene | else
|
702 | 1 | equemene | { |
703 | 1 | equemene | if( DIAG == HplNonUnit )
|
704 | 1 | equemene | { HPL_dtrsmRUTN( M, N, ALPHA, A, LDA, B, LDB ); } |
705 | 1 | equemene | else { HPL_dtrsmRUTU( M, N, ALPHA, A, LDA, B, LDB ); }
|
706 | 1 | equemene | } |
707 | 1 | equemene | } |
708 | 1 | equemene | else
|
709 | 1 | equemene | { |
710 | 1 | equemene | if( TRANS == HplNoTrans )
|
711 | 1 | equemene | { |
712 | 1 | equemene | if( DIAG == HplNonUnit )
|
713 | 1 | equemene | { HPL_dtrsmRLNN( M, N, ALPHA, A, LDA, B, LDB ); } |
714 | 1 | equemene | else { HPL_dtrsmRLNU( M, N, ALPHA, A, LDA, B, LDB ); }
|
715 | 1 | equemene | } |
716 | 1 | equemene | else
|
717 | 1 | equemene | { |
718 | 1 | equemene | if( DIAG == HplNonUnit )
|
719 | 1 | equemene | { HPL_dtrsmRLTN( M, N, ALPHA, A, LDA, B, LDB ); } |
720 | 1 | equemene | else { HPL_dtrsmRLTU( M, N, ALPHA, A, LDA, B, LDB ); }
|
721 | 1 | equemene | } |
722 | 1 | equemene | } |
723 | 1 | equemene | } |
724 | 1 | equemene | } |
725 | 1 | equemene | |
726 | 1 | equemene | #endif
|
727 | 1 | equemene | |
728 | 1 | equemene | #ifdef STDC_HEADERS
|
729 | 1 | equemene | void HPL_dtrsm
|
730 | 1 | equemene | ( |
731 | 1 | equemene | const enum HPL_ORDER ORDER, |
732 | 1 | equemene | const enum HPL_SIDE SIDE, |
733 | 1 | equemene | const enum HPL_UPLO UPLO, |
734 | 1 | equemene | const enum HPL_TRANS TRANS, |
735 | 1 | equemene | const enum HPL_DIAG DIAG, |
736 | 1 | equemene | const int M, |
737 | 1 | equemene | const int N, |
738 | 1 | equemene | const double ALPHA, |
739 | 1 | equemene | const double * A, |
740 | 1 | equemene | const int LDA, |
741 | 1 | equemene | double * B,
|
742 | 1 | equemene | const int LDB |
743 | 1 | equemene | ) |
744 | 1 | equemene | #else
|
745 | 1 | equemene | void HPL_dtrsm
|
746 | 1 | equemene | ( ORDER, SIDE, UPLO, TRANS, DIAG, M, N, ALPHA, A, LDA, B, LDB ) |
747 | 1 | equemene | const enum HPL_ORDER ORDER; |
748 | 1 | equemene | const enum HPL_SIDE SIDE; |
749 | 1 | equemene | const enum HPL_UPLO UPLO; |
750 | 1 | equemene | const enum HPL_TRANS TRANS; |
751 | 1 | equemene | const enum HPL_DIAG DIAG; |
752 | 1 | equemene | const int M; |
753 | 1 | equemene | const int N; |
754 | 1 | equemene | const double ALPHA; |
755 | 1 | equemene | const double * A; |
756 | 1 | equemene | const int LDA; |
757 | 1 | equemene | double * B;
|
758 | 1 | equemene | const int LDB; |
759 | 1 | equemene | #endif
|
760 | 1 | equemene | { |
761 | 1 | equemene | /*
|
762 | 1 | equemene | * Purpose
|
763 | 1 | equemene | * =======
|
764 | 1 | equemene | *
|
765 | 1 | equemene | * HPL_dtrsm solves one of the matrix equations
|
766 | 1 | equemene | *
|
767 | 1 | equemene | * op( A ) * X = alpha * B, or X * op( A ) = alpha * B,
|
768 | 1 | equemene | *
|
769 | 1 | equemene | * where alpha is a scalar, X and B are m by n matrices, A is a unit, or
|
770 | 1 | equemene | * non-unit, upper or lower triangular matrix and op(A) is one of
|
771 | 1 | equemene | *
|
772 | 1 | equemene | * op( A ) = A or op( A ) = A^T.
|
773 | 1 | equemene | *
|
774 | 1 | equemene | * The matrix X is overwritten on B.
|
775 | 1 | equemene | *
|
776 | 1 | equemene | * No test for singularity or near-singularity is included in this
|
777 | 1 | equemene | * routine. Such tests must be performed before calling this routine.
|
778 | 1 | equemene | *
|
779 | 1 | equemene | * Arguments
|
780 | 1 | equemene | * =========
|
781 | 1 | equemene | *
|
782 | 1 | equemene | * ORDER (local input) const enum HPL_ORDER
|
783 | 1 | equemene | * On entry, ORDER specifies the storage format of the operands
|
784 | 1 | equemene | * as follows:
|
785 | 1 | equemene | * ORDER = HplRowMajor,
|
786 | 1 | equemene | * ORDER = HplColumnMajor.
|
787 | 1 | equemene | *
|
788 | 1 | equemene | * SIDE (local input) const enum HPL_SIDE
|
789 | 1 | equemene | * On entry, SIDE specifies whether op(A) appears on the left
|
790 | 1 | equemene | * or right of X as follows:
|
791 | 1 | equemene | * SIDE==HplLeft op( A ) * X = alpha * B,
|
792 | 1 | equemene | * SIDE==HplRight X * op( A ) = alpha * B.
|
793 | 1 | equemene | *
|
794 | 1 | equemene | * UPLO (local input) const enum HPL_UPLO
|
795 | 1 | equemene | * On entry, UPLO specifies whether the upper or lower
|
796 | 1 | equemene | * triangular part of the array A is to be referenced. When
|
797 | 1 | equemene | * UPLO==HplUpper, only the upper triangular part of A is to be
|
798 | 1 | equemene | * referenced, otherwise only the lower triangular part of A is
|
799 | 1 | equemene | * to be referenced.
|
800 | 1 | equemene | *
|
801 | 1 | equemene | * TRANS (local input) const enum HPL_TRANS
|
802 | 1 | equemene | * On entry, TRANSA specifies the form of op(A) to be used in
|
803 | 1 | equemene | * the matrix-matrix operation follows:
|
804 | 1 | equemene | * TRANSA==HplNoTrans : op( A ) = A,
|
805 | 1 | equemene | * TRANSA==HplTrans : op( A ) = A^T,
|
806 | 1 | equemene | * TRANSA==HplConjTrans : op( A ) = A^T.
|
807 | 1 | equemene | *
|
808 | 1 | equemene | * DIAG (local input) const enum HPL_DIAG
|
809 | 1 | equemene | * On entry, DIAG specifies whether A is unit triangular or
|
810 | 1 | equemene | * not. When DIAG==HplUnit, A is assumed to be unit triangular,
|
811 | 1 | equemene | * and otherwise, A is not assumed to be unit triangular.
|
812 | 1 | equemene | *
|
813 | 1 | equemene | * M (local input) const int
|
814 | 1 | equemene | * On entry, M specifies the number of rows of the matrix B.
|
815 | 1 | equemene | * M must be at least zero.
|
816 | 1 | equemene | *
|
817 | 1 | equemene | * N (local input) const int
|
818 | 1 | equemene | * On entry, N specifies the number of columns of the matrix B.
|
819 | 1 | equemene | * N must be at least zero.
|
820 | 1 | equemene | *
|
821 | 1 | equemene | * ALPHA (local input) const double
|
822 | 1 | equemene | * On entry, ALPHA specifies the scalar alpha. When ALPHA is
|
823 | 1 | equemene | * supplied as zero then the elements of the matrix B need not
|
824 | 1 | equemene | * be set on input.
|
825 | 1 | equemene | *
|
826 | 1 | equemene | * A (local input) const double *
|
827 | 1 | equemene | * On entry, A points to an array of size equal to or greater
|
828 | 1 | equemene | * than LDA * k, where k is m when SIDE==HplLeft and is n
|
829 | 1 | equemene | * otherwise. Before entry with UPLO==HplUpper, the leading
|
830 | 1 | equemene | * k by k upper triangular part of the array A must contain the
|
831 | 1 | equemene | * upper triangular matrix and the strictly lower triangular
|
832 | 1 | equemene | * part of A is not referenced. When UPLO==HplLower on entry,
|
833 | 1 | equemene | * the leading k by k lower triangular part of the array A must
|
834 | 1 | equemene | * contain the lower triangular matrix and the strictly upper
|
835 | 1 | equemene | * triangular part of A is not referenced.
|
836 | 1 | equemene | *
|
837 | 1 | equemene | * Note that when DIAG==HplUnit, the diagonal elements of A
|
838 | 1 | equemene | * not referenced either, but are assumed to be unity.
|
839 | 1 | equemene | *
|
840 | 1 | equemene | * LDA (local input) const int
|
841 | 1 | equemene | * On entry, LDA specifies the leading dimension of A as
|
842 | 1 | equemene | * declared in the calling (sub) program. LDA must be at
|
843 | 1 | equemene | * least MAX(1,m) when SIDE==HplLeft, and MAX(1,n) otherwise.
|
844 | 1 | equemene | *
|
845 | 1 | equemene | * B (local input/output) double *
|
846 | 1 | equemene | * On entry, B points to an array of size equal to or greater
|
847 | 1 | equemene | * than LDB * n. Before entry, the leading m by n part of the
|
848 | 1 | equemene | * array B must contain the matrix B, except when beta is zero,
|
849 | 1 | equemene | * in which case B need not be set on entry. On exit, the array
|
850 | 1 | equemene | * B is overwritten by the m by n solution matrix.
|
851 | 1 | equemene | *
|
852 | 1 | equemene | * LDB (local input) const int
|
853 | 1 | equemene | * On entry, LDB specifies the leading dimension of B as
|
854 | 1 | equemene | * declared in the calling (sub) program. LDB must be at
|
855 | 1 | equemene | * least MAX(1,m).
|
856 | 1 | equemene | *
|
857 | 1 | equemene | * ---------------------------------------------------------------------
|
858 | 1 | equemene | */
|
859 | 1 | equemene | #ifdef HPL_CALL_CBLAS
|
860 | 1 | equemene | cblas_dtrsm( ORDER, SIDE, UPLO, TRANS, DIAG, M, N, ALPHA, A, LDA, B, LDB ); |
861 | 1 | equemene | #endif
|
862 | 1 | equemene | #ifdef HPL_CALL_VSIPL
|
863 | 1 | equemene | if( ORDER == HplColumnMajor )
|
864 | 1 | equemene | { |
865 | 1 | equemene | HPL_dtrsm0( SIDE, UPLO, TRANS, DIAG, M, N, ALPHA, A, LDA, B, LDB ); |
866 | 1 | equemene | } |
867 | 1 | equemene | else
|
868 | 1 | equemene | { |
869 | 1 | equemene | HPL_dtrsm0( ( SIDE == HplRight ? HplLeft : HplRight ), |
870 | 1 | equemene | ( UPLO == HplLower ? HplUpper : HplLower ), |
871 | 1 | equemene | TRANS, DIAG, N, M, ALPHA, A, LDA, B, LDB ); |
872 | 1 | equemene | } |
873 | 1 | equemene | #endif
|
874 | 9 | equemene | |
875 | 1 | equemene | #ifdef HPL_CALL_FBLAS
|
876 | 1 | equemene | double alpha = ALPHA;
|
877 | 1 | equemene | #ifdef StringSunStyle
|
878 | 1 | equemene | #if defined( HPL_USE_F77_INTEGER_DEF )
|
879 | 1 | equemene | F77_INTEGER IONE = 1;
|
880 | 1 | equemene | #else
|
881 | 1 | equemene | int IONE = 1; |
882 | 1 | equemene | #endif
|
883 | 1 | equemene | #endif
|
884 | 1 | equemene | #ifdef StringStructVal
|
885 | 1 | equemene | F77_CHAR fside; |
886 | 1 | equemene | F77_CHAR fuplo; |
887 | 1 | equemene | F77_CHAR ftran; |
888 | 1 | equemene | F77_CHAR fdiag; |
889 | 1 | equemene | #endif
|
890 | 1 | equemene | #ifdef StringStructPtr
|
891 | 1 | equemene | F77_CHAR fside; |
892 | 1 | equemene | F77_CHAR fuplo; |
893 | 1 | equemene | F77_CHAR ftran; |
894 | 1 | equemene | F77_CHAR fdiag; |
895 | 1 | equemene | #endif
|
896 | 1 | equemene | #ifdef StringCrayStyle
|
897 | 1 | equemene | F77_CHAR fside; |
898 | 1 | equemene | F77_CHAR fuplo; |
899 | 1 | equemene | F77_CHAR ftran; |
900 | 1 | equemene | F77_CHAR fdiag; |
901 | 1 | equemene | #endif
|
902 | 1 | equemene | #ifdef HPL_USE_F77_INTEGER_DEF
|
903 | 1 | equemene | const F77_INTEGER F77M = M, F77N = N,
|
904 | 1 | equemene | F77lda = LDA, F77ldb = LDB; |
905 | 1 | equemene | #else
|
906 | 1 | equemene | #define F77M M
|
907 | 1 | equemene | #define F77N N
|
908 | 1 | equemene | #define F77lda LDA
|
909 | 1 | equemene | #define F77ldb LDB
|
910 | 1 | equemene | #endif
|
911 | 1 | equemene | char cside, cuplo, ctran, cdiag;
|
912 | 1 | equemene | |
913 | 1 | equemene | if( TRANS == HplNoTrans ) ctran = 'N'; |
914 | 1 | equemene | else if( TRANS == HplTrans ) ctran = 'T'; |
915 | 1 | equemene | else ctran = 'C'; |
916 | 1 | equemene | cdiag = ( DIAG == HplUnit ? 'U' : 'N' ); |
917 | 1 | equemene | |
918 | 1 | equemene | if( ORDER == HplColumnMajor )
|
919 | 1 | equemene | { |
920 | 1 | equemene | cside = ( SIDE == HplRight ? 'R' : 'L' ); |
921 | 1 | equemene | cuplo = ( UPLO == HplLower ? 'L' : 'U' ); |
922 | 1 | equemene | #ifdef StringSunStyle
|
923 | 1 | equemene | F77dtrsm( &cside, &cuplo, &ctran, &cdiag, &F77M, &F77N, &alpha, |
924 | 1 | equemene | A, &F77lda, B, &F77ldb, IONE, IONE, IONE, IONE ); |
925 | 1 | equemene | #endif
|
926 | 1 | equemene | #ifdef StringCrayStyle
|
927 | 1 | equemene | fside = HPL_C2F_CHAR( cside ); fuplo = HPL_C2F_CHAR( cuplo ); |
928 | 1 | equemene | ftran = HPL_C2F_CHAR( ctran ); fdiag = HPL_C2F_CHAR( cdiag ); |
929 | 1 | equemene | F77dtrsm( fside, fuplo, ftran, fdiag, &F77M, &F77N, &alpha, |
930 | 1 | equemene | A, &F77lda, B, &F77ldb ); |
931 | 1 | equemene | #endif
|
932 | 1 | equemene | #ifdef StringStructVal
|
933 | 1 | equemene | fside.len = 1; fside.cp = &cside; fuplo.len = 1; fuplo.cp = &cuplo; |
934 | 1 | equemene | ftran.len = 1; ftran.cp = &ctran; fdiag.len = 1; fdiag.cp = &cdiag; |
935 | 1 | equemene | F77dtrsm( fside, fuplo, ftran, fdiag, &F77M, &F77N, &alpha, |
936 | 1 | equemene | A, &F77lda, B, &F77ldb ); |
937 | 1 | equemene | #endif
|
938 | 1 | equemene | #ifdef StringStructPtr
|
939 | 1 | equemene | fside.len = 1; fside.cp = &cside; fuplo.len = 1; fuplo.cp = &cuplo; |
940 | 1 | equemene | ftran.len = 1; ftran.cp = &ctran; fdiag.len = 1; fdiag.cp = &cdiag; |
941 | 1 | equemene | F77dtrsm( &fside, &fuplo, &ftran, &fdiag, &F77M, &F77N, &alpha, |
942 | 1 | equemene | A, &F77lda, B, &F77ldb ); |
943 | 1 | equemene | #endif
|
944 | 1 | equemene | } |
945 | 1 | equemene | else
|
946 | 1 | equemene | { |
947 | 1 | equemene | cside = ( SIDE == HplRight ? 'L' : 'R' ); |
948 | 1 | equemene | cuplo = ( UPLO == HplLower ? 'U' : 'L' ); |
949 | 1 | equemene | #ifdef StringSunStyle
|
950 | 1 | equemene | F77dtrsm( &cside, &cuplo, &ctran, &cdiag, &F77N, &F77M, &alpha, |
951 | 1 | equemene | A, &F77lda, B, &F77ldb, IONE, IONE, IONE, IONE ); |
952 | 1 | equemene | #endif
|
953 | 1 | equemene | #ifdef StringCrayStyle
|
954 | 1 | equemene | fside = HPL_C2F_CHAR( cside ); fuplo = HPL_C2F_CHAR( cuplo ); |
955 | 1 | equemene | ftran = HPL_C2F_CHAR( ctran ); fdiag = HPL_C2F_CHAR( cdiag ); |
956 | 1 | equemene | F77dtrsm( fside, fuplo, ftran, fdiag, &F77N, &F77M, &alpha, |
957 | 1 | equemene | A, &F77lda, B, &F77ldb ); |
958 | 1 | equemene | #endif
|
959 | 1 | equemene | #ifdef StringStructVal
|
960 | 1 | equemene | fside.len = 1; fside.cp = &cside; fuplo.len = 1; fuplo.cp = &cuplo; |
961 | 1 | equemene | ftran.len = 1; ftran.cp = &ctran; fdiag.len = 1; fdiag.cp = &cdiag; |
962 | 1 | equemene | F77dtrsm( fside, fuplo, ftran, fdiag, &F77N, &F77M, &alpha, |
963 | 1 | equemene | A, &F77lda, B, &F77ldb ); |
964 | 1 | equemene | #endif
|
965 | 1 | equemene | #ifdef StringStructPtr
|
966 | 1 | equemene | fside.len = 1; fside.cp = &cside; fuplo.len = 1; fuplo.cp = &cuplo; |
967 | 1 | equemene | ftran.len = 1; ftran.cp = &ctran; fdiag.len = 1; fdiag.cp = &cdiag; |
968 | 1 | equemene | F77dtrsm( &fside, &fuplo, &ftran, &fdiag, &F77N, &F77M, &alpha, |
969 | 1 | equemene | A, &F77lda, B, &F77ldb ); |
970 | 1 | equemene | #endif
|
971 | 1 | equemene | } |
972 | 1 | equemene | #endif
|
973 | 9 | equemene | |
974 | 9 | equemene | #ifdef HPL_CALL_CUBLAS
|
975 | 9 | equemene | double alpha = ALPHA;
|
976 | 9 | equemene | |
977 | 9 | equemene | int IONE = 1; |
978 | 9 | equemene | |
979 | 9 | equemene | #define CUBLASM M
|
980 | 9 | equemene | #define CUBLASN N
|
981 | 9 | equemene | #define CUBLASlda LDA
|
982 | 9 | equemene | #define CUBLASldb LDB
|
983 | 9 | equemene | |
984 | 9 | equemene | char cside, cuplo, ctran, cdiag;
|
985 | 9 | equemene | |
986 | 9 | equemene | if( TRANS == HplNoTrans ) ctran = 'N'; |
987 | 9 | equemene | else if( TRANS == HplTrans ) ctran = 'T'; |
988 | 9 | equemene | else ctran = 'C'; |
989 | 9 | equemene | cdiag = ( DIAG == HplUnit ? 'U' : 'N' ); |
990 | 9 | equemene | |
991 | 9 | equemene | if( ORDER == HplColumnMajor )
|
992 | 9 | equemene | { |
993 | 9 | equemene | cside = ( SIDE == HplRight ? 'R' : 'L' ); |
994 | 9 | equemene | cuplo = ( UPLO == HplLower ? 'L' : 'U' ); |
995 | 9 | equemene | |
996 | 9 | equemene | CUBLAS_DTRSM( &cside, &cuplo, &ctran, &cdiag, &CUBLASM, &CUBLASN, &alpha, |
997 | 9 | equemene | A, &CUBLASlda, B, &CUBLASldb, &IONE, &IONE, &IONE, &IONE ); |
998 | 9 | equemene | } |
999 | 9 | equemene | else
|
1000 | 9 | equemene | { |
1001 | 9 | equemene | cside = ( SIDE == HplRight ? 'L' : 'R' ); |
1002 | 9 | equemene | cuplo = ( UPLO == HplLower ? 'U' : 'L' ); |
1003 | 9 | equemene | |
1004 | 9 | equemene | CUBLAS_DTRSM( &cside, &cuplo, &ctran, &cdiag, &CUBLASN, &CUBLASM, &alpha, |
1005 | 9 | equemene | A, &CUBLASlda, B, &CUBLASldb, &IONE, &IONE, &IONE, &IONE ); |
1006 | 9 | equemene | } |
1007 | 9 | equemene | #endif
|
1008 | 1 | equemene | /*
|
1009 | 1 | equemene | * End of HPL_dtrsm
|
1010 | 1 | equemene | */
|
1011 | 1 | equemene | } |
1012 | 1 | equemene | |
1013 | 1 | equemene | #endif |