root / src / blas / HPL_dtrsm.c @ 9
Historique | Voir | Annoter | Télécharger (32,94 ko)
1 |
/*
|
---|---|
2 |
* -- High Performance Computing Linpack Benchmark (HPL)
|
3 |
* HPL - 2.0 - September 10, 2008
|
4 |
* Antoine P. Petitet
|
5 |
* University of Tennessee, Knoxville
|
6 |
* Innovative Computing Laboratory
|
7 |
* (C) Copyright 2000-2008 All Rights Reserved
|
8 |
*
|
9 |
* -- Copyright notice and Licensing terms:
|
10 |
*
|
11 |
* Redistribution and use in source and binary forms, with or without
|
12 |
* modification, are permitted provided that the following conditions
|
13 |
* are met:
|
14 |
*
|
15 |
* 1. Redistributions of source code must retain the above copyright
|
16 |
* notice, this list of conditions and the following disclaimer.
|
17 |
*
|
18 |
* 2. Redistributions in binary form must reproduce the above copyright
|
19 |
* notice, this list of conditions, and the following disclaimer in the
|
20 |
* documentation and/or other materials provided with the distribution.
|
21 |
*
|
22 |
* 3. All advertising materials mentioning features or use of this
|
23 |
* software must display the following acknowledgement:
|
24 |
* This product includes software developed at the University of
|
25 |
* Tennessee, Knoxville, Innovative Computing Laboratory.
|
26 |
*
|
27 |
* 4. The name of the University, the name of the Laboratory, or the
|
28 |
* names of its contributors may not be used to endorse or promote
|
29 |
* products derived from this software without specific written
|
30 |
* permission.
|
31 |
*
|
32 |
* -- Disclaimer:
|
33 |
*
|
34 |
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
35 |
* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
36 |
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
37 |
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
|
38 |
* OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
39 |
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
40 |
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
41 |
* DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
42 |
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
43 |
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
44 |
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
45 |
* ---------------------------------------------------------------------
|
46 |
*/
|
47 |
/*
|
48 |
* Include files
|
49 |
*/
|
50 |
#include "hpl.h" |
51 |
|
52 |
#ifndef HPL_dtrsm
|
53 |
|
54 |
#ifdef HPL_CALL_VSIPL
|
55 |
|
56 |
#ifdef STDC_HEADERS
|
57 |
static void HPL_dtrsmLLNN |
58 |
( |
59 |
const int M, |
60 |
const int N, |
61 |
const double ALPHA, |
62 |
const double * A, |
63 |
const int LDA, |
64 |
double * B,
|
65 |
const int LDB |
66 |
) |
67 |
#else
|
68 |
static void HPL_dtrsmLLNN( M, N, ALPHA, A, LDA, B, LDB ) |
69 |
const int LDA, LDB, M, N; |
70 |
const double ALPHA; |
71 |
const double * A; |
72 |
double * B;
|
73 |
#endif
|
74 |
{ |
75 |
int i, iaik, ibij, ibkj, j, jak, jbj, k;
|
76 |
|
77 |
for( j = 0, jbj = 0; j < N; j++, jbj += LDB ) |
78 |
{ |
79 |
for( i = 0, ibij= jbj; i < M; i++, ibij += 1 ) { B[ibij] *= ALPHA; } |
80 |
for( k = 0, jak = 0, ibkj = jbj; k < M; k++, jak += LDA, ibkj += 1 ) |
81 |
{ |
82 |
B[ibkj] /= A[k+jak]; |
83 |
for( i = k+1, iaik = k+1+jak, ibij = k+1+jbj; |
84 |
i < M; i++, iaik +=1, ibij += 1 ) |
85 |
{ B[ibij] -= B[ibkj] * A[iaik]; } |
86 |
} |
87 |
} |
88 |
} |
89 |
|
90 |
#ifdef STDC_HEADERS
|
91 |
static void HPL_dtrsmLLNU |
92 |
( |
93 |
const int M, |
94 |
const int N, |
95 |
const double ALPHA, |
96 |
const double * A, |
97 |
const int LDA, |
98 |
double * B,
|
99 |
const int LDB |
100 |
) |
101 |
#else
|
102 |
static void HPL_dtrsmLLNU( M, N, ALPHA, A, LDA, B, LDB ) |
103 |
const int LDA, LDB, M, N; |
104 |
const double ALPHA; |
105 |
const double * A; |
106 |
double * B;
|
107 |
#endif
|
108 |
{ |
109 |
int i, iaik, ibij, ibkj, j, jak, jbj, k;
|
110 |
|
111 |
for( j = 0, jbj = 0; j < N; j++, jbj += LDB ) |
112 |
{ |
113 |
for( i = 0, ibij= jbj; i < M; i++, ibij += 1 ) { B[ibij] *= ALPHA; } |
114 |
for( k = 0, jak = 0, ibkj = jbj; k < M; k++, jak += LDA, ibkj += 1 ) |
115 |
{ |
116 |
for( i = k+1, iaik = k+1+jak, ibij = k+1+jbj; |
117 |
i < M; i++, iaik +=1, ibij += 1 ) |
118 |
{ B[ibij] -= B[ibkj] * A[iaik]; } |
119 |
} |
120 |
} |
121 |
} |
122 |
|
123 |
#ifdef STDC_HEADERS
|
124 |
static void HPL_dtrsmLLTN |
125 |
( |
126 |
const int M, |
127 |
const int N, |
128 |
const double ALPHA, |
129 |
const double * A, |
130 |
const int LDA, |
131 |
double * B,
|
132 |
const int LDB |
133 |
) |
134 |
#else
|
135 |
static void HPL_dtrsmLLTN( M, N, ALPHA, A, LDA, B, LDB ) |
136 |
const int LDA, LDB, M, N; |
137 |
const double ALPHA; |
138 |
const double * A; |
139 |
double * B;
|
140 |
#endif
|
141 |
{ |
142 |
register double t0; |
143 |
int i, iaki, ibij, ibkj, j, jai, jbj, k;
|
144 |
|
145 |
for( j = 0, jbj = 0; j < N; j++, jbj += LDB ) |
146 |
{ |
147 |
for( i = M-1, jai = (M-1)*LDA, ibij = M-1+jbj; |
148 |
i >= 0; i--, jai -= LDA, ibij -= 1 ) |
149 |
{ |
150 |
t0 = ALPHA * B[ibij]; |
151 |
for( k = i+1, iaki = i+1+jai, ibkj = i+1+jbj; |
152 |
k < M; k++, iaki += 1, ibkj += 1 ) |
153 |
{ t0 -= A[iaki] * B[ibkj]; } |
154 |
t0 /= A[i+jai]; |
155 |
B[ibij] = t0; |
156 |
} |
157 |
} |
158 |
} |
159 |
|
160 |
#ifdef STDC_HEADERS
|
161 |
static void HPL_dtrsmLLTU |
162 |
( |
163 |
const int M, |
164 |
const int N, |
165 |
const double ALPHA, |
166 |
const double * A, |
167 |
const int LDA, |
168 |
double * B,
|
169 |
const int LDB |
170 |
) |
171 |
#else
|
172 |
static void HPL_dtrsmLLTU( M, N, ALPHA, A, LDA, B, LDB ) |
173 |
const int LDA, LDB, M, N; |
174 |
const double ALPHA; |
175 |
const double * A; |
176 |
double * B;
|
177 |
#endif
|
178 |
{ |
179 |
register double t0; |
180 |
int i, iaki, ibij, ibkj, j, jai, jbj, k;
|
181 |
|
182 |
for( j = 0, jbj = 0; j < N; j++, jbj += LDB ) |
183 |
{ |
184 |
for( i = M-1, jai = (M-1)*LDA, ibij = M-1+jbj; |
185 |
i >= 0; i--, jai -= LDA, ibij -= 1 ) |
186 |
{ |
187 |
t0 = ALPHA * B[ibij]; |
188 |
for( k = i+1, iaki = i+1+jai, ibkj = i+1+jbj; |
189 |
k < M; k++, iaki += 1, ibkj += 1 ) |
190 |
{ t0 -= A[iaki] * B[ibkj]; } |
191 |
B[ibij] = t0; |
192 |
} |
193 |
} |
194 |
} |
195 |
|
196 |
#ifdef STDC_HEADERS
|
197 |
static void HPL_dtrsmLUNN |
198 |
( |
199 |
const int M, |
200 |
const int N, |
201 |
const double ALPHA, |
202 |
const double * A, |
203 |
const int LDA, |
204 |
double * B,
|
205 |
const int LDB |
206 |
) |
207 |
#else
|
208 |
static void HPL_dtrsmLUNN( M, N, ALPHA, A, LDA, B, LDB ) |
209 |
const int LDA, LDB, M, N; |
210 |
const double ALPHA; |
211 |
const double * A; |
212 |
double * B;
|
213 |
#endif
|
214 |
{ |
215 |
int i, iaik, ibij, ibkj, j, jak, jbj, k;
|
216 |
|
217 |
for( j = 0, jbj = 0; j < N; j++, jbj += LDB ) |
218 |
{ |
219 |
for( i = 0, ibij = jbj; i < M; i++, ibij += 1 ) { B[ibij] *= ALPHA; } |
220 |
for( k = M-1, jak = (M-1)*LDA, ibkj = M-1+jbj; |
221 |
k >= 0; k--, jak -= LDA, ibkj -= 1 ) |
222 |
{ |
223 |
B[ibkj] /= A[k+jak]; |
224 |
for( i = 0, iaik = jak, ibij = jbj; |
225 |
i < k; i++, iaik += 1, ibij += 1 ) |
226 |
{ B[ibij] -= B[ibkj] * A[iaik]; } |
227 |
} |
228 |
} |
229 |
} |
230 |
|
231 |
|
232 |
#ifdef STDC_HEADERS
|
233 |
static void HPL_dtrsmLUNU |
234 |
( |
235 |
const int M, |
236 |
const int N, |
237 |
const double ALPHA, |
238 |
const double * A, |
239 |
const int LDA, |
240 |
double * B,
|
241 |
const int LDB |
242 |
) |
243 |
#else
|
244 |
static void HPL_dtrsmLUNU( M, N, ALPHA, A, LDA, B, LDB ) |
245 |
const int LDA, LDB, M, N; |
246 |
const double ALPHA; |
247 |
const double * A; |
248 |
double * B;
|
249 |
#endif
|
250 |
{ |
251 |
int i, iaik, ibij, ibkj, j, jak, jbj, k;
|
252 |
|
253 |
for( j = 0, jbj = 0; j < N; j++, jbj += LDB ) |
254 |
{ |
255 |
for( i = 0, ibij = jbj; i < M; i++, ibij += 1 ) { B[ibij] *= ALPHA; } |
256 |
for( k = M-1, jak = (M-1)*LDA, ibkj = M-1+jbj; |
257 |
k >= 0; k--, jak -= LDA, ibkj -= 1 ) |
258 |
{ |
259 |
for( i = 0, iaik = jak, ibij = jbj; |
260 |
i < k; i++, iaik += 1, ibij += 1 ) |
261 |
{ B[ibij] -= B[ibkj] * A[iaik]; } |
262 |
} |
263 |
} |
264 |
} |
265 |
|
266 |
|
267 |
#ifdef STDC_HEADERS
|
268 |
static void HPL_dtrsmLUTN |
269 |
( |
270 |
const int M, |
271 |
const int N, |
272 |
const double ALPHA, |
273 |
const double * A, |
274 |
const int LDA, |
275 |
double * B,
|
276 |
const int LDB |
277 |
) |
278 |
#else
|
279 |
static void HPL_dtrsmLUTN( M, N, ALPHA, A, LDA, B, LDB ) |
280 |
const int LDA, LDB, M, N; |
281 |
const double ALPHA; |
282 |
const double * A; |
283 |
double * B;
|
284 |
#endif
|
285 |
{ |
286 |
int i, iaki, ibij, ibkj, j, jai, jbj, k;
|
287 |
register double t0; |
288 |
|
289 |
for( j = 0, jbj = 0; j < N; j++, jbj += LDB ) |
290 |
{ |
291 |
for( i = 0, jai = 0, ibij = jbj; i < M; i++, jai += LDA, ibij += 1 ) |
292 |
{ |
293 |
t0 = ALPHA * B[ibij]; |
294 |
for( k = 0, iaki = jai, ibkj = jbj; k < i; k++, iaki += 1, ibkj += 1 ) |
295 |
{ t0 -= A[iaki] * B[ibkj]; } |
296 |
t0 /= A[i+jai]; |
297 |
B[ibij] = t0; |
298 |
} |
299 |
} |
300 |
} |
301 |
|
302 |
|
303 |
#ifdef STDC_HEADERS
|
304 |
static void HPL_dtrsmLUTU |
305 |
( |
306 |
const int M, |
307 |
const int N, |
308 |
const double ALPHA, |
309 |
const double * A, |
310 |
const int LDA, |
311 |
double * B,
|
312 |
const int LDB |
313 |
) |
314 |
#else
|
315 |
static void HPL_dtrsmLUTU( M, N, ALPHA, A, LDA, B, LDB ) |
316 |
const int LDA, LDB, M, N; |
317 |
const double ALPHA; |
318 |
const double * A; |
319 |
double * B;
|
320 |
#endif
|
321 |
{ |
322 |
register double t0; |
323 |
int i, iaki, ibij, ibkj, j, jai, jbj, k;
|
324 |
|
325 |
for( j = 0, jbj = 0; j < N; j++, jbj += LDB ) |
326 |
{ |
327 |
for( i = 0, jai = 0, ibij = jbj; i < M; i++, jai += LDA, ibij += 1 ) |
328 |
{ |
329 |
t0 = ALPHA * B[ibij]; |
330 |
for( k = 0, iaki = jai, ibkj = jbj; k < i; k++, iaki += 1, ibkj += 1 ) |
331 |
{ t0 -= A[iaki] * B[ibkj]; } |
332 |
B[ibij] = t0; |
333 |
} |
334 |
} |
335 |
} |
336 |
|
337 |
|
338 |
#ifdef STDC_HEADERS
|
339 |
static void HPL_dtrsmRLNN |
340 |
( |
341 |
const int M, |
342 |
const int N, |
343 |
const double ALPHA, |
344 |
const double * A, |
345 |
const int LDA, |
346 |
double * B,
|
347 |
const int LDB |
348 |
) |
349 |
#else
|
350 |
static void HPL_dtrsmRLNN( M, N, ALPHA, A, LDA, B, LDB ) |
351 |
const int LDA, LDB, M, N; |
352 |
const double ALPHA; |
353 |
const double * A; |
354 |
double * B;
|
355 |
#endif
|
356 |
{ |
357 |
int i, iakj, ibij, ibik, j, jaj, jbj, jbk, k;
|
358 |
|
359 |
for( j = N-1, jaj = (N-1)*LDA, jbj = (N-1)*LDB; |
360 |
j >= 0; j--, jaj -= LDA, jbj -= LDB )
|
361 |
{ |
362 |
for( i = 0, ibij = jbj; i < M; i++, ibij += 1 ) { B[ibij] *= ALPHA; } |
363 |
for( k = j+1, iakj = j+1+jaj, jbk = (j+1)*LDB; |
364 |
k < N; k++, iakj += 1, jbk += LDB )
|
365 |
{ |
366 |
for( i = 0, ibij = jbj, ibik = jbk; i < M; i++, ibij += 1, ibik += 1 ) |
367 |
{ B[ibij] -= A[iakj] * B[ibik]; } |
368 |
} |
369 |
for( i = 0, ibij = jbj; i < M; i++, ibij += 1 ) { B[ibij] /= A[j+jaj]; } |
370 |
} |
371 |
} |
372 |
|
373 |
|
374 |
#ifdef STDC_HEADERS
|
375 |
static void HPL_dtrsmRLNU |
376 |
( |
377 |
const int M, |
378 |
const int N, |
379 |
const double ALPHA, |
380 |
const double * A, |
381 |
const int LDA, |
382 |
double * B,
|
383 |
const int LDB |
384 |
) |
385 |
#else
|
386 |
static void HPL_dtrsmRLNU( M, N, ALPHA, A, LDA, B, LDB ) |
387 |
const int LDA, LDB, M, N; |
388 |
const double ALPHA; |
389 |
const double * A; |
390 |
double * B;
|
391 |
#endif
|
392 |
{ |
393 |
int i, iakj, ibij, ibik, j, jaj, jbj, jbk, k;
|
394 |
|
395 |
for( j = N-1, jaj = (N-1)*LDA, jbj = (N-1)*LDB; |
396 |
j >= 0; j--, jaj -= LDA, jbj -= LDB )
|
397 |
{ |
398 |
for( i = 0, ibij = jbj; i < M; i++, ibij += 1 ) { B[ibij] *= ALPHA; } |
399 |
for( k = j+1, iakj = j+1+jaj, jbk = (j+1)*LDB; |
400 |
k < N; k++, iakj += 1, jbk += LDB )
|
401 |
{ |
402 |
for( i = 0, ibij = jbj, ibik = jbk; i < M; i++, ibij += 1, ibik += 1 ) |
403 |
{ B[ibij] -= A[iakj] * B[ibik]; } |
404 |
} |
405 |
} |
406 |
} |
407 |
|
408 |
|
409 |
#ifdef STDC_HEADERS
|
410 |
static void HPL_dtrsmRLTN |
411 |
( |
412 |
const int M, |
413 |
const int N, |
414 |
const double ALPHA, |
415 |
const double * A, |
416 |
const int LDA, |
417 |
double * B,
|
418 |
const int LDB |
419 |
) |
420 |
#else
|
421 |
static void HPL_dtrsmRLTN( M, N, ALPHA, A, LDA, B, LDB ) |
422 |
const int LDA, LDB, M, N; |
423 |
const double ALPHA; |
424 |
const double * A; |
425 |
double * B;
|
426 |
#endif
|
427 |
{ |
428 |
register double t0; |
429 |
int i, iajk, ibij, ibik, j, jak, jbj, jbk, k;
|
430 |
|
431 |
for( k = 0, jak = 0, jbk = 0; k < N; k++, jak += LDA, jbk += LDB ) |
432 |
{ |
433 |
for( i = 0, ibik = jbk; i < M; i++, ibik += 1 ) { B[ibik] /= A[k+jak]; } |
434 |
for( j = k+1, iajk = (k+1)+jak, jbj = (k+1)*LDB; |
435 |
j < N; j++, iajk += 1, jbj += LDB )
|
436 |
{ |
437 |
t0 = A[iajk]; |
438 |
for( i = 0, ibij = jbj, ibik = jbk; i < M; i++, ibij += 1, ibik += 1 ) |
439 |
{ B[ibij] -= t0 * B[ibik]; } |
440 |
} |
441 |
for( i = 0, ibik = jbk; i < M; i++, ibik += 1 ) { B[ibik] *= ALPHA; } |
442 |
} |
443 |
} |
444 |
|
445 |
|
446 |
#ifdef STDC_HEADERS
|
447 |
static void HPL_dtrsmRLTU |
448 |
( |
449 |
const int M, |
450 |
const int N, |
451 |
const double ALPHA, |
452 |
const double * A, |
453 |
const int LDA, |
454 |
double * B,
|
455 |
const int LDB |
456 |
) |
457 |
#else
|
458 |
static void HPL_dtrsmRLTU( M, N, ALPHA, A, LDA, B, LDB ) |
459 |
const int LDA, LDB, M, N; |
460 |
const double ALPHA; |
461 |
const double * A; |
462 |
double * B;
|
463 |
#endif
|
464 |
{ |
465 |
register double t0; |
466 |
int i, iajk, ibij, ibik, j, jak, jbj, jbk, k;
|
467 |
|
468 |
for( k = 0, jak = 0, jbk = 0; k < N; k++, jak += LDA, jbk += LDB ) |
469 |
{ |
470 |
for( j = k+1, iajk = (k+1)+jak, jbj = (k+1)*LDB; |
471 |
j < N; j++, iajk += 1, jbj += LDB )
|
472 |
{ |
473 |
t0 = A[iajk]; |
474 |
for( i = 0, ibij = jbj, ibik = jbk; i < M; i++, ibij += 1, ibik += 1 ) |
475 |
{ B[ibij] -= t0 * B[ibik]; } |
476 |
} |
477 |
for( i = 0, ibik = jbk; i < M; i++, ibik += 1 ) { B[ibik] *= ALPHA; } |
478 |
} |
479 |
} |
480 |
|
481 |
|
482 |
#ifdef STDC_HEADERS
|
483 |
static void HPL_dtrsmRUNN |
484 |
( |
485 |
const int M, |
486 |
const int N, |
487 |
const double ALPHA, |
488 |
const double * A, |
489 |
const int LDA, |
490 |
double * B,
|
491 |
const int LDB |
492 |
) |
493 |
#else
|
494 |
static void HPL_dtrsmRUNN( M, N, ALPHA, A, LDA, B, LDB ) |
495 |
const int LDA, LDB, M, N; |
496 |
const double ALPHA; |
497 |
const double * A; |
498 |
double * B;
|
499 |
#endif
|
500 |
{ |
501 |
int i, iakj, ibij, ibik, j, jaj, jbj, jbk, k;
|
502 |
|
503 |
for( j = 0, jaj = 0, jbj = 0; j < N; j++, jaj += LDA, jbj += LDB ) |
504 |
{ |
505 |
for( i = 0, ibij = jbj; i < M; i++, ibij += 1 ) { B[ibij] *= ALPHA; } |
506 |
for( k = 0, iakj = jaj, jbk = 0; k < j; k++, iakj += 1, jbk += LDB ) |
507 |
{ |
508 |
for( i = 0, ibij = jbj, ibik = jbk; i < M; i++, ibij += 1, ibik += 1 ) |
509 |
{ B[ibij] -= A[iakj] * B[ibik]; } |
510 |
} |
511 |
for( i = 0, ibij = jbj; i < M; i++, ibij += 1 ) { B[ibij] /= A[j+jaj]; } |
512 |
} |
513 |
} |
514 |
|
515 |
|
516 |
#ifdef STDC_HEADERS
|
517 |
static void HPL_dtrsmRUNU |
518 |
( |
519 |
const int M, |
520 |
const int N, |
521 |
const double ALPHA, |
522 |
const double * A, |
523 |
const int LDA, |
524 |
double * B,
|
525 |
const int LDB |
526 |
) |
527 |
#else
|
528 |
static void HPL_dtrsmRUNU( M, N, ALPHA, A, LDA, B, LDB ) |
529 |
const int LDA, LDB, M, N; |
530 |
const double ALPHA; |
531 |
const double * A; |
532 |
double * B;
|
533 |
#endif
|
534 |
{ |
535 |
int i, iakj, ibij, ibik, j, jaj, jbj, jbk, k;
|
536 |
|
537 |
for( j = 0, jaj = 0, jbj = 0; j < N; j++, jaj += LDA, jbj += LDB ) |
538 |
{ |
539 |
for( i = 0, ibij = jbj; i < M; i++, ibij += 1 ) { B[ibij] *= ALPHA; } |
540 |
for( k = 0, iakj = jaj, jbk = 0; k < j; k++, iakj += 1, jbk += LDB ) |
541 |
{ |
542 |
for( i = 0, ibij = jbj, ibik = jbk; i < M; i++, ibij += 1, ibik += 1 ) |
543 |
{ B[ibij] -= A[iakj] * B[ibik]; } |
544 |
} |
545 |
} |
546 |
} |
547 |
|
548 |
|
549 |
#ifdef STDC_HEADERS
|
550 |
static void HPL_dtrsmRUTN |
551 |
( |
552 |
const int M, |
553 |
const int N, |
554 |
const double ALPHA, |
555 |
const double * A, |
556 |
const int LDA, |
557 |
double * B,
|
558 |
const int LDB |
559 |
) |
560 |
#else
|
561 |
static void HPL_dtrsmRUTN( M, N, ALPHA, A, LDA, B, LDB ) |
562 |
const int LDA, LDB, M, N; |
563 |
const double ALPHA; |
564 |
const double * A; |
565 |
double * B;
|
566 |
#endif
|
567 |
{ |
568 |
register double t0; |
569 |
int i, iajk, ibij, ibik, j, jak, jbj, jbk, k;
|
570 |
|
571 |
for( k = N-1, jak = (N-1)*LDA, jbk = (N-1)*LDB; |
572 |
k >= 0; k--, jak -= LDA, jbk -= LDB )
|
573 |
{ |
574 |
for( i = 0, ibik = jbk; i < M; i++, ibik += 1 ) { B[ibik] /= A[k+jak]; } |
575 |
for( j = 0, iajk = jak, jbj = 0; j < k; j++, iajk += 1, jbj += LDB ) |
576 |
{ |
577 |
t0 = A[iajk]; |
578 |
for( i = 0, ibij = jbj, ibik = jbk; i < M; i++, ibij += 1, ibik += 1 ) |
579 |
{ B[ibij] -= t0 * B[ibik]; } |
580 |
} |
581 |
for( i = 0, ibik = jbk; i < M; i++, ibik += 1 ) { B[ibik] *= ALPHA; } |
582 |
} |
583 |
} |
584 |
|
585 |
#ifdef STDC_HEADERS
|
586 |
static void HPL_dtrsmRUTU |
587 |
( |
588 |
const int M, |
589 |
const int N, |
590 |
const double ALPHA, |
591 |
const double * A, |
592 |
const int LDA, |
593 |
double * B,
|
594 |
const int LDB |
595 |
) |
596 |
#else
|
597 |
static void HPL_dtrsmRUTU( M, N, ALPHA, A, LDA, B, LDB ) |
598 |
const int LDA, LDB, M, N; |
599 |
const double ALPHA; |
600 |
const double * A; |
601 |
double * B;
|
602 |
#endif
|
603 |
{ |
604 |
register double t0; |
605 |
int i, iajk, ibij, ibik, j, jak, jbj, jbk, k;
|
606 |
|
607 |
for( k = N-1, jak = (N-1)*LDA, jbk = (N-1)*LDB; |
608 |
k >= 0; k--, jak -= LDA, jbk -= LDB )
|
609 |
{ |
610 |
for( j = 0, iajk = jak, jbj = 0; j < k; j++, iajk += 1, jbj += LDB ) |
611 |
{ |
612 |
t0 = A[iajk]; |
613 |
for( i = 0, ibij = jbj, ibik = jbk; i < M; i++, ibij += 1, ibik += 1 ) |
614 |
{ B[ibij] -= t0 * B[ibik]; } |
615 |
} |
616 |
for( i = 0, ibik = jbk; i < M; i++, ibik += 1 ) { B[ibik] *= ALPHA; } |
617 |
} |
618 |
} |
619 |
|
620 |
#ifdef STDC_HEADERS
|
621 |
static void HPL_dtrsm0 |
622 |
( |
623 |
const enum HPL_SIDE SIDE, |
624 |
const enum HPL_UPLO UPLO, |
625 |
const enum HPL_TRANS TRANS, |
626 |
const enum HPL_DIAG DIAG, |
627 |
const int M, |
628 |
const int N, |
629 |
const double ALPHA, |
630 |
const double * A, |
631 |
const int LDA, |
632 |
double * B,
|
633 |
const int LDB |
634 |
) |
635 |
#else
|
636 |
static void HPL_dtrsm0( SIDE, UPLO, TRANS, DIAG, M, N, ALPHA, A, LDA, B, LDB ) |
637 |
const enum HPL_SIDE SIDE; |
638 |
const enum HPL_UPLO UPLO; |
639 |
const enum HPL_TRANS TRANS; |
640 |
const enum HPL_DIAG DIAG; |
641 |
const int LDA, LDB, M, N; |
642 |
const double ALPHA; |
643 |
const double * A; |
644 |
double * B;
|
645 |
#endif
|
646 |
{ |
647 |
int i, j;
|
648 |
|
649 |
if( ( M == 0 ) || ( N == 0 ) ) return; |
650 |
|
651 |
if( ALPHA == HPL_rzero )
|
652 |
{ |
653 |
for( j = 0; j < N; j++ ) |
654 |
{ for( i = 0; i < M; i++ ) *(B+i+j*LDB) = HPL_rzero; } |
655 |
return;
|
656 |
} |
657 |
|
658 |
if( SIDE == HplLeft )
|
659 |
{ |
660 |
if( UPLO == HplUpper )
|
661 |
{ |
662 |
if( TRANS == HplNoTrans )
|
663 |
{ |
664 |
if( DIAG == HplNonUnit )
|
665 |
{ HPL_dtrsmLUNN( M, N, ALPHA, A, LDA, B, LDB ); } |
666 |
else { HPL_dtrsmLUNU( M, N, ALPHA, A, LDA, B, LDB ); }
|
667 |
} |
668 |
else
|
669 |
{ |
670 |
if( DIAG == HplNonUnit )
|
671 |
{ HPL_dtrsmLUTN( M, N, ALPHA, A, LDA, B, LDB ); } |
672 |
else { HPL_dtrsmLUTU( M, N, ALPHA, A, LDA, B, LDB ); }
|
673 |
} |
674 |
} |
675 |
else
|
676 |
{ |
677 |
if( TRANS == HplNoTrans )
|
678 |
{ |
679 |
if( DIAG == HplNonUnit )
|
680 |
{ HPL_dtrsmLLNN( M, N, ALPHA, A, LDA, B, LDB ); } |
681 |
else { HPL_dtrsmLLNU( M, N, ALPHA, A, LDA, B, LDB ); }
|
682 |
} |
683 |
else
|
684 |
{ |
685 |
if( DIAG == HplNonUnit )
|
686 |
{ HPL_dtrsmLLTN( M, N, ALPHA, A, LDA, B, LDB ); } |
687 |
else { HPL_dtrsmLLTU( M, N, ALPHA, A, LDA, B, LDB ); }
|
688 |
} |
689 |
} |
690 |
} |
691 |
else
|
692 |
{ |
693 |
if( UPLO == HplUpper )
|
694 |
{ |
695 |
if( TRANS == HplNoTrans )
|
696 |
{ |
697 |
if( DIAG == HplNonUnit )
|
698 |
{ HPL_dtrsmRUNN( M, N, ALPHA, A, LDA, B, LDB ); } |
699 |
else { HPL_dtrsmRUNU( M, N, ALPHA, A, LDA, B, LDB ); }
|
700 |
} |
701 |
else
|
702 |
{ |
703 |
if( DIAG == HplNonUnit )
|
704 |
{ HPL_dtrsmRUTN( M, N, ALPHA, A, LDA, B, LDB ); } |
705 |
else { HPL_dtrsmRUTU( M, N, ALPHA, A, LDA, B, LDB ); }
|
706 |
} |
707 |
} |
708 |
else
|
709 |
{ |
710 |
if( TRANS == HplNoTrans )
|
711 |
{ |
712 |
if( DIAG == HplNonUnit )
|
713 |
{ HPL_dtrsmRLNN( M, N, ALPHA, A, LDA, B, LDB ); } |
714 |
else { HPL_dtrsmRLNU( M, N, ALPHA, A, LDA, B, LDB ); }
|
715 |
} |
716 |
else
|
717 |
{ |
718 |
if( DIAG == HplNonUnit )
|
719 |
{ HPL_dtrsmRLTN( M, N, ALPHA, A, LDA, B, LDB ); } |
720 |
else { HPL_dtrsmRLTU( M, N, ALPHA, A, LDA, B, LDB ); }
|
721 |
} |
722 |
} |
723 |
} |
724 |
} |
725 |
|
726 |
#endif
|
727 |
|
728 |
#ifdef STDC_HEADERS
|
729 |
void HPL_dtrsm
|
730 |
( |
731 |
const enum HPL_ORDER ORDER, |
732 |
const enum HPL_SIDE SIDE, |
733 |
const enum HPL_UPLO UPLO, |
734 |
const enum HPL_TRANS TRANS, |
735 |
const enum HPL_DIAG DIAG, |
736 |
const int M, |
737 |
const int N, |
738 |
const double ALPHA, |
739 |
const double * A, |
740 |
const int LDA, |
741 |
double * B,
|
742 |
const int LDB |
743 |
) |
744 |
#else
|
745 |
void HPL_dtrsm
|
746 |
( ORDER, SIDE, UPLO, TRANS, DIAG, M, N, ALPHA, A, LDA, B, LDB ) |
747 |
const enum HPL_ORDER ORDER; |
748 |
const enum HPL_SIDE SIDE; |
749 |
const enum HPL_UPLO UPLO; |
750 |
const enum HPL_TRANS TRANS; |
751 |
const enum HPL_DIAG DIAG; |
752 |
const int M; |
753 |
const int N; |
754 |
const double ALPHA; |
755 |
const double * A; |
756 |
const int LDA; |
757 |
double * B;
|
758 |
const int LDB; |
759 |
#endif
|
760 |
{ |
761 |
/*
|
762 |
* Purpose
|
763 |
* =======
|
764 |
*
|
765 |
* HPL_dtrsm solves one of the matrix equations
|
766 |
*
|
767 |
* op( A ) * X = alpha * B, or X * op( A ) = alpha * B,
|
768 |
*
|
769 |
* where alpha is a scalar, X and B are m by n matrices, A is a unit, or
|
770 |
* non-unit, upper or lower triangular matrix and op(A) is one of
|
771 |
*
|
772 |
* op( A ) = A or op( A ) = A^T.
|
773 |
*
|
774 |
* The matrix X is overwritten on B.
|
775 |
*
|
776 |
* No test for singularity or near-singularity is included in this
|
777 |
* routine. Such tests must be performed before calling this routine.
|
778 |
*
|
779 |
* Arguments
|
780 |
* =========
|
781 |
*
|
782 |
* ORDER (local input) const enum HPL_ORDER
|
783 |
* On entry, ORDER specifies the storage format of the operands
|
784 |
* as follows:
|
785 |
* ORDER = HplRowMajor,
|
786 |
* ORDER = HplColumnMajor.
|
787 |
*
|
788 |
* SIDE (local input) const enum HPL_SIDE
|
789 |
* On entry, SIDE specifies whether op(A) appears on the left
|
790 |
* or right of X as follows:
|
791 |
* SIDE==HplLeft op( A ) * X = alpha * B,
|
792 |
* SIDE==HplRight X * op( A ) = alpha * B.
|
793 |
*
|
794 |
* UPLO (local input) const enum HPL_UPLO
|
795 |
* On entry, UPLO specifies whether the upper or lower
|
796 |
* triangular part of the array A is to be referenced. When
|
797 |
* UPLO==HplUpper, only the upper triangular part of A is to be
|
798 |
* referenced, otherwise only the lower triangular part of A is
|
799 |
* to be referenced.
|
800 |
*
|
801 |
* TRANS (local input) const enum HPL_TRANS
|
802 |
* On entry, TRANSA specifies the form of op(A) to be used in
|
803 |
* the matrix-matrix operation follows:
|
804 |
* TRANSA==HplNoTrans : op( A ) = A,
|
805 |
* TRANSA==HplTrans : op( A ) = A^T,
|
806 |
* TRANSA==HplConjTrans : op( A ) = A^T.
|
807 |
*
|
808 |
* DIAG (local input) const enum HPL_DIAG
|
809 |
* On entry, DIAG specifies whether A is unit triangular or
|
810 |
* not. When DIAG==HplUnit, A is assumed to be unit triangular,
|
811 |
* and otherwise, A is not assumed to be unit triangular.
|
812 |
*
|
813 |
* M (local input) const int
|
814 |
* On entry, M specifies the number of rows of the matrix B.
|
815 |
* M must be at least zero.
|
816 |
*
|
817 |
* N (local input) const int
|
818 |
* On entry, N specifies the number of columns of the matrix B.
|
819 |
* N must be at least zero.
|
820 |
*
|
821 |
* ALPHA (local input) const double
|
822 |
* On entry, ALPHA specifies the scalar alpha. When ALPHA is
|
823 |
* supplied as zero then the elements of the matrix B need not
|
824 |
* be set on input.
|
825 |
*
|
826 |
* A (local input) const double *
|
827 |
* On entry, A points to an array of size equal to or greater
|
828 |
* than LDA * k, where k is m when SIDE==HplLeft and is n
|
829 |
* otherwise. Before entry with UPLO==HplUpper, the leading
|
830 |
* k by k upper triangular part of the array A must contain the
|
831 |
* upper triangular matrix and the strictly lower triangular
|
832 |
* part of A is not referenced. When UPLO==HplLower on entry,
|
833 |
* the leading k by k lower triangular part of the array A must
|
834 |
* contain the lower triangular matrix and the strictly upper
|
835 |
* triangular part of A is not referenced.
|
836 |
*
|
837 |
* Note that when DIAG==HplUnit, the diagonal elements of A
|
838 |
* not referenced either, but are assumed to be unity.
|
839 |
*
|
840 |
* LDA (local input) const int
|
841 |
* On entry, LDA specifies the leading dimension of A as
|
842 |
* declared in the calling (sub) program. LDA must be at
|
843 |
* least MAX(1,m) when SIDE==HplLeft, and MAX(1,n) otherwise.
|
844 |
*
|
845 |
* B (local input/output) double *
|
846 |
* On entry, B points to an array of size equal to or greater
|
847 |
* than LDB * n. Before entry, the leading m by n part of the
|
848 |
* array B must contain the matrix B, except when beta is zero,
|
849 |
* in which case B need not be set on entry. On exit, the array
|
850 |
* B is overwritten by the m by n solution matrix.
|
851 |
*
|
852 |
* LDB (local input) const int
|
853 |
* On entry, LDB specifies the leading dimension of B as
|
854 |
* declared in the calling (sub) program. LDB must be at
|
855 |
* least MAX(1,m).
|
856 |
*
|
857 |
* ---------------------------------------------------------------------
|
858 |
*/
|
859 |
#ifdef HPL_CALL_CBLAS
|
860 |
cblas_dtrsm( ORDER, SIDE, UPLO, TRANS, DIAG, M, N, ALPHA, A, LDA, B, LDB ); |
861 |
#endif
|
862 |
#ifdef HPL_CALL_VSIPL
|
863 |
if( ORDER == HplColumnMajor )
|
864 |
{ |
865 |
HPL_dtrsm0( SIDE, UPLO, TRANS, DIAG, M, N, ALPHA, A, LDA, B, LDB ); |
866 |
} |
867 |
else
|
868 |
{ |
869 |
HPL_dtrsm0( ( SIDE == HplRight ? HplLeft : HplRight ), |
870 |
( UPLO == HplLower ? HplUpper : HplLower ), |
871 |
TRANS, DIAG, N, M, ALPHA, A, LDA, B, LDB ); |
872 |
} |
873 |
#endif
|
874 |
|
875 |
#ifdef HPL_CALL_FBLAS
|
876 |
double alpha = ALPHA;
|
877 |
#ifdef StringSunStyle
|
878 |
#if defined( HPL_USE_F77_INTEGER_DEF )
|
879 |
F77_INTEGER IONE = 1;
|
880 |
#else
|
881 |
int IONE = 1; |
882 |
#endif
|
883 |
#endif
|
884 |
#ifdef StringStructVal
|
885 |
F77_CHAR fside; |
886 |
F77_CHAR fuplo; |
887 |
F77_CHAR ftran; |
888 |
F77_CHAR fdiag; |
889 |
#endif
|
890 |
#ifdef StringStructPtr
|
891 |
F77_CHAR fside; |
892 |
F77_CHAR fuplo; |
893 |
F77_CHAR ftran; |
894 |
F77_CHAR fdiag; |
895 |
#endif
|
896 |
#ifdef StringCrayStyle
|
897 |
F77_CHAR fside; |
898 |
F77_CHAR fuplo; |
899 |
F77_CHAR ftran; |
900 |
F77_CHAR fdiag; |
901 |
#endif
|
902 |
#ifdef HPL_USE_F77_INTEGER_DEF
|
903 |
const F77_INTEGER F77M = M, F77N = N,
|
904 |
F77lda = LDA, F77ldb = LDB; |
905 |
#else
|
906 |
#define F77M M
|
907 |
#define F77N N
|
908 |
#define F77lda LDA
|
909 |
#define F77ldb LDB
|
910 |
#endif
|
911 |
char cside, cuplo, ctran, cdiag;
|
912 |
|
913 |
if( TRANS == HplNoTrans ) ctran = 'N'; |
914 |
else if( TRANS == HplTrans ) ctran = 'T'; |
915 |
else ctran = 'C'; |
916 |
cdiag = ( DIAG == HplUnit ? 'U' : 'N' ); |
917 |
|
918 |
if( ORDER == HplColumnMajor )
|
919 |
{ |
920 |
cside = ( SIDE == HplRight ? 'R' : 'L' ); |
921 |
cuplo = ( UPLO == HplLower ? 'L' : 'U' ); |
922 |
#ifdef StringSunStyle
|
923 |
F77dtrsm( &cside, &cuplo, &ctran, &cdiag, &F77M, &F77N, &alpha, |
924 |
A, &F77lda, B, &F77ldb, IONE, IONE, IONE, IONE ); |
925 |
#endif
|
926 |
#ifdef StringCrayStyle
|
927 |
fside = HPL_C2F_CHAR( cside ); fuplo = HPL_C2F_CHAR( cuplo ); |
928 |
ftran = HPL_C2F_CHAR( ctran ); fdiag = HPL_C2F_CHAR( cdiag ); |
929 |
F77dtrsm( fside, fuplo, ftran, fdiag, &F77M, &F77N, &alpha, |
930 |
A, &F77lda, B, &F77ldb ); |
931 |
#endif
|
932 |
#ifdef StringStructVal
|
933 |
fside.len = 1; fside.cp = &cside; fuplo.len = 1; fuplo.cp = &cuplo; |
934 |
ftran.len = 1; ftran.cp = &ctran; fdiag.len = 1; fdiag.cp = &cdiag; |
935 |
F77dtrsm( fside, fuplo, ftran, fdiag, &F77M, &F77N, &alpha, |
936 |
A, &F77lda, B, &F77ldb ); |
937 |
#endif
|
938 |
#ifdef StringStructPtr
|
939 |
fside.len = 1; fside.cp = &cside; fuplo.len = 1; fuplo.cp = &cuplo; |
940 |
ftran.len = 1; ftran.cp = &ctran; fdiag.len = 1; fdiag.cp = &cdiag; |
941 |
F77dtrsm( &fside, &fuplo, &ftran, &fdiag, &F77M, &F77N, &alpha, |
942 |
A, &F77lda, B, &F77ldb ); |
943 |
#endif
|
944 |
} |
945 |
else
|
946 |
{ |
947 |
cside = ( SIDE == HplRight ? 'L' : 'R' ); |
948 |
cuplo = ( UPLO == HplLower ? 'U' : 'L' ); |
949 |
#ifdef StringSunStyle
|
950 |
F77dtrsm( &cside, &cuplo, &ctran, &cdiag, &F77N, &F77M, &alpha, |
951 |
A, &F77lda, B, &F77ldb, IONE, IONE, IONE, IONE ); |
952 |
#endif
|
953 |
#ifdef StringCrayStyle
|
954 |
fside = HPL_C2F_CHAR( cside ); fuplo = HPL_C2F_CHAR( cuplo ); |
955 |
ftran = HPL_C2F_CHAR( ctran ); fdiag = HPL_C2F_CHAR( cdiag ); |
956 |
F77dtrsm( fside, fuplo, ftran, fdiag, &F77N, &F77M, &alpha, |
957 |
A, &F77lda, B, &F77ldb ); |
958 |
#endif
|
959 |
#ifdef StringStructVal
|
960 |
fside.len = 1; fside.cp = &cside; fuplo.len = 1; fuplo.cp = &cuplo; |
961 |
ftran.len = 1; ftran.cp = &ctran; fdiag.len = 1; fdiag.cp = &cdiag; |
962 |
F77dtrsm( fside, fuplo, ftran, fdiag, &F77N, &F77M, &alpha, |
963 |
A, &F77lda, B, &F77ldb ); |
964 |
#endif
|
965 |
#ifdef StringStructPtr
|
966 |
fside.len = 1; fside.cp = &cside; fuplo.len = 1; fuplo.cp = &cuplo; |
967 |
ftran.len = 1; ftran.cp = &ctran; fdiag.len = 1; fdiag.cp = &cdiag; |
968 |
F77dtrsm( &fside, &fuplo, &ftran, &fdiag, &F77N, &F77M, &alpha, |
969 |
A, &F77lda, B, &F77ldb ); |
970 |
#endif
|
971 |
} |
972 |
#endif
|
973 |
|
974 |
#ifdef HPL_CALL_CUBLAS
|
975 |
double alpha = ALPHA;
|
976 |
|
977 |
int IONE = 1; |
978 |
|
979 |
#define CUBLASM M
|
980 |
#define CUBLASN N
|
981 |
#define CUBLASlda LDA
|
982 |
#define CUBLASldb LDB
|
983 |
|
984 |
char cside, cuplo, ctran, cdiag;
|
985 |
|
986 |
if( TRANS == HplNoTrans ) ctran = 'N'; |
987 |
else if( TRANS == HplTrans ) ctran = 'T'; |
988 |
else ctran = 'C'; |
989 |
cdiag = ( DIAG == HplUnit ? 'U' : 'N' ); |
990 |
|
991 |
if( ORDER == HplColumnMajor )
|
992 |
{ |
993 |
cside = ( SIDE == HplRight ? 'R' : 'L' ); |
994 |
cuplo = ( UPLO == HplLower ? 'L' : 'U' ); |
995 |
|
996 |
CUBLAS_DTRSM( &cside, &cuplo, &ctran, &cdiag, &CUBLASM, &CUBLASN, &alpha, |
997 |
A, &CUBLASlda, B, &CUBLASldb, &IONE, &IONE, &IONE, &IONE ); |
998 |
} |
999 |
else
|
1000 |
{ |
1001 |
cside = ( SIDE == HplRight ? 'L' : 'R' ); |
1002 |
cuplo = ( UPLO == HplLower ? 'U' : 'L' ); |
1003 |
|
1004 |
CUBLAS_DTRSM( &cside, &cuplo, &ctran, &cdiag, &CUBLASN, &CUBLASM, &alpha, |
1005 |
A, &CUBLASlda, B, &CUBLASldb, &IONE, &IONE, &IONE, &IONE ); |
1006 |
} |
1007 |
#endif
|
1008 |
/*
|
1009 |
* End of HPL_dtrsm
|
1010 |
*/
|
1011 |
} |
1012 |
|
1013 |
#endif
|