root / src / blas / dgemm.f @ 8
Historique | Voir | Annoter | Télécharger (9,44 ko)
1 | 1 | pfleura2 | SUBROUTINE DGEMM(TRANSA,TRANSB,M,N,K,ALPHA,A,LDA,B,LDB,BETA,C,LDC) |
---|---|---|---|
2 | 1 | pfleura2 | * .. Scalar Arguments .. |
3 | 1 | pfleura2 | DOUBLE PRECISION ALPHA,BETA |
4 | 1 | pfleura2 | INTEGER K,LDA,LDB,LDC,M,N |
5 | 1 | pfleura2 | CHARACTER TRANSA,TRANSB |
6 | 1 | pfleura2 | * .. |
7 | 1 | pfleura2 | * .. Array Arguments .. |
8 | 1 | pfleura2 | DOUBLE PRECISION A(LDA,*),B(LDB,*),C(LDC,*) |
9 | 1 | pfleura2 | * .. |
10 | 1 | pfleura2 | * |
11 | 1 | pfleura2 | * Purpose |
12 | 1 | pfleura2 | * ======= |
13 | 1 | pfleura2 | * |
14 | 1 | pfleura2 | * DGEMM performs one of the matrix-matrix operations |
15 | 1 | pfleura2 | * |
16 | 1 | pfleura2 | * C := alpha*op( A )*op( B ) + beta*C, |
17 | 1 | pfleura2 | * |
18 | 1 | pfleura2 | * where op( X ) is one of |
19 | 1 | pfleura2 | * |
20 | 1 | pfleura2 | * op( X ) = X or op( X ) = X', |
21 | 1 | pfleura2 | * |
22 | 1 | pfleura2 | * alpha and beta are scalars, and A, B and C are matrices, with op( A ) |
23 | 1 | pfleura2 | * an m by k matrix, op( B ) a k by n matrix and C an m by n matrix. |
24 | 1 | pfleura2 | * |
25 | 1 | pfleura2 | * Arguments |
26 | 1 | pfleura2 | * ========== |
27 | 1 | pfleura2 | * |
28 | 1 | pfleura2 | * TRANSA - CHARACTER*1. |
29 | 1 | pfleura2 | * On entry, TRANSA specifies the form of op( A ) to be used in |
30 | 1 | pfleura2 | * the matrix multiplication as follows: |
31 | 1 | pfleura2 | * |
32 | 1 | pfleura2 | * TRANSA = 'N' or 'n', op( A ) = A. |
33 | 1 | pfleura2 | * |
34 | 1 | pfleura2 | * TRANSA = 'T' or 't', op( A ) = A'. |
35 | 1 | pfleura2 | * |
36 | 1 | pfleura2 | * TRANSA = 'C' or 'c', op( A ) = A'. |
37 | 1 | pfleura2 | * |
38 | 1 | pfleura2 | * Unchanged on exit. |
39 | 1 | pfleura2 | * |
40 | 1 | pfleura2 | * TRANSB - CHARACTER*1. |
41 | 1 | pfleura2 | * On entry, TRANSB specifies the form of op( B ) to be used in |
42 | 1 | pfleura2 | * the matrix multiplication as follows: |
43 | 1 | pfleura2 | * |
44 | 1 | pfleura2 | * TRANSB = 'N' or 'n', op( B ) = B. |
45 | 1 | pfleura2 | * |
46 | 1 | pfleura2 | * TRANSB = 'T' or 't', op( B ) = B'. |
47 | 1 | pfleura2 | * |
48 | 1 | pfleura2 | * TRANSB = 'C' or 'c', op( B ) = B'. |
49 | 1 | pfleura2 | * |
50 | 1 | pfleura2 | * Unchanged on exit. |
51 | 1 | pfleura2 | * |
52 | 1 | pfleura2 | * M - INTEGER. |
53 | 1 | pfleura2 | * On entry, M specifies the number of rows of the matrix |
54 | 1 | pfleura2 | * op( A ) and of the matrix C. M must be at least zero. |
55 | 1 | pfleura2 | * Unchanged on exit. |
56 | 1 | pfleura2 | * |
57 | 1 | pfleura2 | * N - INTEGER. |
58 | 1 | pfleura2 | * On entry, N specifies the number of columns of the matrix |
59 | 1 | pfleura2 | * op( B ) and the number of columns of the matrix C. N must be |
60 | 1 | pfleura2 | * at least zero. |
61 | 1 | pfleura2 | * Unchanged on exit. |
62 | 1 | pfleura2 | * |
63 | 1 | pfleura2 | * K - INTEGER. |
64 | 1 | pfleura2 | * On entry, K specifies the number of columns of the matrix |
65 | 1 | pfleura2 | * op( A ) and the number of rows of the matrix op( B ). K must |
66 | 1 | pfleura2 | * be at least zero. |
67 | 1 | pfleura2 | * Unchanged on exit. |
68 | 1 | pfleura2 | * |
69 | 1 | pfleura2 | * ALPHA - DOUBLE PRECISION. |
70 | 1 | pfleura2 | * On entry, ALPHA specifies the scalar alpha. |
71 | 1 | pfleura2 | * Unchanged on exit. |
72 | 1 | pfleura2 | * |
73 | 1 | pfleura2 | * A - DOUBLE PRECISION array of DIMENSION ( LDA, ka ), where ka is |
74 | 1 | pfleura2 | * k when TRANSA = 'N' or 'n', and is m otherwise. |
75 | 1 | pfleura2 | * Before entry with TRANSA = 'N' or 'n', the leading m by k |
76 | 1 | pfleura2 | * part of the array A must contain the matrix A, otherwise |
77 | 1 | pfleura2 | * the leading k by m part of the array A must contain the |
78 | 1 | pfleura2 | * matrix A. |
79 | 1 | pfleura2 | * Unchanged on exit. |
80 | 1 | pfleura2 | * |
81 | 1 | pfleura2 | * LDA - INTEGER. |
82 | 1 | pfleura2 | * On entry, LDA specifies the first dimension of A as declared |
83 | 1 | pfleura2 | * in the calling (sub) program. When TRANSA = 'N' or 'n' then |
84 | 1 | pfleura2 | * LDA must be at least max( 1, m ), otherwise LDA must be at |
85 | 1 | pfleura2 | * least max( 1, k ). |
86 | 1 | pfleura2 | * Unchanged on exit. |
87 | 1 | pfleura2 | * |
88 | 1 | pfleura2 | * B - DOUBLE PRECISION array of DIMENSION ( LDB, kb ), where kb is |
89 | 1 | pfleura2 | * n when TRANSB = 'N' or 'n', and is k otherwise. |
90 | 1 | pfleura2 | * Before entry with TRANSB = 'N' or 'n', the leading k by n |
91 | 1 | pfleura2 | * part of the array B must contain the matrix B, otherwise |
92 | 1 | pfleura2 | * the leading n by k part of the array B must contain the |
93 | 1 | pfleura2 | * matrix B. |
94 | 1 | pfleura2 | * Unchanged on exit. |
95 | 1 | pfleura2 | * |
96 | 1 | pfleura2 | * LDB - INTEGER. |
97 | 1 | pfleura2 | * On entry, LDB specifies the first dimension of B as declared |
98 | 1 | pfleura2 | * in the calling (sub) program. When TRANSB = 'N' or 'n' then |
99 | 1 | pfleura2 | * LDB must be at least max( 1, k ), otherwise LDB must be at |
100 | 1 | pfleura2 | * least max( 1, n ). |
101 | 1 | pfleura2 | * Unchanged on exit. |
102 | 1 | pfleura2 | * |
103 | 1 | pfleura2 | * BETA - DOUBLE PRECISION. |
104 | 1 | pfleura2 | * On entry, BETA specifies the scalar beta. When BETA is |
105 | 1 | pfleura2 | * supplied as zero then C need not be set on input. |
106 | 1 | pfleura2 | * Unchanged on exit. |
107 | 1 | pfleura2 | * |
108 | 1 | pfleura2 | * C - DOUBLE PRECISION array of DIMENSION ( LDC, n ). |
109 | 1 | pfleura2 | * Before entry, the leading m by n part of the array C must |
110 | 1 | pfleura2 | * contain the matrix C, except when beta is zero, in which |
111 | 1 | pfleura2 | * case C need not be set on entry. |
112 | 1 | pfleura2 | * On exit, the array C is overwritten by the m by n matrix |
113 | 1 | pfleura2 | * ( alpha*op( A )*op( B ) + beta*C ). |
114 | 1 | pfleura2 | * |
115 | 1 | pfleura2 | * LDC - INTEGER. |
116 | 1 | pfleura2 | * On entry, LDC specifies the first dimension of C as declared |
117 | 1 | pfleura2 | * in the calling (sub) program. LDC must be at least |
118 | 1 | pfleura2 | * max( 1, m ). |
119 | 1 | pfleura2 | * Unchanged on exit. |
120 | 1 | pfleura2 | * |
121 | 1 | pfleura2 | * |
122 | 1 | pfleura2 | * Level 3 Blas routine. |
123 | 1 | pfleura2 | * |
124 | 1 | pfleura2 | * -- Written on 8-February-1989. |
125 | 1 | pfleura2 | * Jack Dongarra, Argonne National Laboratory. |
126 | 1 | pfleura2 | * Iain Duff, AERE Harwell. |
127 | 1 | pfleura2 | * Jeremy Du Croz, Numerical Algorithms Group Ltd. |
128 | 1 | pfleura2 | * Sven Hammarling, Numerical Algorithms Group Ltd. |
129 | 1 | pfleura2 | * |
130 | 1 | pfleura2 | * |
131 | 1 | pfleura2 | * .. External Functions .. |
132 | 1 | pfleura2 | LOGICAL LSAME |
133 | 1 | pfleura2 | EXTERNAL LSAME |
134 | 1 | pfleura2 | * .. |
135 | 1 | pfleura2 | * .. External Subroutines .. |
136 | 1 | pfleura2 | EXTERNAL XERBLA |
137 | 1 | pfleura2 | * .. |
138 | 1 | pfleura2 | * .. Intrinsic Functions .. |
139 | 1 | pfleura2 | INTRINSIC MAX |
140 | 1 | pfleura2 | * .. |
141 | 1 | pfleura2 | * .. Local Scalars .. |
142 | 1 | pfleura2 | DOUBLE PRECISION TEMP |
143 | 1 | pfleura2 | INTEGER I,INFO,J,L,NCOLA,NROWA,NROWB |
144 | 1 | pfleura2 | LOGICAL NOTA,NOTB |
145 | 1 | pfleura2 | * .. |
146 | 1 | pfleura2 | * .. Parameters .. |
147 | 1 | pfleura2 | DOUBLE PRECISION ONE,ZERO |
148 | 1 | pfleura2 | PARAMETER (ONE=1.0D+0,ZERO=0.0D+0) |
149 | 1 | pfleura2 | * .. |
150 | 1 | pfleura2 | * |
151 | 1 | pfleura2 | * Set NOTA and NOTB as true if A and B respectively are not |
152 | 1 | pfleura2 | * transposed and set NROWA, NCOLA and NROWB as the number of rows |
153 | 1 | pfleura2 | * and columns of A and the number of rows of B respectively. |
154 | 1 | pfleura2 | * |
155 | 1 | pfleura2 | NOTA = LSAME(TRANSA,'N') |
156 | 1 | pfleura2 | NOTB = LSAME(TRANSB,'N') |
157 | 1 | pfleura2 | IF (NOTA) THEN |
158 | 1 | pfleura2 | NROWA = M |
159 | 1 | pfleura2 | NCOLA = K |
160 | 1 | pfleura2 | ELSE |
161 | 1 | pfleura2 | NROWA = K |
162 | 1 | pfleura2 | NCOLA = M |
163 | 1 | pfleura2 | END IF |
164 | 1 | pfleura2 | IF (NOTB) THEN |
165 | 1 | pfleura2 | NROWB = K |
166 | 1 | pfleura2 | ELSE |
167 | 1 | pfleura2 | NROWB = N |
168 | 1 | pfleura2 | END IF |
169 | 1 | pfleura2 | * |
170 | 1 | pfleura2 | * Test the input parameters. |
171 | 1 | pfleura2 | * |
172 | 1 | pfleura2 | INFO = 0 |
173 | 1 | pfleura2 | IF ((.NOT.NOTA) .AND. (.NOT.LSAME(TRANSA,'C')) .AND. |
174 | 1 | pfleura2 | + (.NOT.LSAME(TRANSA,'T'))) THEN |
175 | 1 | pfleura2 | INFO = 1 |
176 | 1 | pfleura2 | ELSE IF ((.NOT.NOTB) .AND. (.NOT.LSAME(TRANSB,'C')) .AND. |
177 | 1 | pfleura2 | + (.NOT.LSAME(TRANSB,'T'))) THEN |
178 | 1 | pfleura2 | INFO = 2 |
179 | 1 | pfleura2 | ELSE IF (M.LT.0) THEN |
180 | 1 | pfleura2 | INFO = 3 |
181 | 1 | pfleura2 | ELSE IF (N.LT.0) THEN |
182 | 1 | pfleura2 | INFO = 4 |
183 | 1 | pfleura2 | ELSE IF (K.LT.0) THEN |
184 | 1 | pfleura2 | INFO = 5 |
185 | 1 | pfleura2 | ELSE IF (LDA.LT.MAX(1,NROWA)) THEN |
186 | 1 | pfleura2 | INFO = 8 |
187 | 1 | pfleura2 | ELSE IF (LDB.LT.MAX(1,NROWB)) THEN |
188 | 1 | pfleura2 | INFO = 10 |
189 | 1 | pfleura2 | ELSE IF (LDC.LT.MAX(1,M)) THEN |
190 | 1 | pfleura2 | INFO = 13 |
191 | 1 | pfleura2 | END IF |
192 | 1 | pfleura2 | IF (INFO.NE.0) THEN |
193 | 1 | pfleura2 | CALL XERBLA('DGEMM ',INFO) |
194 | 1 | pfleura2 | RETURN |
195 | 1 | pfleura2 | END IF |
196 | 1 | pfleura2 | * |
197 | 1 | pfleura2 | * Quick return if possible. |
198 | 1 | pfleura2 | * |
199 | 1 | pfleura2 | IF ((M.EQ.0) .OR. (N.EQ.0) .OR. |
200 | 1 | pfleura2 | + (((ALPHA.EQ.ZERO).OR. (K.EQ.0)).AND. (BETA.EQ.ONE))) RETURN |
201 | 1 | pfleura2 | * |
202 | 1 | pfleura2 | * And if alpha.eq.zero. |
203 | 1 | pfleura2 | * |
204 | 1 | pfleura2 | IF (ALPHA.EQ.ZERO) THEN |
205 | 1 | pfleura2 | IF (BETA.EQ.ZERO) THEN |
206 | 1 | pfleura2 | DO 20 J = 1,N |
207 | 1 | pfleura2 | DO 10 I = 1,M |
208 | 1 | pfleura2 | C(I,J) = ZERO |
209 | 1 | pfleura2 | 10 CONTINUE |
210 | 1 | pfleura2 | 20 CONTINUE |
211 | 1 | pfleura2 | ELSE |
212 | 1 | pfleura2 | DO 40 J = 1,N |
213 | 1 | pfleura2 | DO 30 I = 1,M |
214 | 1 | pfleura2 | C(I,J) = BETA*C(I,J) |
215 | 1 | pfleura2 | 30 CONTINUE |
216 | 1 | pfleura2 | 40 CONTINUE |
217 | 1 | pfleura2 | END IF |
218 | 1 | pfleura2 | RETURN |
219 | 1 | pfleura2 | END IF |
220 | 1 | pfleura2 | * |
221 | 1 | pfleura2 | * Start the operations. |
222 | 1 | pfleura2 | * |
223 | 1 | pfleura2 | IF (NOTB) THEN |
224 | 1 | pfleura2 | IF (NOTA) THEN |
225 | 1 | pfleura2 | * |
226 | 1 | pfleura2 | * Form C := alpha*A*B + beta*C. |
227 | 1 | pfleura2 | * |
228 | 1 | pfleura2 | DO 90 J = 1,N |
229 | 1 | pfleura2 | IF (BETA.EQ.ZERO) THEN |
230 | 1 | pfleura2 | DO 50 I = 1,M |
231 | 1 | pfleura2 | C(I,J) = ZERO |
232 | 1 | pfleura2 | 50 CONTINUE |
233 | 1 | pfleura2 | ELSE IF (BETA.NE.ONE) THEN |
234 | 1 | pfleura2 | DO 60 I = 1,M |
235 | 1 | pfleura2 | C(I,J) = BETA*C(I,J) |
236 | 1 | pfleura2 | 60 CONTINUE |
237 | 1 | pfleura2 | END IF |
238 | 1 | pfleura2 | DO 80 L = 1,K |
239 | 1 | pfleura2 | IF (B(L,J).NE.ZERO) THEN |
240 | 1 | pfleura2 | TEMP = ALPHA*B(L,J) |
241 | 1 | pfleura2 | DO 70 I = 1,M |
242 | 1 | pfleura2 | C(I,J) = C(I,J) + TEMP*A(I,L) |
243 | 1 | pfleura2 | 70 CONTINUE |
244 | 1 | pfleura2 | END IF |
245 | 1 | pfleura2 | 80 CONTINUE |
246 | 1 | pfleura2 | 90 CONTINUE |
247 | 1 | pfleura2 | ELSE |
248 | 1 | pfleura2 | * |
249 | 1 | pfleura2 | * Form C := alpha*A'*B + beta*C |
250 | 1 | pfleura2 | * |
251 | 1 | pfleura2 | DO 120 J = 1,N |
252 | 1 | pfleura2 | DO 110 I = 1,M |
253 | 1 | pfleura2 | TEMP = ZERO |
254 | 1 | pfleura2 | DO 100 L = 1,K |
255 | 1 | pfleura2 | TEMP = TEMP + A(L,I)*B(L,J) |
256 | 1 | pfleura2 | 100 CONTINUE |
257 | 1 | pfleura2 | IF (BETA.EQ.ZERO) THEN |
258 | 1 | pfleura2 | C(I,J) = ALPHA*TEMP |
259 | 1 | pfleura2 | ELSE |
260 | 1 | pfleura2 | C(I,J) = ALPHA*TEMP + BETA*C(I,J) |
261 | 1 | pfleura2 | END IF |
262 | 1 | pfleura2 | 110 CONTINUE |
263 | 1 | pfleura2 | 120 CONTINUE |
264 | 1 | pfleura2 | END IF |
265 | 1 | pfleura2 | ELSE |
266 | 1 | pfleura2 | IF (NOTA) THEN |
267 | 1 | pfleura2 | * |
268 | 1 | pfleura2 | * Form C := alpha*A*B' + beta*C |
269 | 1 | pfleura2 | * |
270 | 1 | pfleura2 | DO 170 J = 1,N |
271 | 1 | pfleura2 | IF (BETA.EQ.ZERO) THEN |
272 | 1 | pfleura2 | DO 130 I = 1,M |
273 | 1 | pfleura2 | C(I,J) = ZERO |
274 | 1 | pfleura2 | 130 CONTINUE |
275 | 1 | pfleura2 | ELSE IF (BETA.NE.ONE) THEN |
276 | 1 | pfleura2 | DO 140 I = 1,M |
277 | 1 | pfleura2 | C(I,J) = BETA*C(I,J) |
278 | 1 | pfleura2 | 140 CONTINUE |
279 | 1 | pfleura2 | END IF |
280 | 1 | pfleura2 | DO 160 L = 1,K |
281 | 1 | pfleura2 | IF (B(J,L).NE.ZERO) THEN |
282 | 1 | pfleura2 | TEMP = ALPHA*B(J,L) |
283 | 1 | pfleura2 | DO 150 I = 1,M |
284 | 1 | pfleura2 | C(I,J) = C(I,J) + TEMP*A(I,L) |
285 | 1 | pfleura2 | 150 CONTINUE |
286 | 1 | pfleura2 | END IF |
287 | 1 | pfleura2 | 160 CONTINUE |
288 | 1 | pfleura2 | 170 CONTINUE |
289 | 1 | pfleura2 | ELSE |
290 | 1 | pfleura2 | * |
291 | 1 | pfleura2 | * Form C := alpha*A'*B' + beta*C |
292 | 1 | pfleura2 | * |
293 | 1 | pfleura2 | DO 200 J = 1,N |
294 | 1 | pfleura2 | DO 190 I = 1,M |
295 | 1 | pfleura2 | TEMP = ZERO |
296 | 1 | pfleura2 | DO 180 L = 1,K |
297 | 1 | pfleura2 | TEMP = TEMP + A(L,I)*B(J,L) |
298 | 1 | pfleura2 | 180 CONTINUE |
299 | 1 | pfleura2 | IF (BETA.EQ.ZERO) THEN |
300 | 1 | pfleura2 | C(I,J) = ALPHA*TEMP |
301 | 1 | pfleura2 | ELSE |
302 | 1 | pfleura2 | C(I,J) = ALPHA*TEMP + BETA*C(I,J) |
303 | 1 | pfleura2 | END IF |
304 | 1 | pfleura2 | 190 CONTINUE |
305 | 1 | pfleura2 | 200 CONTINUE |
306 | 1 | pfleura2 | END IF |
307 | 1 | pfleura2 | END IF |
308 | 1 | pfleura2 | * |
309 | 1 | pfleura2 | RETURN |
310 | 1 | pfleura2 | * |
311 | 1 | pfleura2 | * End of DGEMM . |
312 | 1 | pfleura2 | * |
313 | 1 | pfleura2 | END |