root / BLAS / xGEMM / xGEMM.c @ 19
Historique | Voir | Annoter | Télécharger (12,43 ko)
1 | 1 | equemene | /*
|
---|---|---|---|
2 | 1 | equemene | Performs matrix multiply
|
3 | 1 | equemene | |
4 | 1 | equemene | Thanks for help from aurel32@debian.org
|
5 | 1 | equemene | */
|
6 | 1 | equemene | |
7 | 1 | equemene | #include <stdio.h> |
8 | 1 | equemene | #include <math.h> |
9 | 1 | equemene | #include <stdlib.h> |
10 | 1 | equemene | #include <sys/time.h> |
11 | 1 | equemene | #include <string.h> |
12 | 1 | equemene | |
13 | 1 | equemene | #ifdef CUBLAS
|
14 | 1 | equemene | #include <cublas.h> |
15 | 1 | equemene | #define CUBLAS_WRAPPER_ERROR_NOERR 0 |
16 | 1 | equemene | #define CUBLAS_WRAPPER_ERROR_ALLOC 1 |
17 | 1 | equemene | #define CUBLAS_WRAPPER_ERROR_SET 2 |
18 | 1 | equemene | #define CUBLAS_WRAPPER_ERROR_GET 3 |
19 | 1 | equemene | #define CUBLAS_WRAPPER_ERROR_STUB 4 |
20 | 1 | equemene | #elif THUNKING
|
21 | 1 | equemene | #include <cublas.h> |
22 | 7 | equemene | #include "fortran_common.h" |
23 | 7 | equemene | #include "fortran_thunking.h" |
24 | 1 | equemene | #elif FBLAS
|
25 | 1 | equemene | #include <cblas_f77.h> |
26 | 1 | equemene | #elif GSL
|
27 | 1 | equemene | #include <gsl_cblas.h> |
28 | 1 | equemene | #elif ACML
|
29 | 1 | equemene | #include <acml.h> |
30 | 1 | equemene | #include <acml_mv.h> |
31 | 1 | equemene | #else
|
32 | 1 | equemene | #include <cblas.h> |
33 | 1 | equemene | #endif
|
34 | 1 | equemene | |
35 | 1 | equemene | #ifdef DOUBLE
|
36 | 1 | equemene | #define LENGTH double |
37 | 1 | equemene | #else
|
38 | 1 | equemene | #define LENGTH float |
39 | 1 | equemene | #endif
|
40 | 1 | equemene | |
41 | 7 | equemene | #ifdef FBLAS
|
42 | 1 | equemene | |
43 | 1 | equemene | #ifdef DOUBLE
|
44 | 1 | equemene | |
45 | 1 | equemene | void dgemm_(FCHAR, FCHAR, FINT, FINT, FINT, const double *, const double *, FINT, |
46 | 1 | equemene | const double *, FINT, const double *, double *, FINT); |
47 | 1 | equemene | |
48 | 1 | equemene | #else
|
49 | 1 | equemene | |
50 | 1 | equemene | void sgemm_(FCHAR, FCHAR, FINT, FINT, FINT, const float *, const float *, FINT, |
51 | 1 | equemene | const float *, FINT, const float *, float *, FINT); |
52 | 1 | equemene | |
53 | 1 | equemene | #endif
|
54 | 1 | equemene | #endif
|
55 | 1 | equemene | |
56 | 1 | equemene | /* Matrix with only defined triangular terms */
|
57 | 1 | equemene | /* Even if there are 0 in matrix, must be defined at all ! */
|
58 | 1 | equemene | |
59 | 1 | equemene | /* Get from fortran.c */
|
60 | 1 | equemene | |
61 | 1 | equemene | #ifdef CUBLAS
|
62 | 1 | equemene | static char *errMsg[5] = |
63 | 1 | equemene | { |
64 | 1 | equemene | "no error",
|
65 | 1 | equemene | "allocation error",
|
66 | 1 | equemene | "setVector/setMatrix error",
|
67 | 1 | equemene | "getVector/getMatrix error",
|
68 | 1 | equemene | "not implemented"
|
69 | 1 | equemene | }; |
70 | 1 | equemene | |
71 | 1 | equemene | static void wrapperError (const char *funcName, int error) |
72 | 1 | equemene | { |
73 | 1 | equemene | printf ("cublas%s wrapper: %s\n", funcName, errMsg[error]);
|
74 | 1 | equemene | fflush (stdout); |
75 | 1 | equemene | } |
76 | 1 | equemene | #endif
|
77 | 1 | equemene | |
78 | 1 | equemene | int printVector(const int dimVector,const LENGTH *dataVector, |
79 | 1 | equemene | char *nameVector,char *mesgVector) |
80 | 1 | equemene | { |
81 | 1 | equemene | #ifndef QUIET
|
82 | 1 | equemene | |
83 | 1 | equemene | int i;
|
84 | 1 | equemene | printf("\n%s of %s, size %i:\n",mesgVector,nameVector,dimVector);
|
85 | 1 | equemene | for (i=0;i<dimVector;i++) |
86 | 1 | equemene | { |
87 | 1 | equemene | printf("%s[%i]=%2.10e\n",nameVector,i,dataVector[i]);
|
88 | 1 | equemene | } |
89 | 1 | equemene | #endif
|
90 | 1 | equemene | |
91 | 1 | equemene | return 0; |
92 | 1 | equemene | } |
93 | 1 | equemene | |
94 | 1 | equemene | int printResults(const int dimVector,const LENGTH *dataVector, |
95 | 1 | equemene | char *nameVector,char *mesgVector) |
96 | 1 | equemene | { |
97 | 1 | equemene | #ifdef RESULTS
|
98 | 1 | equemene | int i;
|
99 | 1 | equemene | |
100 | 1 | equemene | printf("\n%s of %s, size %i:\n",mesgVector,nameVector,dimVector);
|
101 | 1 | equemene | for (i=0;i<dimVector;i++) |
102 | 1 | equemene | { |
103 | 1 | equemene | printf("%s[%i]=%2.10e\n",nameVector,i,dataVector[i]);
|
104 | 1 | equemene | } |
105 | 1 | equemene | #endif
|
106 | 1 | equemene | return 0; |
107 | 1 | equemene | } |
108 | 1 | equemene | |
109 | 1 | equemene | #ifdef CUBLAS
|
110 | 1 | equemene | int printVectorGPU(const int dimVector,const LENGTH *dataVector, |
111 | 1 | equemene | char *nameVector,char *mesgVector) |
112 | 1 | equemene | { |
113 | 1 | equemene | #ifndef QUIET
|
114 | 1 | equemene | int i;
|
115 | 1 | equemene | cublasStatus stat; |
116 | 1 | equemene | LENGTH *P=0;
|
117 | 1 | equemene | int incx=1; |
118 | 1 | equemene | |
119 | 1 | equemene | P=malloc(dimVector*sizeof(LENGTH));
|
120 | 1 | equemene | |
121 | 1 | equemene | stat=cublasGetVector(dimVector,sizeof(P[0]),dataVector,incx,P,incx); |
122 | 1 | equemene | |
123 | 1 | equemene | if (stat != CUBLAS_STATUS_SUCCESS) {
|
124 | 1 | equemene | wrapperError ("ToGet", CUBLAS_WRAPPER_ERROR_GET);
|
125 | 1 | equemene | } |
126 | 1 | equemene | |
127 | 1 | equemene | printf("\n%s of %s, size %i:\n",mesgVector,nameVector,dimVector);
|
128 | 1 | equemene | for (i=0;i<dimVector;i++) |
129 | 1 | equemene | { |
130 | 1 | equemene | printf("%s[%i]=%2.10e\n",nameVector,i,P[i]);
|
131 | 1 | equemene | } |
132 | 1 | equemene | |
133 | 1 | equemene | free(P); |
134 | 1 | equemene | #endif
|
135 | 1 | equemene | |
136 | 1 | equemene | return 0; |
137 | 1 | equemene | } |
138 | 1 | equemene | #endif
|
139 | 1 | equemene | |
140 | 1 | equemene | int bench(int dim,int RUNS) |
141 | 1 | equemene | { |
142 | 1 | equemene | /*
|
143 | 1 | equemene | int dim=1000;
|
144 | 1 | equemene | int RUNS=100;
|
145 | 1 | equemene | int incx=1;
|
146 | 1 | equemene | */
|
147 | 1 | equemene | #ifdef PRINT
|
148 | 1 | equemene | LENGTH factor=1.;
|
149 | 1 | equemene | #endif
|
150 | 1 | equemene | |
151 | 1 | equemene | LENGTH alpha=1.,beta=0.; |
152 | 1 | equemene | LENGTH *A,*B,*C,*D; |
153 | 1 | equemene | |
154 | 1 | equemene | /* checkBefore checkAfter checks */
|
155 | 1 | equemene | LENGTH *checksA,*checksB; |
156 | 1 | equemene | |
157 | 1 | equemene | int i=0, j=0; |
158 | 1 | equemene | |
159 | 1 | equemene | double duration;
|
160 | 1 | equemene | |
161 | 1 | equemene | struct timeval tv1,tv2;
|
162 | 1 | equemene | struct timezone tz;
|
163 | 1 | equemene | |
164 | 1 | equemene | /* Create 4 Matrix of dimension dim by dim */
|
165 | 1 | equemene | |
166 | 1 | equemene | A=malloc(dim*dim*sizeof(LENGTH));
|
167 | 1 | equemene | B=malloc(dim*dim*sizeof(LENGTH));
|
168 | 1 | equemene | C=malloc(dim*dim*sizeof(LENGTH));
|
169 | 1 | equemene | D=malloc(dim*dim*sizeof(LENGTH));
|
170 | 1 | equemene | |
171 | 1 | equemene | /* Create 2 vectors for checker Before and After */
|
172 | 1 | equemene | |
173 | 1 | equemene | checksA=malloc(RUNS*sizeof(LENGTH));
|
174 | 1 | equemene | checksB=malloc(RUNS*sizeof(LENGTH));
|
175 | 1 | equemene | |
176 | 1 | equemene | /* Initialize elements with random numbers */
|
177 | 1 | equemene | /* Initialize the seed for rand() */
|
178 | 1 | equemene | /* srand(time()); */
|
179 | 1 | equemene | |
180 | 1 | equemene | for (i=0; i<dim; i++) { |
181 | 1 | equemene | for (j=0; j<dim; j++) { |
182 | 1 | equemene | A[i*dim+j]=(LENGTH)rand()/(RAND_MAX+1.)
|
183 | 1 | equemene | *(LENGTH)(i+1.)/(LENGTH)(j+1.); |
184 | 1 | equemene | B[i*dim+j]=(LENGTH)rand()/(RAND_MAX+1.)
|
185 | 1 | equemene | *(LENGTH)(i+1.)/(LENGTH)(j+1.); |
186 | 1 | equemene | C[i*dim+j]=0.;
|
187 | 1 | equemene | D[i*dim+j]=0.;
|
188 | 1 | equemene | } |
189 | 1 | equemene | } |
190 | 1 | equemene | /*
|
191 | 1 | equemene | A[0]=1;
|
192 | 1 | equemene | A[1]=2;
|
193 | 1 | equemene | A[2]=3;
|
194 | 1 | equemene | A[3]=4;
|
195 | 1 | equemene | |
196 | 1 | equemene | B[0]=5;
|
197 | 1 | equemene | B[1]=6;
|
198 | 1 | equemene | B[2]=7;
|
199 | 1 | equemene | B[3]=8;
|
200 | 1 | equemene | */
|
201 | 1 | equemene | |
202 | 1 | equemene | /* Print the matrix */
|
203 | 1 | equemene | |
204 | 1 | equemene | #ifdef QUIET
|
205 | 1 | equemene | #else
|
206 | 1 | equemene | for (i=0; i<dim; i++) { |
207 | 1 | equemene | for (j=0; j<dim; j++) printf("A[%i,%i]=%1.5f ", i,j,A[i*dim+j]); |
208 | 1 | equemene | putchar('\n');
|
209 | 1 | equemene | } |
210 | 1 | equemene | putchar('\n');
|
211 | 1 | equemene | for (i=0; i<dim; i++) { |
212 | 1 | equemene | for (j=0; j<dim; j++) printf("B[%i,%i]=%1.5f ", i,j,B[i*dim+j]); |
213 | 1 | equemene | putchar('\n');
|
214 | 1 | equemene | } |
215 | 1 | equemene | putchar('\n');
|
216 | 1 | equemene | #endif
|
217 | 1 | equemene | |
218 | 1 | equemene | /* Get first timer before launching */
|
219 | 1 | equemene | gettimeofday(&tv1, &tz); |
220 | 1 | equemene | |
221 | 1 | equemene | /* Compute with CuBLAS library */
|
222 | 1 | equemene | #ifdef CUBLAS
|
223 | 1 | equemene | LENGTH *devPtrA=0, *devPtrB=0, *devPtrC=0, *devPtrD=0; |
224 | 1 | equemene | cublasStatus stat1, stat2, stat3, stat4; |
225 | 1 | equemene | struct timeval tv3,tv4;
|
226 | 1 | equemene | |
227 | 1 | equemene | /* Order is Row */
|
228 | 1 | equemene | /* Have to swap uplo and trans */
|
229 | 1 | equemene | char transa='N',transb='T'; |
230 | 1 | equemene | |
231 | 1 | equemene | printf("Using CuBLAS: %i iterations for %ix%i matrix\n",
|
232 | 1 | equemene | RUNS,dim,dim); |
233 | 1 | equemene | |
234 | 1 | equemene | stat1=cublasAlloc(dim*dim,sizeof(devPtrA[0]),(void**)&devPtrA); |
235 | 1 | equemene | stat2=cublasAlloc(dim*dim,sizeof(devPtrB[0]),(void**)&devPtrB); |
236 | 1 | equemene | stat3=cublasAlloc(dim*dim,sizeof(devPtrC[0]),(void**)&devPtrC); |
237 | 1 | equemene | stat4=cublasAlloc(dim*dim,sizeof(devPtrD[0]),(void**)&devPtrD); |
238 | 1 | equemene | |
239 | 1 | equemene | if ((stat1 != CUBLAS_STATUS_SUCCESS) ||
|
240 | 1 | equemene | (stat2 != CUBLAS_STATUS_SUCCESS) || |
241 | 1 | equemene | (stat3 != CUBLAS_STATUS_SUCCESS) || |
242 | 1 | equemene | (stat4 != CUBLAS_STATUS_SUCCESS) ) { |
243 | 5 | equemene | wrapperError ("xGEMM", CUBLAS_WRAPPER_ERROR_ALLOC);
|
244 | 1 | equemene | cublasFree (devPtrA); |
245 | 1 | equemene | cublasFree (devPtrB); |
246 | 1 | equemene | cublasFree (devPtrC); |
247 | 1 | equemene | cublasFree (devPtrD); |
248 | 1 | equemene | return 1; |
249 | 1 | equemene | } |
250 | 1 | equemene | |
251 | 1 | equemene | stat1=cublasSetMatrix(dim,dim,sizeof(A[0]),A,dim,devPtrA,dim); |
252 | 1 | equemene | stat2=cublasSetMatrix(dim,dim,sizeof(B[0]),B,dim,devPtrB,dim); |
253 | 1 | equemene | stat3=cublasSetMatrix(dim,dim,sizeof(C[0]),C,dim,devPtrC,dim); |
254 | 1 | equemene | stat4=cublasSetMatrix(dim,dim,sizeof(D[0]),D,dim,devPtrD,dim); |
255 | 1 | equemene | |
256 | 1 | equemene | if ((stat1 != CUBLAS_STATUS_SUCCESS) ||
|
257 | 1 | equemene | (stat2 != CUBLAS_STATUS_SUCCESS) || |
258 | 1 | equemene | (stat3 != CUBLAS_STATUS_SUCCESS) || |
259 | 1 | equemene | (stat4 != CUBLAS_STATUS_SUCCESS) ) { |
260 | 5 | equemene | wrapperError ("xGEMM", CUBLAS_WRAPPER_ERROR_SET);
|
261 | 1 | equemene | cublasFree (devPtrA); |
262 | 1 | equemene | cublasFree (devPtrB); |
263 | 1 | equemene | cublasFree (devPtrC); |
264 | 1 | equemene | cublasFree (devPtrD); |
265 | 1 | equemene | return 1; |
266 | 1 | equemene | } |
267 | 1 | equemene | |
268 | 1 | equemene | /* Get third timer after memory operation */
|
269 | 1 | equemene | gettimeofday(&tv3, &tz); |
270 | 1 | equemene | |
271 | 1 | equemene | #ifdef DOUBLE
|
272 | 1 | equemene | |
273 | 1 | equemene | for (i=0;i<RUNS;i++) |
274 | 1 | equemene | { |
275 | 1 | equemene | cublasDgemm(transa,transa,dim,dim,dim,alpha,devPtrB,dim, |
276 | 1 | equemene | devPtrA,dim,beta,devPtrC,dim); |
277 | 1 | equemene | cublasDgemm(transb,transb,dim,dim,dim,alpha,devPtrA,dim, |
278 | 1 | equemene | devPtrB,dim,beta,devPtrD,dim); |
279 | 1 | equemene | } |
280 | 1 | equemene | |
281 | 1 | equemene | #else
|
282 | 1 | equemene | |
283 | 1 | equemene | for (i=0;i<RUNS;i++) |
284 | 1 | equemene | { |
285 | 1 | equemene | cublasSgemm(transa,transa,dim,dim,dim,alpha,devPtrB,dim, |
286 | 1 | equemene | devPtrA,dim,beta,devPtrC,dim); |
287 | 1 | equemene | cublasSgemm(transb,transb,dim,dim,dim,alpha,devPtrA,dim, |
288 | 1 | equemene | devPtrB,dim,beta,devPtrD,dim); |
289 | 1 | equemene | } |
290 | 1 | equemene | |
291 | 1 | equemene | #endif
|
292 | 1 | equemene | |
293 | 1 | equemene | stat3=cublasGetMatrix(dim,dim,sizeof(C[0]),devPtrC,dim,C,dim); |
294 | 1 | equemene | stat4=cublasGetMatrix(dim,dim,sizeof(D[0]),devPtrD,dim,D,dim); |
295 | 1 | equemene | |
296 | 1 | equemene | cublasFree (devPtrA); |
297 | 1 | equemene | cublasFree (devPtrB); |
298 | 1 | equemene | cublasFree (devPtrC); |
299 | 1 | equemene | cublasFree (devPtrD); |
300 | 1 | equemene | |
301 | 1 | equemene | if ((stat1 != CUBLAS_STATUS_SUCCESS) ) {
|
302 | 5 | equemene | wrapperError ("xGEMM", CUBLAS_WRAPPER_ERROR_GET);
|
303 | 1 | equemene | } |
304 | 1 | equemene | |
305 | 1 | equemene | /* Get fourth timer after memory free */
|
306 | 1 | equemene | gettimeofday(&tv4, &tz); |
307 | 1 | equemene | |
308 | 1 | equemene | #elif THUNKING
|
309 | 1 | equemene | |
310 | 1 | equemene | /* Order is Row : Have to swap uplo='U' and trans='N' */
|
311 | 1 | equemene | char transa='N',transb='T'; |
312 | 1 | equemene | printf("Using CuBLAS/Thunking: %i iterations for %ix%i matrix\n",
|
313 | 1 | equemene | RUNS,dim,dim); |
314 | 1 | equemene | |
315 | 1 | equemene | #ifdef DOUBLE
|
316 | 1 | equemene | |
317 | 1 | equemene | for (i=0;i<RUNS;i++) |
318 | 1 | equemene | { |
319 | 1 | equemene | /* Multiply A by X as Y <- A.X */
|
320 | 1 | equemene | CUBLAS_DGEMM(&transa,&transa, |
321 | 7 | equemene | &dim,&dim,&dim,&alpha,B,&dim,A,&dim,&beta,C,&dim); |
322 | 1 | equemene | CUBLAS_DGEMM(&transb,&transb, |
323 | 7 | equemene | &dim,&dim,&dim,&alpha,A,&dim,B,&dim,&beta,D,&dim); |
324 | 1 | equemene | } |
325 | 1 | equemene | |
326 | 1 | equemene | #else
|
327 | 1 | equemene | |
328 | 1 | equemene | for (i=0;i<RUNS;i++) |
329 | 1 | equemene | { |
330 | 1 | equemene | /* Multiply A by X as Y <- A.X */
|
331 | 1 | equemene | CUBLAS_SGEMM(&transa,&transa, |
332 | 7 | equemene | &dim,&dim,&dim,&alpha,B,&dim,A,&dim,&beta,C,&dim); |
333 | 1 | equemene | CUBLAS_SGEMM(&transb,&transb, |
334 | 7 | equemene | &dim,&dim,&dim,&alpha,A,&dim,B,&dim,&beta,D,&dim); |
335 | 1 | equemene | } |
336 | 1 | equemene | |
337 | 1 | equemene | #endif
|
338 | 1 | equemene | |
339 | 1 | equemene | #elif FBLAS
|
340 | 1 | equemene | |
341 | 1 | equemene | /* Order is Row : Have to swap uplo='U' and trans='N' */
|
342 | 1 | equemene | char transa='N',transb='T'; |
343 | 1 | equemene | |
344 | 1 | equemene | printf("Using FBLAS: %i iterations for %ix%i matrix\n",
|
345 | 1 | equemene | RUNS,dim,dim); |
346 | 1 | equemene | |
347 | 1 | equemene | #ifdef DOUBLE
|
348 | 1 | equemene | |
349 | 1 | equemene | for (i=0;i<RUNS;i++) |
350 | 1 | equemene | { |
351 | 1 | equemene | /* Multiply A by X as Y <- A.X */
|
352 | 1 | equemene | dgemm_(&transa,&transa,&dim,&dim,&dim,&alpha,B,&dim,A,&dim,&beta,C,&dim); |
353 | 1 | equemene | dgemm_(&transb,&transb,&dim,&dim,&dim,&alpha,A,&dim,B,&dim,&beta,D,&dim); |
354 | 1 | equemene | } |
355 | 1 | equemene | |
356 | 1 | equemene | #else
|
357 | 1 | equemene | |
358 | 1 | equemene | for (i=0;i<RUNS;i++) |
359 | 1 | equemene | { |
360 | 1 | equemene | /* Multiply A by X as Y <- A.X */
|
361 | 1 | equemene | sgemm_(&transa,&transa,&dim,&dim,&dim,&alpha,B,&dim,A,&dim,&beta,C,&dim); |
362 | 1 | equemene | sgemm_(&transb,&transb,&dim,&dim,&dim,&alpha,A,&dim,B,&dim,&beta,D,&dim); |
363 | 1 | equemene | } |
364 | 1 | equemene | |
365 | 1 | equemene | #endif
|
366 | 1 | equemene | |
367 | 1 | equemene | #elif ACML
|
368 | 1 | equemene | |
369 | 1 | equemene | /* Order is Row : Have to swap uplo='U' and trans='N' */
|
370 | 1 | equemene | char transa='N',transb='T'; |
371 | 1 | equemene | |
372 | 1 | equemene | printf("Using ACML: %i iterations for %ix%i matrix\n",
|
373 | 1 | equemene | RUNS,dim,dim); |
374 | 1 | equemene | |
375 | 1 | equemene | #ifdef DOUBLE
|
376 | 1 | equemene | |
377 | 1 | equemene | for (i=0;i<RUNS;i++) |
378 | 1 | equemene | { |
379 | 1 | equemene | /* Multiply A by X as Y <- A.X */
|
380 | 1 | equemene | dgemm(transa,transa,dim,dim,dim,alpha,B,dim,A,dim,beta,C,dim); |
381 | 1 | equemene | dgemm(transb,transb,dim,dim,dim,alpha,A,dim,B,dim,beta,D,dim); |
382 | 1 | equemene | } |
383 | 1 | equemene | |
384 | 1 | equemene | #else
|
385 | 1 | equemene | |
386 | 1 | equemene | for (i=0;i<RUNS;i++) |
387 | 1 | equemene | { |
388 | 1 | equemene | /* Multiply A by X as Y <- A.X */
|
389 | 1 | equemene | sgemm(transa,transa,dim,dim,dim,alpha,B,dim,A,dim,beta,C,dim); |
390 | 1 | equemene | sgemm(transb,transb,dim,dim,dim,alpha,A,dim,B,dim,beta,D,dim); |
391 | 1 | equemene | } |
392 | 1 | equemene | |
393 | 1 | equemene | #endif
|
394 | 1 | equemene | |
395 | 1 | equemene | #elif GSL
|
396 | 1 | equemene | |
397 | 1 | equemene | printf("Using GSL: %i iterations for %ix%i matrix\n",RUNS,dim,dim);
|
398 | 1 | equemene | |
399 | 1 | equemene | /*
|
400 | 1 | equemene | RowMajor : Matrix is read row by row
|
401 | 1 | equemene | Upper : the no null elements are on top
|
402 | 1 | equemene | NoTrans : no transposition before estimation
|
403 | 1 | equemene | NonUnit : Matrix is not unit
|
404 | 1 | equemene | */
|
405 | 1 | equemene | |
406 | 1 | equemene | #ifdef DOUBLE
|
407 | 1 | equemene | |
408 | 1 | equemene | for (i=0;i<RUNS;i++) |
409 | 1 | equemene | { |
410 | 1 | equemene | /* Multiply A by X as Y <- A.X */
|
411 | 1 | equemene | cblas_dgemm(CblasRowMajor,CblasNoTrans,CblasNoTrans, |
412 | 1 | equemene | dim,dim,dim,alpha,A,dim,B,dim,beta,C,dim); |
413 | 1 | equemene | cblas_dgemm(CblasRowMajor,CblasTrans,CblasTrans, |
414 | 1 | equemene | dim,dim,dim,alpha,B,dim,A,dim,beta,D,dim); |
415 | 1 | equemene | } |
416 | 1 | equemene | |
417 | 1 | equemene | #else
|
418 | 1 | equemene | |
419 | 1 | equemene | for (i=0;i<RUNS;i++) |
420 | 1 | equemene | { |
421 | 1 | equemene | /* Multiply A by X as Y <- A.X */
|
422 | 1 | equemene | cblas_sgemm(CblasRowMajor,CblasNoTrans,CblasNoTrans, |
423 | 1 | equemene | dim,dim,dim,alpha,A,dim,B,dim,beta,C,dim); |
424 | 1 | equemene | cblas_sgemm(CblasRowMajor,CblasTrans,CblasTrans, |
425 | 1 | equemene | dim,dim,dim,alpha,B,dim,A,dim,beta,D,dim); |
426 | 1 | equemene | } |
427 | 1 | equemene | |
428 | 1 | equemene | #endif
|
429 | 1 | equemene | |
430 | 1 | equemene | #else
|
431 | 1 | equemene | |
432 | 1 | equemene | printf("Using CBLAS: %i iterations for %ix%i matrix\n",RUNS,dim,dim);
|
433 | 1 | equemene | |
434 | 1 | equemene | /*
|
435 | 1 | equemene | RowMajor : Matrix is read row bu row
|
436 | 1 | equemene | Upper : the no null elements are on top
|
437 | 1 | equemene | NoTrans : no transposition before estimation
|
438 | 1 | equemene | NonUnit : Matrix is not unit
|
439 | 1 | equemene | */
|
440 | 1 | equemene | |
441 | 1 | equemene | #ifdef DOUBLE
|
442 | 1 | equemene | |
443 | 1 | equemene | for (i=0;i<RUNS;i++) |
444 | 1 | equemene | { |
445 | 1 | equemene | cblas_dgemm(CblasRowMajor,CblasNoTrans,CblasNoTrans, |
446 | 1 | equemene | dim,dim,dim,alpha,A,dim,B,dim,beta,C,dim); |
447 | 1 | equemene | cblas_dgemm(CblasRowMajor,CblasTrans,CblasTrans, |
448 | 1 | equemene | dim,dim,dim,alpha,B,dim,A,dim,beta,D,dim); |
449 | 1 | equemene | } |
450 | 1 | equemene | |
451 | 1 | equemene | #else
|
452 | 1 | equemene | |
453 | 1 | equemene | for (i=0;i<RUNS;i++) |
454 | 1 | equemene | { |
455 | 1 | equemene | cblas_sgemm(CblasRowMajor,CblasNoTrans,CblasNoTrans, |
456 | 1 | equemene | dim,dim,dim,alpha,A,dim,B,dim,beta,C,dim); |
457 | 1 | equemene | cblas_sgemm(CblasRowMajor,CblasTrans,CblasTrans, |
458 | 1 | equemene | dim,dim,dim,alpha,B,dim,A,dim,beta,D,dim); |
459 | 1 | equemene | } |
460 | 1 | equemene | |
461 | 1 | equemene | #endif
|
462 | 1 | equemene | |
463 | 1 | equemene | #endif
|
464 | 1 | equemene | |
465 | 1 | equemene | /* Get second timer after launching */
|
466 | 1 | equemene | gettimeofday(&tv2, &tz); |
467 | 1 | equemene | |
468 | 1 | equemene | /* Store the checker of errors */
|
469 | 1 | equemene | checksA[0]=0.; |
470 | 1 | equemene | |
471 | 1 | equemene | for (i=0; i<dim; i++) { |
472 | 1 | equemene | for (j=0; j<dim; j++) { |
473 | 1 | equemene | checksA[0]=checksA[0]+fabs(D[i*dim+j]-C[j*dim+i]); |
474 | 1 | equemene | } |
475 | 1 | equemene | } |
476 | 1 | equemene | |
477 | 1 | equemene | /* Print the matrix */
|
478 | 1 | equemene | |
479 | 1 | equemene | #ifdef QUIET
|
480 | 1 | equemene | #else
|
481 | 1 | equemene | for (i=0; i<dim; i++) { |
482 | 1 | equemene | for (j=0; j<dim; j++) printf("C[%i,%i]=%1.5f ", i,j,C[i*dim+j]); |
483 | 1 | equemene | putchar('\n');
|
484 | 1 | equemene | } |
485 | 1 | equemene | putchar('\n');
|
486 | 1 | equemene | for (i=0; i<dim; i++) { |
487 | 1 | equemene | for (j=0; j<dim; j++) printf("D[%i,%i]=%1.5f ", i,j,D[i*dim+j]); |
488 | 1 | equemene | putchar('\n');
|
489 | 1 | equemene | } |
490 | 1 | equemene | putchar('\n');
|
491 | 1 | equemene | #endif
|
492 | 1 | equemene | |
493 | 1 | equemene | /* Free 1 Matrix and 2 Vectors of dimension dim */
|
494 | 1 | equemene | |
495 | 1 | equemene | free(A); |
496 | 1 | equemene | free(B); |
497 | 1 | equemene | free(C); |
498 | 1 | equemene | free(D); |
499 | 1 | equemene | |
500 | 1 | equemene | putchar('\n');
|
501 | 1 | equemene | |
502 | 1 | equemene | #ifdef CUBLAS
|
503 | 1 | equemene | double memoryIn,memoryOut;
|
504 | 1 | equemene | |
505 | 1 | equemene | memoryIn=(double)((tv3.tv_sec-tv1.tv_sec) * 1000000L + \ |
506 | 1 | equemene | (tv3.tv_usec-tv1.tv_usec))/1000000.; |
507 | 1 | equemene | |
508 | 1 | equemene | memoryOut=(double)((tv2.tv_sec-tv4.tv_sec) * 1000000L + \ |
509 | 1 | equemene | (tv2.tv_usec-tv4.tv_usec))/1000000.; |
510 | 1 | equemene | |
511 | 1 | equemene | duration=(double)((tv4.tv_sec-tv3.tv_sec) * 1000000L + \ |
512 | 1 | equemene | (tv4.tv_usec-tv3.tv_usec))/1000000./RUNS; |
513 | 1 | equemene | |
514 | 1 | equemene | printf("Duration of memory allocation : %2.10f s\n",memoryIn);
|
515 | 1 | equemene | printf("Duration of memory free : %2.10f s\n",memoryOut);
|
516 | 1 | equemene | #else
|
517 | 1 | equemene | duration=(double)((tv2.tv_sec-tv1.tv_sec) * 1000000L + \ |
518 | 1 | equemene | (tv2.tv_usec-tv1.tv_usec))/1000000./RUNS; |
519 | 1 | equemene | |
520 | 1 | equemene | #endif
|
521 | 1 | equemene | |
522 | 1 | equemene | printf("Duration of each cycle : %2.10f s\n",duration);
|
523 | 1 | equemene | |
524 | 1 | equemene | printf("Number of GFlops : %2.3f \n",
|
525 | 1 | equemene | dim*dim*2.*(2.*dim-1)/duration/1000000000.); |
526 | 1 | equemene | |
527 | 1 | equemene | printf("Error %1.10f\n",checksA[0]); |
528 | 1 | equemene | printResults(RUNS,checksA,"C","Errors cumulated"); |
529 | 1 | equemene | |
530 | 1 | equemene | putchar('\n');
|
531 | 1 | equemene | |
532 | 1 | equemene | /* Free 2 vectors for checker Before and After */
|
533 | 1 | equemene | |
534 | 1 | equemene | free(checksA); |
535 | 1 | equemene | free(checksB); |
536 | 1 | equemene | |
537 | 1 | equemene | return 0; |
538 | 1 | equemene | } |
539 | 1 | equemene | |
540 | 1 | equemene | int main(int argc,char **argv) |
541 | 1 | equemene | { |
542 | 1 | equemene | if ((argc==1)|| |
543 | 1 | equemene | (strcmp(argv[1],"-h")==0)|| |
544 | 1 | equemene | (strcmp(argv[1],"--help")==0)) |
545 | 1 | equemene | { |
546 | 1 | equemene | printf("\nPerforms a bench using BLAS library implementation:\n\n"
|
547 | 1 | equemene | "\t#1 Size of square matrices \n"
|
548 | 1 | equemene | "\t#2 Number of iterations\n\n");
|
549 | 1 | equemene | } |
550 | 1 | equemene | else if ((atoi(argv[1])>=2)&& |
551 | 1 | equemene | (atoi(argv[2])>=1)) |
552 | 1 | equemene | { |
553 | 1 | equemene | bench(atoi(argv[1]),atoi(argv[2])); |
554 | 1 | equemene | } |
555 | 1 | equemene | |
556 | 1 | equemene | return 0; |
557 | 1 | equemene | } |