root / BLAS / xGEMM / xGEMM.c @ 6
Historique | Voir | Annoter | Télécharger (12,4 ko)
1 | 1 | equemene | /*
|
---|---|---|---|
2 | 1 | equemene | Performs matrix multiply
|
3 | 1 | equemene | |
4 | 1 | equemene | Thanks for help from aurel32@debian.org
|
5 | 1 | equemene | */
|
6 | 1 | equemene | |
7 | 1 | equemene | #include <stdio.h> |
8 | 1 | equemene | #include <math.h> |
9 | 1 | equemene | #include <stdlib.h> |
10 | 1 | equemene | #include <sys/time.h> |
11 | 1 | equemene | #include <string.h> |
12 | 1 | equemene | |
13 | 1 | equemene | #ifdef CUBLAS
|
14 | 1 | equemene | #include <cublas.h> |
15 | 1 | equemene | #define CUBLAS_WRAPPER_ERROR_NOERR 0 |
16 | 1 | equemene | #define CUBLAS_WRAPPER_ERROR_ALLOC 1 |
17 | 1 | equemene | #define CUBLAS_WRAPPER_ERROR_SET 2 |
18 | 1 | equemene | #define CUBLAS_WRAPPER_ERROR_GET 3 |
19 | 1 | equemene | #define CUBLAS_WRAPPER_ERROR_STUB 4 |
20 | 1 | equemene | #elif THUNKING
|
21 | 1 | equemene | #include <cublas.h> |
22 | 1 | equemene | #elif FBLAS
|
23 | 1 | equemene | #include <cblas_f77.h> |
24 | 1 | equemene | #elif GSL
|
25 | 1 | equemene | #include <gsl_cblas.h> |
26 | 1 | equemene | #elif ACML
|
27 | 1 | equemene | #include <acml.h> |
28 | 1 | equemene | #include <acml_mv.h> |
29 | 1 | equemene | #else
|
30 | 1 | equemene | #include <cblas.h> |
31 | 1 | equemene | #endif
|
32 | 1 | equemene | |
33 | 1 | equemene | #ifdef DOUBLE
|
34 | 1 | equemene | #define LENGTH double |
35 | 1 | equemene | #else
|
36 | 1 | equemene | #define LENGTH float |
37 | 1 | equemene | #endif
|
38 | 1 | equemene | |
39 | 1 | equemene | #ifdef THUNKING
|
40 | 1 | equemene | #include "fortran_thunking.h" |
41 | 1 | equemene | |
42 | 1 | equemene | #elif FBLAS
|
43 | 1 | equemene | |
44 | 1 | equemene | #ifdef DOUBLE
|
45 | 1 | equemene | |
46 | 1 | equemene | void dgemm_(FCHAR, FCHAR, FINT, FINT, FINT, const double *, const double *, FINT, |
47 | 1 | equemene | const double *, FINT, const double *, double *, FINT); |
48 | 1 | equemene | |
49 | 1 | equemene | #else
|
50 | 1 | equemene | |
51 | 1 | equemene | void sgemm_(FCHAR, FCHAR, FINT, FINT, FINT, const float *, const float *, FINT, |
52 | 1 | equemene | const float *, FINT, const float *, float *, FINT); |
53 | 1 | equemene | |
54 | 1 | equemene | #endif
|
55 | 1 | equemene | |
56 | 1 | equemene | #endif
|
57 | 1 | equemene | |
58 | 1 | equemene | /* Matrix with only defined triangular terms */
|
59 | 1 | equemene | /* Even if there are 0 in matrix, must be defined at all ! */
|
60 | 1 | equemene | |
61 | 1 | equemene | /* Get from fortran.c */
|
62 | 1 | equemene | |
63 | 1 | equemene | #ifdef CUBLAS
|
64 | 1 | equemene | static char *errMsg[5] = |
65 | 1 | equemene | { |
66 | 1 | equemene | "no error",
|
67 | 1 | equemene | "allocation error",
|
68 | 1 | equemene | "setVector/setMatrix error",
|
69 | 1 | equemene | "getVector/getMatrix error",
|
70 | 1 | equemene | "not implemented"
|
71 | 1 | equemene | }; |
72 | 1 | equemene | |
73 | 1 | equemene | static void wrapperError (const char *funcName, int error) |
74 | 1 | equemene | { |
75 | 1 | equemene | printf ("cublas%s wrapper: %s\n", funcName, errMsg[error]);
|
76 | 1 | equemene | fflush (stdout); |
77 | 1 | equemene | } |
78 | 1 | equemene | #endif
|
79 | 1 | equemene | |
80 | 1 | equemene | int printVector(const int dimVector,const LENGTH *dataVector, |
81 | 1 | equemene | char *nameVector,char *mesgVector) |
82 | 1 | equemene | { |
83 | 1 | equemene | #ifndef QUIET
|
84 | 1 | equemene | |
85 | 1 | equemene | int i;
|
86 | 1 | equemene | printf("\n%s of %s, size %i:\n",mesgVector,nameVector,dimVector);
|
87 | 1 | equemene | for (i=0;i<dimVector;i++) |
88 | 1 | equemene | { |
89 | 1 | equemene | printf("%s[%i]=%2.10e\n",nameVector,i,dataVector[i]);
|
90 | 1 | equemene | } |
91 | 1 | equemene | #endif
|
92 | 1 | equemene | |
93 | 1 | equemene | return 0; |
94 | 1 | equemene | } |
95 | 1 | equemene | |
96 | 1 | equemene | int printResults(const int dimVector,const LENGTH *dataVector, |
97 | 1 | equemene | char *nameVector,char *mesgVector) |
98 | 1 | equemene | { |
99 | 1 | equemene | #ifdef RESULTS
|
100 | 1 | equemene | int i;
|
101 | 1 | equemene | |
102 | 1 | equemene | printf("\n%s of %s, size %i:\n",mesgVector,nameVector,dimVector);
|
103 | 1 | equemene | for (i=0;i<dimVector;i++) |
104 | 1 | equemene | { |
105 | 1 | equemene | printf("%s[%i]=%2.10e\n",nameVector,i,dataVector[i]);
|
106 | 1 | equemene | } |
107 | 1 | equemene | #endif
|
108 | 1 | equemene | return 0; |
109 | 1 | equemene | } |
110 | 1 | equemene | |
111 | 1 | equemene | #ifdef CUBLAS
|
112 | 1 | equemene | int printVectorGPU(const int dimVector,const LENGTH *dataVector, |
113 | 1 | equemene | char *nameVector,char *mesgVector) |
114 | 1 | equemene | { |
115 | 1 | equemene | #ifndef QUIET
|
116 | 1 | equemene | int i;
|
117 | 1 | equemene | cublasStatus stat; |
118 | 1 | equemene | LENGTH *P=0;
|
119 | 1 | equemene | int incx=1; |
120 | 1 | equemene | |
121 | 1 | equemene | P=malloc(dimVector*sizeof(LENGTH));
|
122 | 1 | equemene | |
123 | 1 | equemene | stat=cublasGetVector(dimVector,sizeof(P[0]),dataVector,incx,P,incx); |
124 | 1 | equemene | |
125 | 1 | equemene | if (stat != CUBLAS_STATUS_SUCCESS) {
|
126 | 1 | equemene | wrapperError ("ToGet", CUBLAS_WRAPPER_ERROR_GET);
|
127 | 1 | equemene | } |
128 | 1 | equemene | |
129 | 1 | equemene | printf("\n%s of %s, size %i:\n",mesgVector,nameVector,dimVector);
|
130 | 1 | equemene | for (i=0;i<dimVector;i++) |
131 | 1 | equemene | { |
132 | 1 | equemene | printf("%s[%i]=%2.10e\n",nameVector,i,P[i]);
|
133 | 1 | equemene | } |
134 | 1 | equemene | |
135 | 1 | equemene | free(P); |
136 | 1 | equemene | #endif
|
137 | 1 | equemene | |
138 | 1 | equemene | return 0; |
139 | 1 | equemene | } |
140 | 1 | equemene | #endif
|
141 | 1 | equemene | |
142 | 1 | equemene | int bench(int dim,int RUNS) |
143 | 1 | equemene | { |
144 | 1 | equemene | /*
|
145 | 1 | equemene | int dim=1000;
|
146 | 1 | equemene | int RUNS=100;
|
147 | 1 | equemene | int incx=1;
|
148 | 1 | equemene | */
|
149 | 1 | equemene | #ifdef PRINT
|
150 | 1 | equemene | LENGTH factor=1.;
|
151 | 1 | equemene | #endif
|
152 | 1 | equemene | |
153 | 1 | equemene | LENGTH alpha=1.,beta=0.; |
154 | 1 | equemene | LENGTH *A,*B,*C,*D; |
155 | 1 | equemene | |
156 | 1 | equemene | /* checkBefore checkAfter checks */
|
157 | 1 | equemene | LENGTH *checksA,*checksB; |
158 | 1 | equemene | |
159 | 1 | equemene | int i=0, j=0; |
160 | 1 | equemene | |
161 | 1 | equemene | double duration;
|
162 | 1 | equemene | |
163 | 1 | equemene | struct timeval tv1,tv2;
|
164 | 1 | equemene | struct timezone tz;
|
165 | 1 | equemene | |
166 | 1 | equemene | /* Create 4 Matrix of dimension dim by dim */
|
167 | 1 | equemene | |
168 | 1 | equemene | A=malloc(dim*dim*sizeof(LENGTH));
|
169 | 1 | equemene | B=malloc(dim*dim*sizeof(LENGTH));
|
170 | 1 | equemene | C=malloc(dim*dim*sizeof(LENGTH));
|
171 | 1 | equemene | D=malloc(dim*dim*sizeof(LENGTH));
|
172 | 1 | equemene | |
173 | 1 | equemene | /* Create 2 vectors for checker Before and After */
|
174 | 1 | equemene | |
175 | 1 | equemene | checksA=malloc(RUNS*sizeof(LENGTH));
|
176 | 1 | equemene | checksB=malloc(RUNS*sizeof(LENGTH));
|
177 | 1 | equemene | |
178 | 1 | equemene | /* Initialize elements with random numbers */
|
179 | 1 | equemene | /* Initialize the seed for rand() */
|
180 | 1 | equemene | /* srand(time()); */
|
181 | 1 | equemene | |
182 | 1 | equemene | for (i=0; i<dim; i++) { |
183 | 1 | equemene | for (j=0; j<dim; j++) { |
184 | 1 | equemene | A[i*dim+j]=(LENGTH)rand()/(RAND_MAX+1.)
|
185 | 1 | equemene | *(LENGTH)(i+1.)/(LENGTH)(j+1.); |
186 | 1 | equemene | B[i*dim+j]=(LENGTH)rand()/(RAND_MAX+1.)
|
187 | 1 | equemene | *(LENGTH)(i+1.)/(LENGTH)(j+1.); |
188 | 1 | equemene | C[i*dim+j]=0.;
|
189 | 1 | equemene | D[i*dim+j]=0.;
|
190 | 1 | equemene | } |
191 | 1 | equemene | } |
192 | 1 | equemene | /*
|
193 | 1 | equemene | A[0]=1;
|
194 | 1 | equemene | A[1]=2;
|
195 | 1 | equemene | A[2]=3;
|
196 | 1 | equemene | A[3]=4;
|
197 | 1 | equemene | |
198 | 1 | equemene | B[0]=5;
|
199 | 1 | equemene | B[1]=6;
|
200 | 1 | equemene | B[2]=7;
|
201 | 1 | equemene | B[3]=8;
|
202 | 1 | equemene | */
|
203 | 1 | equemene | |
204 | 1 | equemene | /* Print the matrix */
|
205 | 1 | equemene | |
206 | 1 | equemene | #ifdef QUIET
|
207 | 1 | equemene | #else
|
208 | 1 | equemene | for (i=0; i<dim; i++) { |
209 | 1 | equemene | for (j=0; j<dim; j++) printf("A[%i,%i]=%1.5f ", i,j,A[i*dim+j]); |
210 | 1 | equemene | putchar('\n');
|
211 | 1 | equemene | } |
212 | 1 | equemene | putchar('\n');
|
213 | 1 | equemene | for (i=0; i<dim; i++) { |
214 | 1 | equemene | for (j=0; j<dim; j++) printf("B[%i,%i]=%1.5f ", i,j,B[i*dim+j]); |
215 | 1 | equemene | putchar('\n');
|
216 | 1 | equemene | } |
217 | 1 | equemene | putchar('\n');
|
218 | 1 | equemene | #endif
|
219 | 1 | equemene | |
220 | 1 | equemene | /* Get first timer before launching */
|
221 | 1 | equemene | gettimeofday(&tv1, &tz); |
222 | 1 | equemene | |
223 | 1 | equemene | /* Compute with CuBLAS library */
|
224 | 1 | equemene | #ifdef CUBLAS
|
225 | 1 | equemene | LENGTH *devPtrA=0, *devPtrB=0, *devPtrC=0, *devPtrD=0; |
226 | 1 | equemene | cublasStatus stat1, stat2, stat3, stat4; |
227 | 1 | equemene | struct timeval tv3,tv4;
|
228 | 1 | equemene | |
229 | 1 | equemene | /* Order is Row */
|
230 | 1 | equemene | /* Have to swap uplo and trans */
|
231 | 1 | equemene | char transa='N',transb='T'; |
232 | 1 | equemene | |
233 | 1 | equemene | printf("Using CuBLAS: %i iterations for %ix%i matrix\n",
|
234 | 1 | equemene | RUNS,dim,dim); |
235 | 1 | equemene | |
236 | 1 | equemene | stat1=cublasAlloc(dim*dim,sizeof(devPtrA[0]),(void**)&devPtrA); |
237 | 1 | equemene | stat2=cublasAlloc(dim*dim,sizeof(devPtrB[0]),(void**)&devPtrB); |
238 | 1 | equemene | stat3=cublasAlloc(dim*dim,sizeof(devPtrC[0]),(void**)&devPtrC); |
239 | 1 | equemene | stat4=cublasAlloc(dim*dim,sizeof(devPtrD[0]),(void**)&devPtrD); |
240 | 1 | equemene | |
241 | 1 | equemene | if ((stat1 != CUBLAS_STATUS_SUCCESS) ||
|
242 | 1 | equemene | (stat2 != CUBLAS_STATUS_SUCCESS) || |
243 | 1 | equemene | (stat3 != CUBLAS_STATUS_SUCCESS) || |
244 | 1 | equemene | (stat4 != CUBLAS_STATUS_SUCCESS) ) { |
245 | 5 | equemene | wrapperError ("xGEMM", CUBLAS_WRAPPER_ERROR_ALLOC);
|
246 | 1 | equemene | cublasFree (devPtrA); |
247 | 1 | equemene | cublasFree (devPtrB); |
248 | 1 | equemene | cublasFree (devPtrC); |
249 | 1 | equemene | cublasFree (devPtrD); |
250 | 1 | equemene | return 1; |
251 | 1 | equemene | } |
252 | 1 | equemene | |
253 | 1 | equemene | stat1=cublasSetMatrix(dim,dim,sizeof(A[0]),A,dim,devPtrA,dim); |
254 | 1 | equemene | stat2=cublasSetMatrix(dim,dim,sizeof(B[0]),B,dim,devPtrB,dim); |
255 | 1 | equemene | stat3=cublasSetMatrix(dim,dim,sizeof(C[0]),C,dim,devPtrC,dim); |
256 | 1 | equemene | stat4=cublasSetMatrix(dim,dim,sizeof(D[0]),D,dim,devPtrD,dim); |
257 | 1 | equemene | |
258 | 1 | equemene | if ((stat1 != CUBLAS_STATUS_SUCCESS) ||
|
259 | 1 | equemene | (stat2 != CUBLAS_STATUS_SUCCESS) || |
260 | 1 | equemene | (stat3 != CUBLAS_STATUS_SUCCESS) || |
261 | 1 | equemene | (stat4 != CUBLAS_STATUS_SUCCESS) ) { |
262 | 5 | equemene | wrapperError ("xGEMM", CUBLAS_WRAPPER_ERROR_SET);
|
263 | 1 | equemene | cublasFree (devPtrA); |
264 | 1 | equemene | cublasFree (devPtrB); |
265 | 1 | equemene | cublasFree (devPtrC); |
266 | 1 | equemene | cublasFree (devPtrD); |
267 | 1 | equemene | return 1; |
268 | 1 | equemene | } |
269 | 1 | equemene | |
270 | 1 | equemene | /* Get third timer after memory operation */
|
271 | 1 | equemene | gettimeofday(&tv3, &tz); |
272 | 1 | equemene | |
273 | 1 | equemene | #ifdef DOUBLE
|
274 | 1 | equemene | |
275 | 1 | equemene | for (i=0;i<RUNS;i++) |
276 | 1 | equemene | { |
277 | 1 | equemene | cublasDgemm(transa,transa,dim,dim,dim,alpha,devPtrB,dim, |
278 | 1 | equemene | devPtrA,dim,beta,devPtrC,dim); |
279 | 1 | equemene | cublasDgemm(transb,transb,dim,dim,dim,alpha,devPtrA,dim, |
280 | 1 | equemene | devPtrB,dim,beta,devPtrD,dim); |
281 | 1 | equemene | } |
282 | 1 | equemene | |
283 | 1 | equemene | #else
|
284 | 1 | equemene | |
285 | 1 | equemene | for (i=0;i<RUNS;i++) |
286 | 1 | equemene | { |
287 | 1 | equemene | cublasSgemm(transa,transa,dim,dim,dim,alpha,devPtrB,dim, |
288 | 1 | equemene | devPtrA,dim,beta,devPtrC,dim); |
289 | 1 | equemene | cublasSgemm(transb,transb,dim,dim,dim,alpha,devPtrA,dim, |
290 | 1 | equemene | devPtrB,dim,beta,devPtrD,dim); |
291 | 1 | equemene | } |
292 | 1 | equemene | |
293 | 1 | equemene | #endif
|
294 | 1 | equemene | |
295 | 1 | equemene | stat3=cublasGetMatrix(dim,dim,sizeof(C[0]),devPtrC,dim,C,dim); |
296 | 1 | equemene | stat4=cublasGetMatrix(dim,dim,sizeof(D[0]),devPtrD,dim,D,dim); |
297 | 1 | equemene | |
298 | 1 | equemene | cublasFree (devPtrA); |
299 | 1 | equemene | cublasFree (devPtrB); |
300 | 1 | equemene | cublasFree (devPtrC); |
301 | 1 | equemene | cublasFree (devPtrD); |
302 | 1 | equemene | |
303 | 1 | equemene | if ((stat1 != CUBLAS_STATUS_SUCCESS) ) {
|
304 | 5 | equemene | wrapperError ("xGEMM", CUBLAS_WRAPPER_ERROR_GET);
|
305 | 1 | equemene | } |
306 | 1 | equemene | |
307 | 1 | equemene | /* Get fourth timer after memory free */
|
308 | 1 | equemene | gettimeofday(&tv4, &tz); |
309 | 1 | equemene | |
310 | 1 | equemene | #elif THUNKING
|
311 | 1 | equemene | |
312 | 1 | equemene | /* Order is Row : Have to swap uplo='U' and trans='N' */
|
313 | 1 | equemene | char transa='N',transb='T'; |
314 | 1 | equemene | printf("Using CuBLAS/Thunking: %i iterations for %ix%i matrix\n",
|
315 | 1 | equemene | RUNS,dim,dim); |
316 | 1 | equemene | |
317 | 1 | equemene | #ifdef DOUBLE
|
318 | 1 | equemene | |
319 | 1 | equemene | for (i=0;i<RUNS;i++) |
320 | 1 | equemene | { |
321 | 1 | equemene | /* Multiply A by X as Y <- A.X */
|
322 | 1 | equemene | CUBLAS_DGEMM(&transa,&transa, |
323 | 1 | equemene | &dim,&dim,&dim,&alpha,B,&dim,A,&dim,&beta,C,&dim); |
324 | 1 | equemene | CUBLAS_DGEMM(&transb,&transb, |
325 | 1 | equemene | &dim,&dim,&dim,&alpha,A,&dim,B,&dim,&beta,D,&dim); |
326 | 1 | equemene | } |
327 | 1 | equemene | |
328 | 1 | equemene | #else
|
329 | 1 | equemene | |
330 | 1 | equemene | for (i=0;i<RUNS;i++) |
331 | 1 | equemene | { |
332 | 1 | equemene | /* Multiply A by X as Y <- A.X */
|
333 | 1 | equemene | CUBLAS_SGEMM(&transa,&transa, |
334 | 1 | equemene | &dim,&dim,&dim,&alpha,B,&dim,A,&dim,&beta,C,&dim); |
335 | 1 | equemene | CUBLAS_SGEMM(&transb,&transb, |
336 | 1 | equemene | &dim,&dim,&dim,&alpha,A,&dim,B,&dim,&beta,D,&dim); |
337 | 1 | equemene | } |
338 | 1 | equemene | |
339 | 1 | equemene | #endif
|
340 | 1 | equemene | |
341 | 1 | equemene | #elif FBLAS
|
342 | 1 | equemene | |
343 | 1 | equemene | /* Order is Row : Have to swap uplo='U' and trans='N' */
|
344 | 1 | equemene | char transa='N',transb='T'; |
345 | 1 | equemene | |
346 | 1 | equemene | printf("Using FBLAS: %i iterations for %ix%i matrix\n",
|
347 | 1 | equemene | RUNS,dim,dim); |
348 | 1 | equemene | |
349 | 1 | equemene | #ifdef DOUBLE
|
350 | 1 | equemene | |
351 | 1 | equemene | for (i=0;i<RUNS;i++) |
352 | 1 | equemene | { |
353 | 1 | equemene | /* Multiply A by X as Y <- A.X */
|
354 | 1 | equemene | dgemm_(&transa,&transa,&dim,&dim,&dim,&alpha,B,&dim,A,&dim,&beta,C,&dim); |
355 | 1 | equemene | dgemm_(&transb,&transb,&dim,&dim,&dim,&alpha,A,&dim,B,&dim,&beta,D,&dim); |
356 | 1 | equemene | } |
357 | 1 | equemene | |
358 | 1 | equemene | #else
|
359 | 1 | equemene | |
360 | 1 | equemene | for (i=0;i<RUNS;i++) |
361 | 1 | equemene | { |
362 | 1 | equemene | /* Multiply A by X as Y <- A.X */
|
363 | 1 | equemene | sgemm_(&transa,&transa,&dim,&dim,&dim,&alpha,B,&dim,A,&dim,&beta,C,&dim); |
364 | 1 | equemene | sgemm_(&transb,&transb,&dim,&dim,&dim,&alpha,A,&dim,B,&dim,&beta,D,&dim); |
365 | 1 | equemene | } |
366 | 1 | equemene | |
367 | 1 | equemene | #endif
|
368 | 1 | equemene | |
369 | 1 | equemene | #elif ACML
|
370 | 1 | equemene | |
371 | 1 | equemene | /* Order is Row : Have to swap uplo='U' and trans='N' */
|
372 | 1 | equemene | char transa='N',transb='T'; |
373 | 1 | equemene | |
374 | 1 | equemene | printf("Using ACML: %i iterations for %ix%i matrix\n",
|
375 | 1 | equemene | RUNS,dim,dim); |
376 | 1 | equemene | |
377 | 1 | equemene | #ifdef DOUBLE
|
378 | 1 | equemene | |
379 | 1 | equemene | for (i=0;i<RUNS;i++) |
380 | 1 | equemene | { |
381 | 1 | equemene | /* Multiply A by X as Y <- A.X */
|
382 | 1 | equemene | dgemm(transa,transa,dim,dim,dim,alpha,B,dim,A,dim,beta,C,dim); |
383 | 1 | equemene | dgemm(transb,transb,dim,dim,dim,alpha,A,dim,B,dim,beta,D,dim); |
384 | 1 | equemene | } |
385 | 1 | equemene | |
386 | 1 | equemene | #else
|
387 | 1 | equemene | |
388 | 1 | equemene | for (i=0;i<RUNS;i++) |
389 | 1 | equemene | { |
390 | 1 | equemene | /* Multiply A by X as Y <- A.X */
|
391 | 1 | equemene | sgemm(transa,transa,dim,dim,dim,alpha,B,dim,A,dim,beta,C,dim); |
392 | 1 | equemene | sgemm(transb,transb,dim,dim,dim,alpha,A,dim,B,dim,beta,D,dim); |
393 | 1 | equemene | } |
394 | 1 | equemene | |
395 | 1 | equemene | #endif
|
396 | 1 | equemene | |
397 | 1 | equemene | #elif GSL
|
398 | 1 | equemene | |
399 | 1 | equemene | printf("Using GSL: %i iterations for %ix%i matrix\n",RUNS,dim,dim);
|
400 | 1 | equemene | |
401 | 1 | equemene | /*
|
402 | 1 | equemene | RowMajor : Matrix is read row by row
|
403 | 1 | equemene | Upper : the no null elements are on top
|
404 | 1 | equemene | NoTrans : no transposition before estimation
|
405 | 1 | equemene | NonUnit : Matrix is not unit
|
406 | 1 | equemene | */
|
407 | 1 | equemene | |
408 | 1 | equemene | #ifdef DOUBLE
|
409 | 1 | equemene | |
410 | 1 | equemene | for (i=0;i<RUNS;i++) |
411 | 1 | equemene | { |
412 | 1 | equemene | /* Multiply A by X as Y <- A.X */
|
413 | 1 | equemene | cblas_dgemm(CblasRowMajor,CblasNoTrans,CblasNoTrans, |
414 | 1 | equemene | dim,dim,dim,alpha,A,dim,B,dim,beta,C,dim); |
415 | 1 | equemene | cblas_dgemm(CblasRowMajor,CblasTrans,CblasTrans, |
416 | 1 | equemene | dim,dim,dim,alpha,B,dim,A,dim,beta,D,dim); |
417 | 1 | equemene | } |
418 | 1 | equemene | |
419 | 1 | equemene | #else
|
420 | 1 | equemene | |
421 | 1 | equemene | for (i=0;i<RUNS;i++) |
422 | 1 | equemene | { |
423 | 1 | equemene | /* Multiply A by X as Y <- A.X */
|
424 | 1 | equemene | cblas_sgemm(CblasRowMajor,CblasNoTrans,CblasNoTrans, |
425 | 1 | equemene | dim,dim,dim,alpha,A,dim,B,dim,beta,C,dim); |
426 | 1 | equemene | cblas_sgemm(CblasRowMajor,CblasTrans,CblasTrans, |
427 | 1 | equemene | dim,dim,dim,alpha,B,dim,A,dim,beta,D,dim); |
428 | 1 | equemene | } |
429 | 1 | equemene | |
430 | 1 | equemene | #endif
|
431 | 1 | equemene | |
432 | 1 | equemene | #else
|
433 | 1 | equemene | |
434 | 1 | equemene | printf("Using CBLAS: %i iterations for %ix%i matrix\n",RUNS,dim,dim);
|
435 | 1 | equemene | |
436 | 1 | equemene | /*
|
437 | 1 | equemene | RowMajor : Matrix is read row bu row
|
438 | 1 | equemene | Upper : the no null elements are on top
|
439 | 1 | equemene | NoTrans : no transposition before estimation
|
440 | 1 | equemene | NonUnit : Matrix is not unit
|
441 | 1 | equemene | */
|
442 | 1 | equemene | |
443 | 1 | equemene | #ifdef DOUBLE
|
444 | 1 | equemene | |
445 | 1 | equemene | for (i=0;i<RUNS;i++) |
446 | 1 | equemene | { |
447 | 1 | equemene | cblas_dgemm(CblasRowMajor,CblasNoTrans,CblasNoTrans, |
448 | 1 | equemene | dim,dim,dim,alpha,A,dim,B,dim,beta,C,dim); |
449 | 1 | equemene | cblas_dgemm(CblasRowMajor,CblasTrans,CblasTrans, |
450 | 1 | equemene | dim,dim,dim,alpha,B,dim,A,dim,beta,D,dim); |
451 | 1 | equemene | } |
452 | 1 | equemene | |
453 | 1 | equemene | #else
|
454 | 1 | equemene | |
455 | 1 | equemene | for (i=0;i<RUNS;i++) |
456 | 1 | equemene | { |
457 | 1 | equemene | cblas_sgemm(CblasRowMajor,CblasNoTrans,CblasNoTrans, |
458 | 1 | equemene | dim,dim,dim,alpha,A,dim,B,dim,beta,C,dim); |
459 | 1 | equemene | cblas_sgemm(CblasRowMajor,CblasTrans,CblasTrans, |
460 | 1 | equemene | dim,dim,dim,alpha,B,dim,A,dim,beta,D,dim); |
461 | 1 | equemene | } |
462 | 1 | equemene | |
463 | 1 | equemene | #endif
|
464 | 1 | equemene | |
465 | 1 | equemene | #endif
|
466 | 1 | equemene | |
467 | 1 | equemene | /* Get second timer after launching */
|
468 | 1 | equemene | gettimeofday(&tv2, &tz); |
469 | 1 | equemene | |
470 | 1 | equemene | /* Store the checker of errors */
|
471 | 1 | equemene | checksA[0]=0.; |
472 | 1 | equemene | |
473 | 1 | equemene | for (i=0; i<dim; i++) { |
474 | 1 | equemene | for (j=0; j<dim; j++) { |
475 | 1 | equemene | checksA[0]=checksA[0]+fabs(D[i*dim+j]-C[j*dim+i]); |
476 | 1 | equemene | } |
477 | 1 | equemene | } |
478 | 1 | equemene | |
479 | 1 | equemene | /* Print the matrix */
|
480 | 1 | equemene | |
481 | 1 | equemene | #ifdef QUIET
|
482 | 1 | equemene | #else
|
483 | 1 | equemene | for (i=0; i<dim; i++) { |
484 | 1 | equemene | for (j=0; j<dim; j++) printf("C[%i,%i]=%1.5f ", i,j,C[i*dim+j]); |
485 | 1 | equemene | putchar('\n');
|
486 | 1 | equemene | } |
487 | 1 | equemene | putchar('\n');
|
488 | 1 | equemene | for (i=0; i<dim; i++) { |
489 | 1 | equemene | for (j=0; j<dim; j++) printf("D[%i,%i]=%1.5f ", i,j,D[i*dim+j]); |
490 | 1 | equemene | putchar('\n');
|
491 | 1 | equemene | } |
492 | 1 | equemene | putchar('\n');
|
493 | 1 | equemene | #endif
|
494 | 1 | equemene | |
495 | 1 | equemene | /* Free 1 Matrix and 2 Vectors of dimension dim */
|
496 | 1 | equemene | |
497 | 1 | equemene | free(A); |
498 | 1 | equemene | free(B); |
499 | 1 | equemene | free(C); |
500 | 1 | equemene | free(D); |
501 | 1 | equemene | |
502 | 1 | equemene | putchar('\n');
|
503 | 1 | equemene | |
504 | 1 | equemene | #ifdef CUBLAS
|
505 | 1 | equemene | double memoryIn,memoryOut;
|
506 | 1 | equemene | |
507 | 1 | equemene | memoryIn=(double)((tv3.tv_sec-tv1.tv_sec) * 1000000L + \ |
508 | 1 | equemene | (tv3.tv_usec-tv1.tv_usec))/1000000.; |
509 | 1 | equemene | |
510 | 1 | equemene | memoryOut=(double)((tv2.tv_sec-tv4.tv_sec) * 1000000L + \ |
511 | 1 | equemene | (tv2.tv_usec-tv4.tv_usec))/1000000.; |
512 | 1 | equemene | |
513 | 1 | equemene | duration=(double)((tv4.tv_sec-tv3.tv_sec) * 1000000L + \ |
514 | 1 | equemene | (tv4.tv_usec-tv3.tv_usec))/1000000./RUNS; |
515 | 1 | equemene | |
516 | 1 | equemene | printf("Duration of memory allocation : %2.10f s\n",memoryIn);
|
517 | 1 | equemene | printf("Duration of memory free : %2.10f s\n",memoryOut);
|
518 | 1 | equemene | #else
|
519 | 1 | equemene | duration=(double)((tv2.tv_sec-tv1.tv_sec) * 1000000L + \ |
520 | 1 | equemene | (tv2.tv_usec-tv1.tv_usec))/1000000./RUNS; |
521 | 1 | equemene | |
522 | 1 | equemene | #endif
|
523 | 1 | equemene | |
524 | 1 | equemene | printf("Duration of each cycle : %2.10f s\n",duration);
|
525 | 1 | equemene | |
526 | 1 | equemene | printf("Number of GFlops : %2.3f \n",
|
527 | 1 | equemene | dim*dim*2.*(2.*dim-1)/duration/1000000000.); |
528 | 1 | equemene | |
529 | 1 | equemene | printf("Error %1.10f\n",checksA[0]); |
530 | 1 | equemene | printResults(RUNS,checksA,"C","Errors cumulated"); |
531 | 1 | equemene | |
532 | 1 | equemene | putchar('\n');
|
533 | 1 | equemene | |
534 | 1 | equemene | /* Free 2 vectors for checker Before and After */
|
535 | 1 | equemene | |
536 | 1 | equemene | free(checksA); |
537 | 1 | equemene | free(checksB); |
538 | 1 | equemene | |
539 | 1 | equemene | return 0; |
540 | 1 | equemene | } |
541 | 1 | equemene | |
542 | 1 | equemene | int main(int argc,char **argv) |
543 | 1 | equemene | { |
544 | 1 | equemene | if ((argc==1)|| |
545 | 1 | equemene | (strcmp(argv[1],"-h")==0)|| |
546 | 1 | equemene | (strcmp(argv[1],"--help")==0)) |
547 | 1 | equemene | { |
548 | 1 | equemene | printf("\nPerforms a bench using BLAS library implementation:\n\n"
|
549 | 1 | equemene | "\t#1 Size of square matrices \n"
|
550 | 1 | equemene | "\t#2 Number of iterations\n\n");
|
551 | 1 | equemene | } |
552 | 1 | equemene | else if ((atoi(argv[1])>=2)&& |
553 | 1 | equemene | (atoi(argv[2])>=1)) |
554 | 1 | equemene | { |
555 | 1 | equemene | bench(atoi(argv[1]),atoi(argv[2])); |
556 | 1 | equemene | } |
557 | 1 | equemene | |
558 | 1 | equemene | return 0; |
559 | 1 | equemene | } |