Statistiques
| Révision :

root / BLAS / xGEMM / xGEMM.c @ 6

Historique | Voir | Annoter | Télécharger (12,4 ko)

1 1 equemene
/*
2 1 equemene
   Performs matrix multiply
3 1 equemene

4 1 equemene
   Thanks for help from aurel32@debian.org
5 1 equemene
*/
6 1 equemene
7 1 equemene
#include <stdio.h>
8 1 equemene
#include <math.h>
9 1 equemene
#include <stdlib.h>
10 1 equemene
#include <sys/time.h>
11 1 equemene
#include <string.h>
12 1 equemene
13 1 equemene
#ifdef CUBLAS
14 1 equemene
#include <cublas.h>
15 1 equemene
#define CUBLAS_WRAPPER_ERROR_NOERR      0
16 1 equemene
#define CUBLAS_WRAPPER_ERROR_ALLOC      1
17 1 equemene
#define CUBLAS_WRAPPER_ERROR_SET        2
18 1 equemene
#define CUBLAS_WRAPPER_ERROR_GET        3
19 1 equemene
#define CUBLAS_WRAPPER_ERROR_STUB       4
20 1 equemene
#elif THUNKING
21 1 equemene
#include <cublas.h>
22 1 equemene
#elif FBLAS
23 1 equemene
#include <cblas_f77.h>
24 1 equemene
#elif GSL
25 1 equemene
#include <gsl_cblas.h>
26 1 equemene
#elif ACML
27 1 equemene
#include <acml.h>
28 1 equemene
#include <acml_mv.h>
29 1 equemene
#else
30 1 equemene
#include <cblas.h>
31 1 equemene
#endif
32 1 equemene
33 1 equemene
#ifdef DOUBLE
34 1 equemene
#define LENGTH double
35 1 equemene
#else
36 1 equemene
#define LENGTH float
37 1 equemene
#endif
38 1 equemene
39 1 equemene
#ifdef THUNKING
40 1 equemene
#include "fortran_thunking.h"
41 1 equemene
42 1 equemene
#elif FBLAS
43 1 equemene
44 1 equemene
#ifdef DOUBLE
45 1 equemene
46 1 equemene
void dgemm_(FCHAR, FCHAR, FINT, FINT, FINT, const double *, const double *, FINT,
47 1 equemene
               const double *, FINT, const double *, double *, FINT);
48 1 equemene
49 1 equemene
#else
50 1 equemene
51 1 equemene
void sgemm_(FCHAR, FCHAR, FINT, FINT, FINT, const float *, const float *, FINT,
52 1 equemene
               const float *, FINT, const float *, float *, FINT);
53 1 equemene
54 1 equemene
#endif
55 1 equemene
56 1 equemene
#endif
57 1 equemene
58 1 equemene
/* Matrix with only defined triangular terms */
59 1 equemene
/* Even if there are 0 in matrix, must be defined at all ! */
60 1 equemene
61 1 equemene
/* Get from fortran.c */
62 1 equemene
63 1 equemene
#ifdef CUBLAS
64 1 equemene
static char *errMsg[5] =
65 1 equemene
{
66 1 equemene
    "no error",
67 1 equemene
    "allocation error",
68 1 equemene
    "setVector/setMatrix error",
69 1 equemene
    "getVector/getMatrix error",
70 1 equemene
    "not implemented"
71 1 equemene
};
72 1 equemene
73 1 equemene
static void wrapperError (const char *funcName, int error)
74 1 equemene
{
75 1 equemene
    printf ("cublas%s wrapper: %s\n", funcName, errMsg[error]);
76 1 equemene
    fflush (stdout);
77 1 equemene
}
78 1 equemene
#endif
79 1 equemene
80 1 equemene
int printVector(const int dimVector,const LENGTH *dataVector,
81 1 equemene
                char *nameVector,char *mesgVector)
82 1 equemene
{
83 1 equemene
#ifndef QUIET
84 1 equemene
85 1 equemene
  int i;
86 1 equemene
  printf("\n%s of %s, size %i:\n",mesgVector,nameVector,dimVector);
87 1 equemene
  for (i=0;i<dimVector;i++)
88 1 equemene
    {
89 1 equemene
      printf("%s[%i]=%2.10e\n",nameVector,i,dataVector[i]);
90 1 equemene
    }
91 1 equemene
#endif
92 1 equemene
93 1 equemene
  return 0;
94 1 equemene
}
95 1 equemene
96 1 equemene
int printResults(const int dimVector,const LENGTH *dataVector,
97 1 equemene
                 char *nameVector,char *mesgVector)
98 1 equemene
{
99 1 equemene
#ifdef RESULTS
100 1 equemene
  int i;
101 1 equemene
102 1 equemene
  printf("\n%s of %s, size %i:\n",mesgVector,nameVector,dimVector);
103 1 equemene
  for (i=0;i<dimVector;i++)
104 1 equemene
    {
105 1 equemene
      printf("%s[%i]=%2.10e\n",nameVector,i,dataVector[i]);
106 1 equemene
    }
107 1 equemene
#endif
108 1 equemene
  return 0;
109 1 equemene
}
110 1 equemene
111 1 equemene
#ifdef CUBLAS
112 1 equemene
int printVectorGPU(const int dimVector,const LENGTH *dataVector,
113 1 equemene
                   char *nameVector,char *mesgVector)
114 1 equemene
{
115 1 equemene
#ifndef QUIET
116 1 equemene
  int i;
117 1 equemene
  cublasStatus stat;
118 1 equemene
  LENGTH *P=0;
119 1 equemene
  int incx=1;
120 1 equemene
121 1 equemene
  P=malloc(dimVector*sizeof(LENGTH));
122 1 equemene
123 1 equemene
  stat=cublasGetVector(dimVector,sizeof(P[0]),dataVector,incx,P,incx);
124 1 equemene
125 1 equemene
  if (stat != CUBLAS_STATUS_SUCCESS) {
126 1 equemene
    wrapperError ("ToGet", CUBLAS_WRAPPER_ERROR_GET);
127 1 equemene
  }
128 1 equemene
129 1 equemene
  printf("\n%s of %s, size %i:\n",mesgVector,nameVector,dimVector);
130 1 equemene
  for (i=0;i<dimVector;i++)
131 1 equemene
    {
132 1 equemene
      printf("%s[%i]=%2.10e\n",nameVector,i,P[i]);
133 1 equemene
    }
134 1 equemene
135 1 equemene
  free(P);
136 1 equemene
#endif
137 1 equemene
138 1 equemene
  return 0;
139 1 equemene
}
140 1 equemene
#endif
141 1 equemene
142 1 equemene
int bench(int dim,int RUNS)
143 1 equemene
{
144 1 equemene
  /*
145 1 equemene
  int dim=1000;
146 1 equemene
  int RUNS=100;
147 1 equemene
  int incx=1;
148 1 equemene
  */
149 1 equemene
#ifdef PRINT
150 1 equemene
  LENGTH factor=1.;
151 1 equemene
#endif
152 1 equemene
153 1 equemene
  LENGTH alpha=1.,beta=0.;
154 1 equemene
  LENGTH *A,*B,*C,*D;
155 1 equemene
156 1 equemene
  /* checkBefore checkAfter checks */
157 1 equemene
  LENGTH *checksA,*checksB;
158 1 equemene
159 1 equemene
  int i=0, j=0;
160 1 equemene
161 1 equemene
  double duration;
162 1 equemene
163 1 equemene
  struct timeval tv1,tv2;
164 1 equemene
  struct timezone tz;
165 1 equemene
166 1 equemene
  /* Create 4 Matrix of dimension dim by dim  */
167 1 equemene
168 1 equemene
  A=malloc(dim*dim*sizeof(LENGTH));
169 1 equemene
  B=malloc(dim*dim*sizeof(LENGTH));
170 1 equemene
  C=malloc(dim*dim*sizeof(LENGTH));
171 1 equemene
  D=malloc(dim*dim*sizeof(LENGTH));
172 1 equemene
173 1 equemene
  /* Create 2 vectors for checker Before and After */
174 1 equemene
175 1 equemene
  checksA=malloc(RUNS*sizeof(LENGTH));
176 1 equemene
  checksB=malloc(RUNS*sizeof(LENGTH));
177 1 equemene
178 1 equemene
  /* Initialize elements with random numbers */
179 1 equemene
  /* Initialize the seed for rand() */
180 1 equemene
  /* srand(time()); */
181 1 equemene
182 1 equemene
  for (i=0; i<dim; i++) {
183 1 equemene
    for (j=0; j<dim; j++) {
184 1 equemene
        A[i*dim+j]=(LENGTH)rand()/(RAND_MAX+1.)
185 1 equemene
          *(LENGTH)(i+1.)/(LENGTH)(j+1.);
186 1 equemene
        B[i*dim+j]=(LENGTH)rand()/(RAND_MAX+1.)
187 1 equemene
          *(LENGTH)(i+1.)/(LENGTH)(j+1.);
188 1 equemene
        C[i*dim+j]=0.;
189 1 equemene
        D[i*dim+j]=0.;
190 1 equemene
    }
191 1 equemene
  }
192 1 equemene
  /*
193 1 equemene
  A[0]=1;
194 1 equemene
  A[1]=2;
195 1 equemene
  A[2]=3;
196 1 equemene
  A[3]=4;
197 1 equemene

198 1 equemene
  B[0]=5;
199 1 equemene
  B[1]=6;
200 1 equemene
  B[2]=7;
201 1 equemene
  B[3]=8;
202 1 equemene
  */
203 1 equemene
204 1 equemene
  /* Print the matrix */
205 1 equemene
206 1 equemene
#ifdef QUIET
207 1 equemene
#else
208 1 equemene
  for (i=0; i<dim; i++) {
209 1 equemene
    for (j=0; j<dim; j++) printf("A[%i,%i]=%1.5f ", i,j,A[i*dim+j]);
210 1 equemene
    putchar('\n');
211 1 equemene
  }
212 1 equemene
  putchar('\n');
213 1 equemene
  for (i=0; i<dim; i++) {
214 1 equemene
    for (j=0; j<dim; j++) printf("B[%i,%i]=%1.5f ", i,j,B[i*dim+j]);
215 1 equemene
    putchar('\n');
216 1 equemene
  }
217 1 equemene
  putchar('\n');
218 1 equemene
#endif
219 1 equemene
220 1 equemene
 /* Get first timer before launching */
221 1 equemene
  gettimeofday(&tv1, &tz);
222 1 equemene
223 1 equemene
  /* Compute with CuBLAS library  */
224 1 equemene
#ifdef CUBLAS
225 1 equemene
  LENGTH *devPtrA=0, *devPtrB=0, *devPtrC=0, *devPtrD=0;
226 1 equemene
  cublasStatus stat1, stat2, stat3, stat4;
227 1 equemene
  struct timeval tv3,tv4;
228 1 equemene
229 1 equemene
  /* Order is Row */
230 1 equemene
  /* Have to swap uplo and trans */
231 1 equemene
  char transa='N',transb='T';
232 1 equemene
233 1 equemene
  printf("Using CuBLAS: %i iterations for %ix%i matrix\n",
234 1 equemene
         RUNS,dim,dim);
235 1 equemene
236 1 equemene
  stat1=cublasAlloc(dim*dim,sizeof(devPtrA[0]),(void**)&devPtrA);
237 1 equemene
  stat2=cublasAlloc(dim*dim,sizeof(devPtrB[0]),(void**)&devPtrB);
238 1 equemene
  stat3=cublasAlloc(dim*dim,sizeof(devPtrC[0]),(void**)&devPtrC);
239 1 equemene
  stat4=cublasAlloc(dim*dim,sizeof(devPtrD[0]),(void**)&devPtrD);
240 1 equemene
241 1 equemene
  if ((stat1 != CUBLAS_STATUS_SUCCESS) ||
242 1 equemene
      (stat2 != CUBLAS_STATUS_SUCCESS) ||
243 1 equemene
      (stat3 != CUBLAS_STATUS_SUCCESS) ||
244 1 equemene
      (stat4 != CUBLAS_STATUS_SUCCESS) ) {
245 5 equemene
    wrapperError ("xGEMM", CUBLAS_WRAPPER_ERROR_ALLOC);
246 1 equemene
    cublasFree (devPtrA);
247 1 equemene
    cublasFree (devPtrB);
248 1 equemene
    cublasFree (devPtrC);
249 1 equemene
    cublasFree (devPtrD);
250 1 equemene
    return 1;
251 1 equemene
  }
252 1 equemene
253 1 equemene
  stat1=cublasSetMatrix(dim,dim,sizeof(A[0]),A,dim,devPtrA,dim);
254 1 equemene
  stat2=cublasSetMatrix(dim,dim,sizeof(B[0]),B,dim,devPtrB,dim);
255 1 equemene
  stat3=cublasSetMatrix(dim,dim,sizeof(C[0]),C,dim,devPtrC,dim);
256 1 equemene
  stat4=cublasSetMatrix(dim,dim,sizeof(D[0]),D,dim,devPtrD,dim);
257 1 equemene
258 1 equemene
  if ((stat1 != CUBLAS_STATUS_SUCCESS) ||
259 1 equemene
      (stat2 != CUBLAS_STATUS_SUCCESS) ||
260 1 equemene
      (stat3 != CUBLAS_STATUS_SUCCESS) ||
261 1 equemene
      (stat4 != CUBLAS_STATUS_SUCCESS) ) {
262 5 equemene
    wrapperError ("xGEMM", CUBLAS_WRAPPER_ERROR_SET);
263 1 equemene
    cublasFree (devPtrA);
264 1 equemene
    cublasFree (devPtrB);
265 1 equemene
    cublasFree (devPtrC);
266 1 equemene
    cublasFree (devPtrD);
267 1 equemene
    return 1;
268 1 equemene
  }
269 1 equemene
270 1 equemene
  /* Get third timer after memory operation */
271 1 equemene
  gettimeofday(&tv3, &tz);
272 1 equemene
273 1 equemene
#ifdef DOUBLE
274 1 equemene
275 1 equemene
  for (i=0;i<RUNS;i++)
276 1 equemene
    {
277 1 equemene
      cublasDgemm(transa,transa,dim,dim,dim,alpha,devPtrB,dim,
278 1 equemene
                  devPtrA,dim,beta,devPtrC,dim);
279 1 equemene
      cublasDgemm(transb,transb,dim,dim,dim,alpha,devPtrA,dim,
280 1 equemene
                  devPtrB,dim,beta,devPtrD,dim);
281 1 equemene
    }
282 1 equemene
283 1 equemene
#else
284 1 equemene
285 1 equemene
  for (i=0;i<RUNS;i++)
286 1 equemene
    {
287 1 equemene
      cublasSgemm(transa,transa,dim,dim,dim,alpha,devPtrB,dim,
288 1 equemene
                  devPtrA,dim,beta,devPtrC,dim);
289 1 equemene
      cublasSgemm(transb,transb,dim,dim,dim,alpha,devPtrA,dim,
290 1 equemene
                  devPtrB,dim,beta,devPtrD,dim);
291 1 equemene
    }
292 1 equemene
293 1 equemene
#endif
294 1 equemene
295 1 equemene
  stat3=cublasGetMatrix(dim,dim,sizeof(C[0]),devPtrC,dim,C,dim);
296 1 equemene
  stat4=cublasGetMatrix(dim,dim,sizeof(D[0]),devPtrD,dim,D,dim);
297 1 equemene
298 1 equemene
  cublasFree (devPtrA);
299 1 equemene
  cublasFree (devPtrB);
300 1 equemene
  cublasFree (devPtrC);
301 1 equemene
  cublasFree (devPtrD);
302 1 equemene
303 1 equemene
  if ((stat1 != CUBLAS_STATUS_SUCCESS) ) {
304 5 equemene
    wrapperError ("xGEMM", CUBLAS_WRAPPER_ERROR_GET);
305 1 equemene
  }
306 1 equemene
307 1 equemene
  /* Get fourth timer after memory free */
308 1 equemene
  gettimeofday(&tv4, &tz);
309 1 equemene
310 1 equemene
#elif THUNKING
311 1 equemene
312 1 equemene
  /* Order is Row : Have to swap uplo='U' and trans='N' */
313 1 equemene
  char transa='N',transb='T';
314 1 equemene
  printf("Using CuBLAS/Thunking: %i iterations for %ix%i matrix\n",
315 1 equemene
         RUNS,dim,dim);
316 1 equemene
317 1 equemene
#ifdef DOUBLE
318 1 equemene
319 1 equemene
  for (i=0;i<RUNS;i++)
320 1 equemene
    {
321 1 equemene
      /* Multiply A by X as Y <- A.X */
322 1 equemene
      CUBLAS_DGEMM(&transa,&transa,
323 1 equemene
                   &dim,&dim,&dim,&alpha,B,&dim,A,&dim,&beta,C,&dim);
324 1 equemene
      CUBLAS_DGEMM(&transb,&transb,
325 1 equemene
                   &dim,&dim,&dim,&alpha,A,&dim,B,&dim,&beta,D,&dim);
326 1 equemene
    }
327 1 equemene
328 1 equemene
#else
329 1 equemene
330 1 equemene
  for (i=0;i<RUNS;i++)
331 1 equemene
    {
332 1 equemene
      /* Multiply A by X as Y <- A.X */
333 1 equemene
      CUBLAS_SGEMM(&transa,&transa,
334 1 equemene
                   &dim,&dim,&dim,&alpha,B,&dim,A,&dim,&beta,C,&dim);
335 1 equemene
      CUBLAS_SGEMM(&transb,&transb,
336 1 equemene
                   &dim,&dim,&dim,&alpha,A,&dim,B,&dim,&beta,D,&dim);
337 1 equemene
    }
338 1 equemene
339 1 equemene
#endif
340 1 equemene
341 1 equemene
#elif FBLAS
342 1 equemene
343 1 equemene
  /* Order is Row : Have to swap uplo='U' and trans='N' */
344 1 equemene
      char transa='N',transb='T';
345 1 equemene
346 1 equemene
  printf("Using FBLAS: %i iterations for %ix%i matrix\n",
347 1 equemene
         RUNS,dim,dim);
348 1 equemene
349 1 equemene
#ifdef DOUBLE
350 1 equemene
351 1 equemene
  for (i=0;i<RUNS;i++)
352 1 equemene
    {
353 1 equemene
      /* Multiply A by X as Y <- A.X */
354 1 equemene
      dgemm_(&transa,&transa,&dim,&dim,&dim,&alpha,B,&dim,A,&dim,&beta,C,&dim);
355 1 equemene
      dgemm_(&transb,&transb,&dim,&dim,&dim,&alpha,A,&dim,B,&dim,&beta,D,&dim);
356 1 equemene
    }
357 1 equemene
358 1 equemene
#else
359 1 equemene
360 1 equemene
  for (i=0;i<RUNS;i++)
361 1 equemene
    {
362 1 equemene
      /* Multiply A by X as Y <- A.X */
363 1 equemene
      sgemm_(&transa,&transa,&dim,&dim,&dim,&alpha,B,&dim,A,&dim,&beta,C,&dim);
364 1 equemene
      sgemm_(&transb,&transb,&dim,&dim,&dim,&alpha,A,&dim,B,&dim,&beta,D,&dim);
365 1 equemene
    }
366 1 equemene
367 1 equemene
#endif
368 1 equemene
369 1 equemene
#elif ACML
370 1 equemene
371 1 equemene
  /* Order is Row : Have to swap uplo='U' and trans='N' */
372 1 equemene
      char transa='N',transb='T';
373 1 equemene
374 1 equemene
  printf("Using ACML: %i iterations for %ix%i matrix\n",
375 1 equemene
         RUNS,dim,dim);
376 1 equemene
377 1 equemene
#ifdef DOUBLE
378 1 equemene
379 1 equemene
  for (i=0;i<RUNS;i++)
380 1 equemene
    {
381 1 equemene
      /* Multiply A by X as Y <- A.X */
382 1 equemene
      dgemm(transa,transa,dim,dim,dim,alpha,B,dim,A,dim,beta,C,dim);
383 1 equemene
      dgemm(transb,transb,dim,dim,dim,alpha,A,dim,B,dim,beta,D,dim);
384 1 equemene
    }
385 1 equemene
386 1 equemene
#else
387 1 equemene
388 1 equemene
  for (i=0;i<RUNS;i++)
389 1 equemene
    {
390 1 equemene
      /* Multiply A by X as Y <- A.X */
391 1 equemene
      sgemm(transa,transa,dim,dim,dim,alpha,B,dim,A,dim,beta,C,dim);
392 1 equemene
      sgemm(transb,transb,dim,dim,dim,alpha,A,dim,B,dim,beta,D,dim);
393 1 equemene
    }
394 1 equemene
395 1 equemene
#endif
396 1 equemene
397 1 equemene
#elif GSL
398 1 equemene
399 1 equemene
  printf("Using GSL: %i iterations for %ix%i matrix\n",RUNS,dim,dim);
400 1 equemene
401 1 equemene
  /*
402 1 equemene
     RowMajor : Matrix is read row by row
403 1 equemene
     Upper : the no null elements are on top
404 1 equemene
     NoTrans : no transposition before estimation
405 1 equemene
     NonUnit : Matrix is not unit
406 1 equemene
   */
407 1 equemene
408 1 equemene
#ifdef DOUBLE
409 1 equemene
410 1 equemene
  for (i=0;i<RUNS;i++)
411 1 equemene
    {
412 1 equemene
      /* Multiply A by X as Y <- A.X */
413 1 equemene
      cblas_dgemm(CblasRowMajor,CblasNoTrans,CblasNoTrans,
414 1 equemene
                  dim,dim,dim,alpha,A,dim,B,dim,beta,C,dim);
415 1 equemene
      cblas_dgemm(CblasRowMajor,CblasTrans,CblasTrans,
416 1 equemene
                  dim,dim,dim,alpha,B,dim,A,dim,beta,D,dim);
417 1 equemene
    }
418 1 equemene
419 1 equemene
#else
420 1 equemene
421 1 equemene
  for (i=0;i<RUNS;i++)
422 1 equemene
    {
423 1 equemene
      /* Multiply A by X as Y <- A.X */
424 1 equemene
      cblas_sgemm(CblasRowMajor,CblasNoTrans,CblasNoTrans,
425 1 equemene
                  dim,dim,dim,alpha,A,dim,B,dim,beta,C,dim);
426 1 equemene
      cblas_sgemm(CblasRowMajor,CblasTrans,CblasTrans,
427 1 equemene
                  dim,dim,dim,alpha,B,dim,A,dim,beta,D,dim);
428 1 equemene
    }
429 1 equemene
430 1 equemene
#endif
431 1 equemene
432 1 equemene
#else
433 1 equemene
434 1 equemene
  printf("Using CBLAS: %i iterations for %ix%i matrix\n",RUNS,dim,dim);
435 1 equemene
436 1 equemene
  /*
437 1 equemene
     RowMajor : Matrix is read row bu row
438 1 equemene
     Upper : the no null elements are on top
439 1 equemene
     NoTrans : no transposition before estimation
440 1 equemene
     NonUnit : Matrix is not unit
441 1 equemene
   */
442 1 equemene
443 1 equemene
#ifdef DOUBLE
444 1 equemene
445 1 equemene
  for (i=0;i<RUNS;i++)
446 1 equemene
    {
447 1 equemene
      cblas_dgemm(CblasRowMajor,CblasNoTrans,CblasNoTrans,
448 1 equemene
                  dim,dim,dim,alpha,A,dim,B,dim,beta,C,dim);
449 1 equemene
      cblas_dgemm(CblasRowMajor,CblasTrans,CblasTrans,
450 1 equemene
                  dim,dim,dim,alpha,B,dim,A,dim,beta,D,dim);
451 1 equemene
    }
452 1 equemene
453 1 equemene
#else
454 1 equemene
455 1 equemene
  for (i=0;i<RUNS;i++)
456 1 equemene
    {
457 1 equemene
      cblas_sgemm(CblasRowMajor,CblasNoTrans,CblasNoTrans,
458 1 equemene
                  dim,dim,dim,alpha,A,dim,B,dim,beta,C,dim);
459 1 equemene
      cblas_sgemm(CblasRowMajor,CblasTrans,CblasTrans,
460 1 equemene
                  dim,dim,dim,alpha,B,dim,A,dim,beta,D,dim);
461 1 equemene
    }
462 1 equemene
463 1 equemene
#endif
464 1 equemene
465 1 equemene
#endif
466 1 equemene
467 1 equemene
  /* Get second timer after launching */
468 1 equemene
  gettimeofday(&tv2, &tz);
469 1 equemene
470 1 equemene
  /* Store the checker of errors */
471 1 equemene
  checksA[0]=0.;
472 1 equemene
473 1 equemene
  for (i=0; i<dim; i++) {
474 1 equemene
    for (j=0; j<dim; j++) {
475 1 equemene
      checksA[0]=checksA[0]+fabs(D[i*dim+j]-C[j*dim+i]);
476 1 equemene
    }
477 1 equemene
  }
478 1 equemene
479 1 equemene
  /* Print the matrix */
480 1 equemene
481 1 equemene
#ifdef QUIET
482 1 equemene
#else
483 1 equemene
  for (i=0; i<dim; i++) {
484 1 equemene
    for (j=0; j<dim; j++) printf("C[%i,%i]=%1.5f ", i,j,C[i*dim+j]);
485 1 equemene
    putchar('\n');
486 1 equemene
  }
487 1 equemene
  putchar('\n');
488 1 equemene
  for (i=0; i<dim; i++) {
489 1 equemene
    for (j=0; j<dim; j++) printf("D[%i,%i]=%1.5f ", i,j,D[i*dim+j]);
490 1 equemene
    putchar('\n');
491 1 equemene
  }
492 1 equemene
  putchar('\n');
493 1 equemene
#endif
494 1 equemene
495 1 equemene
  /* Free 1 Matrix and 2 Vectors of dimension dim  */
496 1 equemene
497 1 equemene
  free(A);
498 1 equemene
  free(B);
499 1 equemene
  free(C);
500 1 equemene
  free(D);
501 1 equemene
502 1 equemene
  putchar('\n');
503 1 equemene
504 1 equemene
#ifdef CUBLAS
505 1 equemene
  double memoryIn,memoryOut;
506 1 equemene
507 1 equemene
  memoryIn=(double)((tv3.tv_sec-tv1.tv_sec) * 1000000L +        \
508 1 equemene
                    (tv3.tv_usec-tv1.tv_usec))/1000000.;
509 1 equemene
510 1 equemene
  memoryOut=(double)((tv2.tv_sec-tv4.tv_sec) * 1000000L +        \
511 1 equemene
                    (tv2.tv_usec-tv4.tv_usec))/1000000.;
512 1 equemene
513 1 equemene
  duration=(double)((tv4.tv_sec-tv3.tv_sec) * 1000000L +        \
514 1 equemene
                    (tv4.tv_usec-tv3.tv_usec))/1000000./RUNS;
515 1 equemene
516 1 equemene
  printf("Duration of memory allocation : %2.10f s\n",memoryIn);
517 1 equemene
  printf("Duration of memory free : %2.10f s\n",memoryOut);
518 1 equemene
#else
519 1 equemene
  duration=(double)((tv2.tv_sec-tv1.tv_sec) * 1000000L +        \
520 1 equemene
                    (tv2.tv_usec-tv1.tv_usec))/1000000./RUNS;
521 1 equemene
522 1 equemene
#endif
523 1 equemene
524 1 equemene
  printf("Duration of each cycle : %2.10f s\n",duration);
525 1 equemene
526 1 equemene
  printf("Number of GFlops : %2.3f \n",
527 1 equemene
         dim*dim*2.*(2.*dim-1)/duration/1000000000.);
528 1 equemene
529 1 equemene
  printf("Error %1.10f\n",checksA[0]);
530 1 equemene
  printResults(RUNS,checksA,"C","Errors cumulated");
531 1 equemene
532 1 equemene
  putchar('\n');
533 1 equemene
534 1 equemene
  /* Free 2 vectors for checker Before and After */
535 1 equemene
536 1 equemene
  free(checksA);
537 1 equemene
  free(checksB);
538 1 equemene
539 1 equemene
  return 0;
540 1 equemene
}
541 1 equemene
542 1 equemene
int main(int argc,char **argv)
543 1 equemene
{
544 1 equemene
  if ((argc==1)||
545 1 equemene
      (strcmp(argv[1],"-h")==0)||
546 1 equemene
      (strcmp(argv[1],"--help")==0))
547 1 equemene
    {
548 1 equemene
      printf("\nPerforms a bench using BLAS library implementation:\n\n"
549 1 equemene
             "\t#1 Size of square matrices \n"
550 1 equemene
             "\t#2 Number of iterations\n\n");
551 1 equemene
    }
552 1 equemene
  else if ((atoi(argv[1])>=2)&&
553 1 equemene
           (atoi(argv[2])>=1))
554 1 equemene
    {
555 1 equemene
      bench(atoi(argv[1]),atoi(argv[2]));
556 1 equemene
    }
557 1 equemene
558 1 equemene
  return 0;
559 1 equemene
}