Statistiques
| Révision :

root / BLAS / xTRSV / xTRSV.c @ 251

Historique | Voir | Annoter | Télécharger (17,48 ko)

1 1 equemene
/*
2 1 equemene
   Performs a linear system solving of random generated system
3 1 equemene
   Estimates a test
4 1 equemene

5 1 equemene
   Matrix is triangular
6 1 equemene

7 1 equemene
   Thanks for help from aurel32@debian.org
8 1 equemene
*/
9 1 equemene
10 1 equemene
#include <stdio.h>
11 1 equemene
#include <math.h>
12 1 equemene
#include <stdlib.h>
13 1 equemene
#include <sys/time.h>
14 1 equemene
#include <string.h>
15 1 equemene
16 1 equemene
#ifdef CUBLAS
17 1 equemene
#include <cublas.h>
18 1 equemene
#define CUBLAS_WRAPPER_ERROR_NOERR      0
19 1 equemene
#define CUBLAS_WRAPPER_ERROR_ALLOC      1
20 1 equemene
#define CUBLAS_WRAPPER_ERROR_SET        2
21 1 equemene
#define CUBLAS_WRAPPER_ERROR_GET        3
22 1 equemene
#define CUBLAS_WRAPPER_ERROR_STUB       4
23 1 equemene
#elif THUNKING
24 1 equemene
#include <cublas.h>
25 7 equemene
#include "fortran_common.h"
26 7 equemene
#include "fortran_thunking.h"
27 1 equemene
#elif FBLAS
28 251 equemene
#include <f77blas.h>
29 1 equemene
#elif GSL
30 1 equemene
#include <gsl_cblas.h>
31 1 equemene
#elif ACML
32 1 equemene
#include <acml.h>
33 1 equemene
#else
34 1 equemene
#include <cblas.h>
35 251 equemene
// #include <blaswrap.h>
36 1 equemene
#endif
37 1 equemene
38 251 equemene
#ifdef FP64
39 1 equemene
#define LENGTH double
40 1 equemene
#else
41 1 equemene
#define LENGTH float
42 1 equemene
#endif
43 1 equemene
44 1 equemene
/* Matrix with only defined triangular terms */
45 1 equemene
/* Even if there are 0 in matrix, must be defined at all ! */
46 1 equemene
47 1 equemene
/* Get from fortran.c */
48 1 equemene
49 1 equemene
#ifdef CUBLAS
50 1 equemene
static char *errMsg[5] =
51 1 equemene
{
52 1 equemene
    "no error",
53 1 equemene
    "allocation error",
54 1 equemene
    "setVector/setMatrix error",
55 1 equemene
    "getVector/getMatrix error",
56 1 equemene
    "not implemented"
57 1 equemene
};
58 1 equemene
59 1 equemene
static void wrapperError (const char *funcName, int error)
60 1 equemene
{
61 1 equemene
    printf ("cublas%s wrapper: %s\n", funcName, errMsg[error]);
62 1 equemene
    fflush (stdout);
63 1 equemene
}
64 1 equemene
#endif
65 1 equemene
66 1 equemene
int printVector(const int dimVector,const LENGTH *dataVector,
67 1 equemene
                char *nameVector,char *mesgVector)
68 1 equemene
{
69 1 equemene
#ifndef QUIET
70 1 equemene
71 1 equemene
  int i;
72 1 equemene
  printf("\n%s of %s, size %i:\n",mesgVector,nameVector,dimVector);
73 1 equemene
  for (i=0;i<dimVector;i++)
74 1 equemene
    {
75 1 equemene
      printf("%s[%i]=%2.10e\n",nameVector,i,dataVector[i]);
76 1 equemene
    }
77 1 equemene
#endif
78 1 equemene
79 1 equemene
  return 0;
80 1 equemene
}
81 1 equemene
82 1 equemene
int printResults(const int dimVector,const LENGTH *dataVector,
83 1 equemene
                 char *nameVector,char *mesgVector)
84 1 equemene
{
85 1 equemene
#ifdef RESULTS
86 1 equemene
  int i;
87 1 equemene
88 1 equemene
  printf("\n%s of %s, size %i:\n",mesgVector,nameVector,dimVector);
89 1 equemene
  for (i=0;i<dimVector;i++)
90 1 equemene
    {
91 1 equemene
      printf("%s[%i]=%2.10e\n",nameVector,i,dataVector[i]);
92 1 equemene
    }
93 1 equemene
#endif
94 1 equemene
  return 0;
95 1 equemene
}
96 1 equemene
97 1 equemene
#ifdef CUBLAS
98 1 equemene
int printVectorGPU(const int dimVector,const LENGTH *dataVector,
99 1 equemene
                   char *nameVector,char *mesgVector)
100 1 equemene
{
101 1 equemene
#ifndef QUIET
102 1 equemene
  int i;
103 1 equemene
  cublasStatus stat;
104 1 equemene
  LENGTH *P=0;
105 1 equemene
  int incx=1;
106 1 equemene
107 1 equemene
  P=malloc(dimVector*sizeof(LENGTH));
108 1 equemene
109 1 equemene
  stat=cublasGetVector(dimVector,sizeof(P[0]),dataVector,incx,P,incx);
110 1 equemene
111 1 equemene
  if (stat != CUBLAS_STATUS_SUCCESS) {
112 1 equemene
    wrapperError ("ToGet", CUBLAS_WRAPPER_ERROR_GET);
113 1 equemene
  }
114 1 equemene
115 1 equemene
  printf("\n%s of %s, size %i:\n",mesgVector,nameVector,dimVector);
116 1 equemene
  for (i=0;i<dimVector;i++)
117 1 equemene
    {
118 1 equemene
      printf("%s[%i]=%2.10e\n",nameVector,i,P[i]);
119 1 equemene
    }
120 1 equemene
121 1 equemene
  free(P);
122 1 equemene
#endif
123 1 equemene
124 1 equemene
  return 0;
125 1 equemene
}
126 1 equemene
#endif
127 1 equemene
128 1 equemene
int bench(int dim,int RUNS)
129 1 equemene
{
130 1 equemene
  /*
131 1 equemene
  int dim=1000;
132 1 equemene
  int RUNS=100;
133 1 equemene
  */
134 1 equemene
  int incx=1;
135 1 equemene
#ifdef PRINT
136 1 equemene
  LENGTH factor=1.;
137 1 equemene
#endif
138 1 equemene
139 1 equemene
  LENGTH alpha=1.,beta=0.,beta2=-1.;
140 1 equemene
  LENGTH *A,*X,*Y;
141 1 equemene
142 1 equemene
  /* checkBefore checkAfter checks */
143 1 equemene
  LENGTH *checksA,*checksB;
144 1 equemene
145 1 equemene
  int i=0, j=0;
146 1 equemene
147 1 equemene
  double duration;
148 1 equemene
149 1 equemene
  struct timeval tv1,tv2;
150 1 equemene
  struct timezone tz;
151 1 equemene
152 1 equemene
  /* Create 1 Matrix and 2 Vectors of dimension dim  */
153 1 equemene
154 1 equemene
  A=malloc(dim*dim*sizeof(LENGTH));
155 1 equemene
  X=malloc(dim*sizeof(LENGTH));
156 1 equemene
  Y=malloc(dim*sizeof(LENGTH));
157 1 equemene
158 1 equemene
  /* Create 2 vectors for checker Before and After */
159 1 equemene
160 1 equemene
  checksA=malloc(RUNS*sizeof(double));
161 1 equemene
  checksB=malloc(RUNS*sizeof(double));
162 1 equemene
163 1 equemene
  /* Initialize elements with random numbers */
164 1 equemene
  /* Initialize the seed for rand() */
165 1 equemene
  /* srand(time()); */
166 1 equemene
167 1 equemene
#ifdef UNIT
168 1 equemene
  /* Fill the matrix and vector with random numbers */
169 1 equemene
  for (i=0; i<dim; i++) {
170 1 equemene
    for (j=0; j<dim; j++)
171 1 equemene
      if (j>=i)
172 1 equemene
        {
173 1 equemene
          /* Normalization is necessary to avoid problems */
174 1 equemene
          A[i*dim+j]=1.;
175 1 equemene
        }
176 1 equemene
      else
177 1 equemene
        {
178 1 equemene
           A[i*dim+j]=0.;
179 1 equemene
        }
180 1 equemene
    X[i]=1;
181 1 equemene
  }
182 1 equemene
#else
183 1 equemene
  for (i=0; i<dim; i++) {
184 1 equemene
    for (j=0; j<dim; j++)
185 1 equemene
      if (j>i)
186 1 equemene
        {
187 1 equemene
          /* Normalization is necessary to avoid problems */
188 180 equemene
          /* A[i*dim+j]=(LENGTH)rand()/(RAND_MAX+1.) */
189 180 equemene
          /*   *(LENGTH)(i+1.)/(LENGTH)(j+1.); */
190 1 equemene
          A[i*dim+j]=(LENGTH)rand()/(RAND_MAX+1.)
191 180 equemene
            /(LENGTH)(dim-j);
192 1 equemene
        }
193 1 equemene
      else if (j==i)
194 1 equemene
        {
195 1 equemene
           A[i*dim+j]=1.;
196 1 equemene
        }
197 1 equemene
      else
198 1 equemene
        {
199 1 equemene
           A[i*dim+j]=0.;
200 1 equemene
        }
201 1 equemene
    X[i]=(LENGTH)rand()/(RAND_MAX+1.);
202 1 equemene
  }
203 1 equemene
#endif
204 1 equemene
205 1 equemene
  /* Print the matrix */
206 1 equemene
207 1 equemene
#ifdef QUIET
208 1 equemene
#else
209 1 equemene
  for (i=0; i<dim; i++) {
210 1 equemene
    for (j=0; j<dim; j++) printf("A[%i,%i]=%1.5f ", i,j,A[i*dim+j]);
211 1 equemene
    printf("\tX[%i]=%1.5f ", i,X[i]);
212 1 equemene
    putchar('\n');
213 1 equemene
  }
214 1 equemene
  putchar('\n');
215 1 equemene
#endif
216 1 equemene
217 1 equemene
  /* Get first timer before launching */
218 1 equemene
  gettimeofday(&tv1, &tz);
219 1 equemene
220 1 equemene
  /* Compute with CuBLAS library  */
221 1 equemene
#ifdef CUBLAS
222 1 equemene
  LENGTH *devPtrA=0, *devPtrX=0, *devPtrY=0;
223 1 equemene
  cublasStatus stat1, stat2, stat3;
224 1 equemene
  struct timeval tv3,tv4;
225 1 equemene
226 1 equemene
  /* Order is Row */
227 1 equemene
  /* Have to swap uplo and trans */
228 1 equemene
  char uplo='L',trans='T',diag='N';
229 1 equemene
230 1 equemene
  printf("Using CuBLAS: %i iterations for %ix%i matrix\n",
231 1 equemene
         RUNS,dim,dim);
232 1 equemene
233 1 equemene
  stat1=cublasAlloc(dim*dim,sizeof(devPtrA[0]),(void**)&devPtrA);
234 1 equemene
  stat2=cublasAlloc(dim,sizeof(devPtrX[0]),(void**)&devPtrX);
235 1 equemene
  stat3=cublasAlloc(dim,sizeof(devPtrY[0]),(void**)&devPtrY);
236 1 equemene
237 1 equemene
  if ((stat1 != CUBLAS_STATUS_SUCCESS) ||
238 1 equemene
      (stat2 != CUBLAS_STATUS_SUCCESS) ||
239 1 equemene
      (stat3 != CUBLAS_STATUS_SUCCESS)) {
240 1 equemene
    wrapperError ("Dtrsv", CUBLAS_WRAPPER_ERROR_ALLOC);
241 1 equemene
    cublasFree (devPtrA);
242 1 equemene
    cublasFree (devPtrX);
243 1 equemene
    cublasFree (devPtrY);
244 1 equemene
    return 1;
245 1 equemene
  }
246 1 equemene
247 1 equemene
  stat1=cublasSetMatrix(dim,dim,sizeof(A[0]),A,dim,devPtrA,dim);
248 1 equemene
  stat2=cublasSetVector(dim,sizeof(X[0]),X,incx,devPtrX,incx);
249 1 equemene
  stat3=cublasSetVector(dim,sizeof(Y[0]),Y,incx,devPtrY,incx);
250 1 equemene
251 1 equemene
  if ((stat1 != CUBLAS_STATUS_SUCCESS) ||
252 1 equemene
      (stat2 != CUBLAS_STATUS_SUCCESS) ||
253 1 equemene
      (stat3 != CUBLAS_STATUS_SUCCESS)) {
254 1 equemene
    wrapperError ("Dtrsv", CUBLAS_WRAPPER_ERROR_SET);
255 1 equemene
    cublasFree (devPtrA);
256 1 equemene
    cublasFree (devPtrX);
257 1 equemene
    cublasFree (devPtrY);
258 1 equemene
    return 1;
259 1 equemene
  }
260 1 equemene
261 1 equemene
  /* Get third timer after memory operation */
262 1 equemene
  gettimeofday(&tv3, &tz);
263 1 equemene
264 1 equemene
  for (i=0;i<RUNS;i++)
265 1 equemene
    {
266 251 equemene
#ifdef FP64
267 1 equemene
268 1 equemene
      printVectorGPU(dim,devPtrX,"X","Roots");
269 1 equemene
270 1 equemene
      /* Multiply Y <- A.X */
271 1 equemene
      cublasDgemv(trans,dim,dim,alpha,devPtrA,dim,
272 1 equemene
                  devPtrX,incx,beta,devPtrY,incx);
273 1 equemene
274 1 equemene
      printVectorGPU(dim,devPtrY,"Y","Results");
275 1 equemene
276 1 equemene
      /* Solve linear system A.X=Y : Y <- A-1.Y */
277 1 equemene
      cublasDtrsv(uplo,trans,diag,dim,devPtrA,dim,devPtrY,incx);
278 1 equemene
279 1 equemene
      printVectorGPU(dim,devPtrY,"Y","Solutions");
280 1 equemene
281 1 equemene
      /* Estimate the difference between X and Y : Y <- -Y+X */
282 1 equemene
      cublasDaxpy(dim,beta2,devPtrY,incx,devPtrX,incx);
283 1 equemene
284 1 equemene
      printVectorGPU(dim,devPtrX,"X","Errors");
285 1 equemene
286 1 equemene
      /* Estimate the second checker */
287 180 equemene
      checksA[i]=(double)cublasDnrm2(dim,devPtrX,incx);
288 1 equemene
289 1 equemene
      /* Swap vector X and Y */
290 1 equemene
      cublasDswap(dim,devPtrX,incx,devPtrY,incx);
291 1 equemene
292 1 equemene
#else
293 1 equemene
294 1 equemene
      printVectorGPU(dim,devPtrX,"X","Roots");
295 1 equemene
296 1 equemene
      /* Multiply Y <- A.X */
297 1 equemene
      cublasSgemv(trans,dim,dim,alpha,devPtrA,dim,
298 1 equemene
                  devPtrX,incx,beta,devPtrY,incx);
299 1 equemene
300 1 equemene
      printVectorGPU(dim,devPtrY,"Y","Results");
301 1 equemene
302 1 equemene
      /* Solve linear system Y <- A-1.Y */
303 1 equemene
      cublasStrsv(uplo,trans,diag,dim,devPtrA,dim,devPtrY,incx);
304 1 equemene
305 1 equemene
      printVectorGPU(dim,devPtrY,"Y","Solutions");
306 1 equemene
307 1 equemene
      /* Add vectors X and -Y */
308 1 equemene
      cublasSaxpy(dim,beta2,devPtrY,incx,devPtrX,incx);
309 1 equemene
310 1 equemene
      printVectorGPU(dim,devPtrX,"X","Errors");
311 1 equemene
312 1 equemene
      /* Estimate the second checker */
313 180 equemene
      checksA[i]=(double)cublasSnrm2(dim,devPtrX,incx);
314 1 equemene
315 1 equemene
      /* Swap vector X and Y */
316 1 equemene
      cublasSswap(dim,devPtrX,incx,devPtrY,incx);
317 1 equemene
318 1 equemene
#endif
319 1 equemene
320 1 equemene
    }
321 1 equemene
322 1 equemene
  stat1=cublasGetMatrix(dim,dim,sizeof(A[0]),devPtrA,dim,A,dim);
323 1 equemene
  stat2=cublasGetVector(dim,sizeof(X[0]),devPtrX,incx,X,incx);
324 1 equemene
  stat3=cublasGetVector(dim,sizeof(Y[0]),devPtrY,incx,Y,incx);
325 1 equemene
326 1 equemene
  cublasFree (devPtrA);
327 1 equemene
  cublasFree (devPtrX);
328 1 equemene
  cublasFree (devPtrY);
329 1 equemene
330 1 equemene
  if ((stat1 != CUBLAS_STATUS_SUCCESS) ||
331 1 equemene
      (stat2 != CUBLAS_STATUS_SUCCESS) ||
332 1 equemene
      (stat3 != CUBLAS_STATUS_SUCCESS)) {
333 1 equemene
    wrapperError ("LinearSystem", CUBLAS_WRAPPER_ERROR_GET);
334 1 equemene
  }
335 1 equemene
336 1 equemene
  /* Get fourth timer after memory free */
337 1 equemene
  gettimeofday(&tv4, &tz);
338 1 equemene
339 1 equemene
#elif THUNKING
340 1 equemene
341 1 equemene
  /* Order is Row : Have to swap uplo='U' and trans='N' */
342 1 equemene
  char uplo='L',trans='T',diag='N';
343 1 equemene
  printf("Using CuBLAS/Thunking: %i iterations for %ix%i matrix\n",
344 1 equemene
         RUNS,dim,dim);
345 1 equemene
346 1 equemene
  for (i=0;i<RUNS;i++)
347 1 equemene
    {
348 251 equemene
#ifdef FP64
349 1 equemene
350 1 equemene
      printVector(dim,X,"X","Roots");
351 1 equemene
352 1 equemene
      /* Multiply A by X as Y <- A.X */
353 1 equemene
      CUBLAS_DGEMV(&trans,&dim,&dim,&alpha,A,&dim,X,&incx,&beta,Y,&incx);
354 1 equemene
355 1 equemene
      printVector(dim,Y,"Y","Results");
356 1 equemene
357 1 equemene
      /* Solve linear system */
358 1 equemene
      CUBLAS_DTRSV(&uplo,&trans,&diag,&dim,A,&dim,Y,&incx);
359 1 equemene
360 1 equemene
      printVector(dim,Y,"Y","Solutions");
361 1 equemene
362 1 equemene
      /* Compare the roots X and Y */
363 1 equemene
      CUBLAS_DAXPY(&dim,&beta2,Y,&incx,X,&incx);
364 1 equemene
365 1 equemene
      printVector(dim,X,"X","Errors");
366 1 equemene
367 1 equemene
      /* Store the checker of errors */
368 180 equemene
      checksA[i]=(double)CUBLAS_DNRM2(&dim,X,&incx);
369 1 equemene
370 1 equemene
      /* Swap vector X and Y */
371 1 equemene
      CUBLAS_DSWAP(&dim,X,&incx,Y,&incx);
372 1 equemene
#else
373 1 equemene
374 1 equemene
      printVector(dim,X,"X","Roots");
375 1 equemene
376 1 equemene
      /* Multiply A by X as Y <- A.X */
377 1 equemene
      CUBLAS_SGEMV(&trans,&dim,&dim,&alpha,A,&dim,X,&incx,&beta,Y,&incx);
378 1 equemene
379 1 equemene
      printVector(dim,Y,"Y","Results");
380 1 equemene
381 1 equemene
      /* Solve linear system */
382 1 equemene
      CUBLAS_STRSV(&uplo,&trans,&diag,&dim,A,&dim,Y,&incx);
383 1 equemene
384 1 equemene
      printVector(dim,Y,"Y","Solutions");
385 1 equemene
386 1 equemene
      /* Compare the roots X and Y */
387 1 equemene
      CUBLAS_SAXPY(&dim,&beta2,Y,&incx,X,&incx);
388 1 equemene
389 1 equemene
      printVector(dim,X,"X","Errors");
390 1 equemene
391 1 equemene
      /* Store the checker of errors */
392 180 equemene
      checksA[i]=(double)CUBLAS_SNRM2(&dim,X,&incx);
393 1 equemene
394 1 equemene
      /* Swap vector X and Y */
395 1 equemene
      CUBLAS_SSWAP(&dim,X,&incx,Y,&incx);
396 1 equemene
#endif
397 1 equemene
398 1 equemene
#ifdef PRINT
399 1 equemene
      printf("Iteration %i, checker is %2.5f and error is %2.10f\n",
400 1 equemene
             i,checksA[i],fabs(checksB[i]-checksA[i])/factor);
401 1 equemene
#endif
402 1 equemene
    }
403 1 equemene
404 1 equemene
#elif FBLAS
405 1 equemene
406 1 equemene
  /* Order is Row : Have to swap uplo='U' and trans='N' */
407 1 equemene
  char uplo='L',trans='T',diag='N';
408 1 equemene
409 1 equemene
  printf("Using FBLAS: %i iterations for %ix%i matrix\n",
410 1 equemene
         RUNS,dim,dim);
411 1 equemene
412 1 equemene
  for (i=0;i<RUNS;i++)
413 1 equemene
    {
414 251 equemene
#ifdef FP64
415 1 equemene
416 180 equemene
      printVector(dim,X,"X","Initial roots");
417 1 equemene
418 251 equemene
      /* /\* Multiply A by X as Y <- A.X *\/ */
419 251 equemene
      /* dgemv_(&trans,&dim,&dim,&alpha,A,&dim,X,&incx,&beta,Y,&incx); */
420 251 equemene
421 251 equemene
      /* printVector(dim,Y,"Y<-A.X","Estimated results"); */
422 251 equemene
423 251 equemene
      /* /\* Solve linear system *\/ */
424 251 equemene
      /* dtrsv_(&uplo,&trans,&diag,&dim,A,&dim,Y,&incx); */
425 251 equemene
426 251 equemene
      /* printVector(dim,Y,"X","Solutions from A.X=Y"); */
427 251 equemene
428 251 equemene
      /* /\* Compare the roots X and Y *\/ */
429 251 equemene
      /* daxpy_(&dim,&beta2,Y,&incx,X,&incx); */
430 251 equemene
431 251 equemene
      /* printVector(dim,X,"X","Differences initial and estimated"); */
432 251 equemene
433 251 equemene
      /* /\* Store the checker of errors *\/ */
434 251 equemene
      /* dnrm2_(&dim,X,&incx,&checksA[i]); */
435 251 equemene
436 251 equemene
      /* /\* Swap vector X and Y *\/ */
437 251 equemene
      /* dswap_(&dim,X,&incx,Y,&incx); */
438 251 equemene
439 1 equemene
      /* Multiply A by X as Y <- A.X */
440 1 equemene
      dgemv_(&trans,&dim,&dim,&alpha,A,&dim,X,&incx,&beta,Y,&incx);
441 1 equemene
442 180 equemene
      printVector(dim,Y,"Y<-A.X","Estimated results");
443 1 equemene
444 1 equemene
      /* Solve linear system */
445 1 equemene
      dtrsv_(&uplo,&trans,&diag,&dim,A,&dim,Y,&incx);
446 1 equemene
447 180 equemene
      printVector(dim,Y,"X","Solutions from A.X=Y");
448 1 equemene
449 1 equemene
      /* Compare the roots X and Y */
450 1 equemene
      daxpy_(&dim,&beta2,Y,&incx,X,&incx);
451 1 equemene
452 180 equemene
      printVector(dim,X,"X","Differences initial and estimated");
453 1 equemene
454 1 equemene
      /* Store the checker of errors */
455 251 equemene
      checksA[i]=(double)dnrm2_(&dim,X,&incx);
456 1 equemene
457 1 equemene
      /* Swap vector X and Y */
458 1 equemene
      dswap_(&dim,X,&incx,Y,&incx);
459 1 equemene
460 1 equemene
#else
461 1 equemene
462 1 equemene
      printVector(dim,X,"X","Roots");
463 1 equemene
464 1 equemene
      /* Multiply A by X as Y <- A.X */
465 1 equemene
      sgemv_(&trans,&dim,&dim,&alpha,A,&dim,X,&incx,&beta,Y,&incx);
466 1 equemene
467 1 equemene
      printVector(dim,Y,"Y","Results");
468 1 equemene
469 1 equemene
      /* Solve linear system */
470 1 equemene
      strsv_(&uplo,&trans,&diag,&dim,A,&dim,Y,&incx);
471 1 equemene
472 1 equemene
      printVector(dim,Y,"Y","Solutions");
473 1 equemene
474 1 equemene
      /* Compare the roots X and Y */
475 1 equemene
      saxpy_(&dim,&beta2,Y,&incx,X,&incx);
476 1 equemene
477 1 equemene
      printVector(dim,X,"X","Errors");
478 1 equemene
479 1 equemene
      /* Store the checker of errors */
480 251 equemene
      checksA[i]=(LENGTH)snrm2_(&dim,X,&incx);
481 1 equemene
482 1 equemene
      /* Swap vector X and Y */
483 1 equemene
      sswap_(&dim,X,&incx,Y,&incx);
484 1 equemene
#endif
485 1 equemene
486 1 equemene
    }
487 1 equemene
488 1 equemene
#elif ACML
489 1 equemene
490 1 equemene
  /* Order is Row : Have to swap uplo='U' and trans='N' */
491 1 equemene
  char uplo='L',trans='T',diag='N';
492 1 equemene
493 1 equemene
  printf("Using ACML: %i iterations for %ix%i matrix\n",
494 1 equemene
         RUNS,dim,dim);
495 1 equemene
496 1 equemene
  for (i=0;i<RUNS;i++)
497 1 equemene
    {
498 251 equemene
#ifdef FP64
499 1 equemene
500 1 equemene
      printVector(dim,X,"X","Roots");
501 1 equemene
502 1 equemene
      /* Multiply A by X as Y <- A.X */
503 1 equemene
      dgemv(trans,dim,dim,alpha,A,dim,X,incx,beta,Y,incx);
504 1 equemene
505 1 equemene
      printVector(dim,Y,"Y","Results");
506 1 equemene
507 1 equemene
      /* Solve linear system */
508 1 equemene
      dtrsv(uplo,trans,diag,dim,A,dim,Y,incx);
509 1 equemene
510 1 equemene
      printVector(dim,Y,"Y","Solutions");
511 1 equemene
512 1 equemene
      /* Compare the roots X and Y */
513 1 equemene
      daxpy(dim,beta2,Y,incx,X,incx);
514 1 equemene
515 1 equemene
      printVector(dim,X,"X","Errors");
516 1 equemene
517 1 equemene
      /* Store the checker of errors */
518 180 equemene
      dnrm2_(&dim,X,&incx,&checksA[i]);
519 1 equemene
520 1 equemene
      /* Swap vector X and Y */
521 1 equemene
      dswap(dim,X,incx,Y,incx);
522 1 equemene
523 1 equemene
#else
524 1 equemene
525 1 equemene
      printVector(dim,X,"X","Roots");
526 1 equemene
527 1 equemene
      /* Multiply A by X as Y <- A.X */
528 1 equemene
      sgemv(trans,dim,dim,alpha,A,dim,X,incx,beta,Y,incx);
529 1 equemene
530 1 equemene
      printVector(dim,Y,"Y","Results");
531 1 equemene
532 1 equemene
      /* Solve linear system */
533 1 equemene
      strsv(uplo,trans,diag,dim,A,dim,Y,incx);
534 1 equemene
535 1 equemene
      printVector(dim,Y,"Y","Solutions");
536 1 equemene
537 1 equemene
      /* Compare the roots X and Y */
538 1 equemene
      saxpy(dim,beta2,Y,incx,X,incx);
539 1 equemene
540 1 equemene
      printVector(dim,X,"X","Errors");
541 1 equemene
542 1 equemene
      /* Store the checker of errors */
543 180 equemene
      snrm2_(&dim,X,&incx,&checksA[i]);
544 1 equemene
545 1 equemene
      /* Swap vector X and Y */
546 1 equemene
      sswap(dim,X,incx,Y,incx);
547 1 equemene
#endif
548 1 equemene
549 1 equemene
    }
550 1 equemene
551 1 equemene
#elif GSL
552 1 equemene
553 1 equemene
  printf("Using GSL: %i iterations for %ix%i matrix\n",RUNS,dim,dim);
554 1 equemene
555 1 equemene
  /*
556 1 equemene
     RowMajor : Matrix is read row by row
557 1 equemene
     Upper : the no null elements are on top
558 1 equemene
     NoTrans : no transposition before estimation
559 1 equemene
     NonUnit : Matrix is not unit
560 1 equemene
   */
561 1 equemene
562 1 equemene
  for (i=0;i<RUNS;i++)
563 1 equemene
    {
564 1 equemene
565 251 equemene
#ifdef FP64
566 1 equemene
567 1 equemene
      printVector(dim,X,"X","Roots");
568 1 equemene
569 1 equemene
      /* Multiply A by X as Y <- A.X */
570 1 equemene
      cblas_dgemv(CblasRowMajor,CblasNoTrans,
571 1 equemene
                  dim,dim,alpha,A,dim,X,incx,beta,Y,incx);
572 1 equemene
573 1 equemene
      printVector(dim,Y,"Y","Results");
574 1 equemene
575 1 equemene
      /* Solve linear system : Y <- A-1.Y */
576 1 equemene
      cblas_dtrsv(CblasRowMajor,CblasUpper,CblasNoTrans,CblasNonUnit,
577 1 equemene
                  dim,A,dim,Y,incx);
578 1 equemene
579 1 equemene
      printVector(dim,Y,"Y","Solutions");
580 1 equemene
581 1 equemene
      cblas_daxpy(dim,beta2,Y,incx,X,incx);
582 1 equemene
583 1 equemene
      printVector(dim,X,"X","Errors");
584 1 equemene
585 1 equemene
      /* Store the checker of errors */
586 180 equemene
      checksA[i]=(double)cblas_dnrm2(dim,X,incx);
587 1 equemene
588 1 equemene
      cblas_dswap(dim,X,incx,Y,incx);
589 1 equemene
590 1 equemene
#else
591 1 equemene
592 1 equemene
      printVector(dim,X,"X","Roots");
593 1 equemene
594 1 equemene
      /* Multiply A by X as Y <- A.X */
595 1 equemene
      cblas_sgemv(CblasRowMajor,CblasNoTrans,
596 1 equemene
                  dim,dim,alpha,A,dim,X,incx,beta,Y,incx);
597 1 equemene
598 1 equemene
      printVector(dim,Y,"Y","Results");
599 1 equemene
600 1 equemene
      /* Solve linear system : Y <- A-1.Y */
601 1 equemene
      cblas_strsv(CblasRowMajor,CblasUpper,CblasNoTrans,CblasNonUnit,
602 1 equemene
                  dim,A,dim,Y,incx);
603 1 equemene
604 1 equemene
      printVector(dim,Y,"Y","Solutions");
605 1 equemene
606 1 equemene
      cblas_saxpy(dim,beta2,Y,incx,X,incx);
607 1 equemene
608 1 equemene
      printVector(dim,X,"X","Errors");
609 1 equemene
610 1 equemene
      /* Store the checker of errors */
611 180 equemene
      checksA[i]=(double)cblas_snrm2(dim,X,incx);
612 1 equemene
613 1 equemene
      cblas_sswap(dim,X,incx,Y,incx);
614 1 equemene
615 1 equemene
#endif
616 1 equemene
617 1 equemene
    }
618 1 equemene
#else
619 1 equemene
620 1 equemene
  printf("Using CBLAS: %i iterations for %ix%i matrix\n",RUNS,dim,dim);
621 1 equemene
622 1 equemene
  /*
623 1 equemene
     RowMajor : Matrix is read row bu row
624 1 equemene
     Upper : the no null elements are on top
625 1 equemene
     NoTrans : no transposition before estimation
626 1 equemene
     NonUnit : Matrix is not unit
627 1 equemene
   */
628 1 equemene
629 1 equemene
  for (i=0;i<RUNS;i++)
630 1 equemene
    {
631 1 equemene
632 251 equemene
#ifdef FP64
633 1 equemene
634 1 equemene
      printVector(dim,X,"X","Roots");
635 1 equemene
636 1 equemene
      /* Multiply A by X as Y <- A.X */
637 1 equemene
      cblas_dgemv(CblasRowMajor,CblasNoTrans,
638 1 equemene
                  dim,dim,alpha,A,dim,X,incx,beta,Y,incx);
639 1 equemene
640 1 equemene
      printVector(dim,Y,"Y","Results");
641 1 equemene
642 1 equemene
      /* Solve linear system : Y <- A-1.Y */
643 1 equemene
      cblas_dtrsv(CblasRowMajor,CblasUpper,CblasNoTrans,CblasNonUnit,
644 1 equemene
                  dim,A,dim,Y,incx);
645 1 equemene
646 1 equemene
      printVector(dim,Y,"Y","Solutions");
647 1 equemene
648 1 equemene
      cblas_daxpy(dim,beta2,Y,incx,X,incx);
649 1 equemene
650 1 equemene
      printVector(dim,X,"X","Errors");
651 1 equemene
652 1 equemene
      /* Store the checker of errors */
653 180 equemene
      checksA[i]=(double)cblas_dnrm2(dim,X,incx);
654 1 equemene
655 1 equemene
      cblas_dswap(dim,X,incx,Y,incx);
656 1 equemene
657 1 equemene
#else
658 1 equemene
659 1 equemene
      printVector(dim,X,"X","Roots");
660 1 equemene
661 1 equemene
      /* Multiply A by X as Y <- A.X */
662 1 equemene
      cblas_sgemv(CblasRowMajor,CblasNoTrans,
663 1 equemene
                  dim,dim,alpha,A,dim,X,incx,beta,Y,incx);
664 1 equemene
665 1 equemene
      printVector(dim,Y,"Y","Results");
666 1 equemene
667 1 equemene
      /* Solve linear system : Y <- A-1.Y */
668 1 equemene
      cblas_strsv(CblasRowMajor,CblasUpper,CblasNoTrans,CblasNonUnit,
669 1 equemene
                  dim,A,dim,Y,incx);
670 1 equemene
671 1 equemene
      printVector(dim,Y,"Y","Solutions");
672 1 equemene
673 1 equemene
      cblas_saxpy(dim,beta2,Y,incx,X,incx);
674 1 equemene
675 1 equemene
      printVector(dim,X,"X","Errors");
676 1 equemene
677 1 equemene
      /* Store the checker of errors */
678 180 equemene
      checksA[i]=(double)cblas_snrm2(dim,X,incx);
679 180 equemene
680 1 equemene
      cblas_sswap(dim,X,incx,Y,incx);
681 1 equemene
682 1 equemene
#endif
683 1 equemene
684 1 equemene
    }
685 1 equemene
#endif
686 1 equemene
  putchar('\n');
687 1 equemene
688 1 equemene
  /* Get second timer after launching */
689 1 equemene
  gettimeofday(&tv2, &tz);
690 1 equemene
691 1 equemene
#ifdef CUBLAS
692 1 equemene
  double memoryIn,memoryOut;
693 1 equemene
694 1 equemene
  memoryIn=(double)((tv3.tv_sec-tv1.tv_sec) * 1000000L +        \
695 1 equemene
                    (tv3.tv_usec-tv1.tv_usec))/1000000.;
696 1 equemene
697 1 equemene
  memoryOut=(double)((tv2.tv_sec-tv4.tv_sec) * 1000000L +        \
698 1 equemene
                    (tv2.tv_usec-tv4.tv_usec))/1000000.;
699 1 equemene
700 1 equemene
  duration=(double)((tv4.tv_sec-tv3.tv_sec) * 1000000L +        \
701 1 equemene
                    (tv4.tv_usec-tv3.tv_usec))/1000000./RUNS;
702 1 equemene
703 1 equemene
  printf("Duration of memory allocation : %2.10f s\n",memoryIn);
704 1 equemene
  printf("Duration of memory free : %2.10f s\n",memoryOut);
705 1 equemene
#else
706 1 equemene
  duration=(double)((tv2.tv_sec-tv1.tv_sec) * 1000000L +        \
707 1 equemene
                    (tv2.tv_usec-tv1.tv_usec))/1000000./RUNS;
708 1 equemene
709 1 equemene
#endif
710 1 equemene
711 1 equemene
  printf("Duration of each cycle : %2.10f s\n",duration);
712 1 equemene
713 1 equemene
  printResults(RUNS,checksA,"C","Errors cumulated");
714 1 equemene
715 1 equemene
  putchar('\n');
716 1 equemene
717 1 equemene
  /*
718 1 equemene
#ifdef PRINT
719 1 equemene
  for (i=0; i<dim; i++) {
720 1 equemene
    for (j=0; j<dim; j++) printf("A[%i,%i]=%1.5f ", i,j,A[i*dim+j]);
721 1 equemene
    putchar('\n');
722 1 equemene
  }
723 1 equemene

724 1 equemene
  for (i=0; i<dim; i++) {
725 1 equemene
    printf("X[%i]=%2.5f",i,X[i]);
726 1 equemene
    putchar('\n');
727 1 equemene
  }
728 1 equemene
  putchar('\n');
729 1 equemene
  for (i=0; i<dim; i++) {
730 1 equemene
    printf("Y[%i]=%2.5f",i,Y[i]);
731 1 equemene
    putchar('\n');
732 1 equemene
  }
733 1 equemene
#endif
734 1 equemene
  */
735 1 equemene
736 1 equemene
  return 0;
737 1 equemene
}
738 1 equemene
739 1 equemene
int main(int argc,char **argv)
740 1 equemene
{
741 1 equemene
  if ((argc==1)||
742 1 equemene
      (strcmp(argv[1],"-h")==0)||
743 1 equemene
      (strcmp(argv[1],"--help")==0))
744 1 equemene
    {
745 1 equemene
      printf("\nPerforms a bench using BLAS library implementation:\n\n"
746 1 equemene
             "\t#1 Size on triangular system\n"
747 1 equemene
             "\t#2 Number of iterations\n\n");
748 1 equemene
    }
749 1 equemene
  else if ((atoi(argv[1])>=2)&&
750 1 equemene
           (atoi(argv[2])>=1))
751 1 equemene
    {
752 1 equemene
      bench(atoi(argv[1]),atoi(argv[2]));
753 1 equemene
    }
754 1 equemene
755 1 equemene
  return 0;
756 1 equemene
}