Statistiques
| Révision :

root / BLAS / xTRSV / xTRSV.c @ 1

Historique | Voir | Annoter | Télécharger (19,31 ko)

1
/* 
2
   Performs a linear system solving of random generated system
3
   Estimates a test
4

5
   Matrix is triangular
6
   
7
   Thanks for help from aurel32@debian.org
8
*/
9

    
10
#include <stdio.h>
11
#include <math.h>
12
#include <stdlib.h>
13
#include <sys/time.h>
14
#include <string.h>
15

    
16
#ifdef CUBLAS
17
#include <cublas.h>
18
#define CUBLAS_WRAPPER_ERROR_NOERR      0
19
#define CUBLAS_WRAPPER_ERROR_ALLOC      1
20
#define CUBLAS_WRAPPER_ERROR_SET        2
21
#define CUBLAS_WRAPPER_ERROR_GET        3
22
#define CUBLAS_WRAPPER_ERROR_STUB       4
23
#elif THUNKING
24
#include <cublas.h>
25
#elif FBLAS
26
#include <cblas_f77.h>
27
#elif GSL
28
#include <gsl_cblas.h>
29
#elif ACML
30
#include <acml.h>
31
#include <acml_mv.h>
32
#else
33
#include <cblas.h>
34
#endif
35

    
36
#ifdef DOUBLE
37
#define LENGTH double
38
#else
39
#define LENGTH float
40
#endif
41

    
42
#ifdef THUNKING
43
/* WARNING !
44
Prototypes from fortran.c functions used MUST be defined here !
45
*/
46
#include "fortran_thunking.h"
47

    
48
/*
49
#ifdef DOUBLE
50

51
void CUBLAS_DCOPY (const int *n, const double *x, const int *incx, double *y,
52
                   const int *incy);
53

54
double CUBLAS_DNRM2 (const int *dim, const double *X, const int *incx);
55

56
void CUBLAS_DTRSV (const char *uplo, const char *trans, const char *diag,
57
                   const int *n, const double *A, const int *lda, double *x,
58
                   const int *incx);
59

60
void CUBLAS_DGEMV (const char *trans, const int *m, const int *n,
61
                   const double *alpha, const double *A, const int *lda,
62
                   const double *x, const int *incx, const double *beta,
63
                   double *y, const int *incy);
64

65
void CUBLAS_DSWAP (const int *n, double *x, const int *incx, double *y,
66
                   const int *incy);
67

68
void CUBLAS_DAXPY (const int *n, const double *alpha, const double *x, 
69
                   const int *incx, double *y, const int *incy);
70

71
#else
72
void CUBLAS_SCOPY (const int *n, const float *x, const int *incx, float *y,
73
                   const int *incy);
74

75
float CUBLAS_SNRM2 (const int *dim, const float *X, const int *incx);
76

77
void CUBLAS_STRSV (const char *uplo, const char *trans, const char *diag,
78
                   const int *n, const float *A, const int *lda, float *x,
79
                   const int *incx);
80

81
void CUBLAS_SGEMV (const char *trans, const int *m, const int *n,
82
                   const float *alpha, const float *A, const int *lda,
83
                   const float *x, const int *incx, const float *beta,
84
                   float *y, const int *incy);
85

86
void CUBLAS_SSWAP (const int *n, float *x, const int *incx, float *y,
87
                   const int *incy);
88

89
void CUBLAS_SAXPY (const int *n, const float *alpha, const float *x, 
90
                   const int *incx, float *y, const int *incy);
91

92
#endif
93
*/
94

    
95
#elif FBLAS
96

    
97
#ifdef DOUBLE
98

    
99
void dtrsv_( FCHAR, FCHAR, FCHAR, FINT, const double *, FINT, double *, FINT);
100

    
101
void dgemv_(FCHAR, FINT, FINT, const double *, const double *, FINT, 
102
               const double *, FINT, const double *, double *, FINT);
103

    
104
void dswap_( FINT, double *, FINT, double *, FINT);
105

    
106
void daxpy_( FINT, const double *, const double *, FINT, double *, FINT);
107

    
108
void dnrm2_( FINT, const double *, FINT, double *);
109

    
110
#else
111

    
112
void strsv_( FCHAR, FCHAR, FCHAR, FINT, const float *, FINT, float *, FINT);
113

    
114
void sgemv_(FCHAR, FINT, FINT, const float *, const float *, FINT, 
115
               const float *, FINT, const float *, float *, FINT);
116

    
117
void sswap_( FINT, float *, FINT, float *, FINT);
118

    
119
void saxpy_( FINT, const float *, const float *, FINT, float *, FINT);
120

    
121
void snrm2_( FINT, const float *, FINT, float *);
122

    
123
#endif
124

    
125
#endif
126

    
127
/* Matrix with only defined triangular terms */
128
/* Even if there are 0 in matrix, must be defined at all ! */
129

    
130
/* Get from fortran.c */
131

    
132
#ifdef CUBLAS
133
static char *errMsg[5] = 
134
{
135
    "no error",
136
    "allocation error",
137
    "setVector/setMatrix error",
138
    "getVector/getMatrix error",
139
    "not implemented"
140
};
141

    
142
static void wrapperError (const char *funcName, int error)
143
{
144
    printf ("cublas%s wrapper: %s\n", funcName, errMsg[error]);
145
    fflush (stdout);
146
}
147
#endif
148

    
149
int printVector(const int dimVector,const LENGTH *dataVector,
150
                char *nameVector,char *mesgVector)
151
{
152
#ifndef QUIET
153

    
154
  int i;
155
  printf("\n%s of %s, size %i:\n",mesgVector,nameVector,dimVector);
156
  for (i=0;i<dimVector;i++)
157
    {
158
      printf("%s[%i]=%2.10e\n",nameVector,i,dataVector[i]);
159
    }
160
#endif
161

    
162
  return 0;
163
}
164
  
165
int printResults(const int dimVector,const LENGTH *dataVector,
166
                 char *nameVector,char *mesgVector)
167
{
168
#ifdef RESULTS
169
  int i;
170

    
171
  printf("\n%s of %s, size %i:\n",mesgVector,nameVector,dimVector);
172
  for (i=0;i<dimVector;i++)
173
    {
174
      printf("%s[%i]=%2.10e\n",nameVector,i,dataVector[i]);
175
    }
176
#endif
177
  return 0;
178
}
179
  
180
#ifdef CUBLAS
181
int printVectorGPU(const int dimVector,const LENGTH *dataVector,
182
                   char *nameVector,char *mesgVector)
183
{
184
#ifndef QUIET
185
  int i;
186
  cublasStatus stat;
187
  LENGTH *P=0;
188
  int incx=1;
189

    
190
  P=malloc(dimVector*sizeof(LENGTH));
191
  
192
  stat=cublasGetVector(dimVector,sizeof(P[0]),dataVector,incx,P,incx);
193

    
194
  if (stat != CUBLAS_STATUS_SUCCESS) {
195
    wrapperError ("ToGet", CUBLAS_WRAPPER_ERROR_GET);
196
  }  
197

    
198
  printf("\n%s of %s, size %i:\n",mesgVector,nameVector,dimVector);
199
  for (i=0;i<dimVector;i++)
200
    {
201
      printf("%s[%i]=%2.10e\n",nameVector,i,P[i]);
202
    }
203

    
204
  free(P);  
205
#endif
206

    
207
  return 0;
208
}
209
#endif
210

    
211
int bench(int dim,int RUNS)
212
{
213
  /*
214
  int dim=1000;
215
  int RUNS=100;
216
  */
217
  int incx=1;
218
#ifdef PRINT
219
  LENGTH factor=1.;
220
#endif
221

    
222
  LENGTH alpha=1.,beta=0.,beta2=-1.;
223
  LENGTH *A,*X,*Y;
224

    
225
  /* checkBefore checkAfter checks */
226
  LENGTH *checksA,*checksB;
227

    
228
  int i=0, j=0;
229

    
230
  double duration;
231

    
232
  struct timeval tv1,tv2;
233
  struct timezone tz;
234

    
235
  /* Create 1 Matrix and 2 Vectors of dimension dim  */
236

    
237
  A=malloc(dim*dim*sizeof(LENGTH));
238
  X=malloc(dim*sizeof(LENGTH));
239
  Y=malloc(dim*sizeof(LENGTH));
240

    
241
  /* Create 2 vectors for checker Before and After */
242

    
243
  checksA=malloc(RUNS*sizeof(double));
244
  checksB=malloc(RUNS*sizeof(double));
245

    
246
  /* Initialize elements with random numbers */
247
  /* Initialize the seed for rand() */
248
  /* srand(time()); */
249

    
250
#ifdef UNIT
251
  /* Fill the matrix and vector with random numbers */
252
  for (i=0; i<dim; i++) {
253
    for (j=0; j<dim; j++) 
254
      if (j>=i)
255
        {
256
          /* Normalization is necessary to avoid problems */
257
          A[i*dim+j]=1.;
258
        }
259
      else
260
        {
261
           A[i*dim+j]=0.;
262
        }
263
    X[i]=1;
264
  }
265
#else
266
  for (i=0; i<dim; i++) {
267
    for (j=0; j<dim; j++) 
268
      if (j>i)
269
        {
270
          /* Normalization is necessary to avoid problems */
271
          A[i*dim+j]=(LENGTH)rand()/(RAND_MAX+1.)
272
            *(LENGTH)(i+1.)/(LENGTH)(j+1.);
273
        }
274
      else if (j==i)
275
        {
276
           A[i*dim+j]=1.;
277
        }
278
      else
279
        {
280
           A[i*dim+j]=0.;
281
        }
282
    X[i]=(LENGTH)rand()/(RAND_MAX+1.);
283
  }
284
#endif
285

    
286
  /* Print the matrix */
287

    
288
#ifdef QUIET
289
#else
290
  for (i=0; i<dim; i++) {
291
    for (j=0; j<dim; j++) printf("A[%i,%i]=%1.5f ", i,j,A[i*dim+j]);
292
    printf("\tX[%i]=%1.5f ", i,X[i]);
293
    putchar('\n');
294
  }
295
  putchar('\n');
296
#endif
297

    
298
  /* Get first timer before launching */
299
  gettimeofday(&tv1, &tz);
300

    
301
  /* Compute with CuBLAS library  */
302
#ifdef CUBLAS
303
  LENGTH *devPtrA=0, *devPtrX=0, *devPtrY=0;
304
  cublasStatus stat1, stat2, stat3;
305
  struct timeval tv3,tv4;
306

    
307
  /* Order is Row */
308
  /* Have to swap uplo and trans */
309
  char uplo='L',trans='T',diag='N';
310

    
311
  printf("Using CuBLAS: %i iterations for %ix%i matrix\n",
312
         RUNS,dim,dim);
313

    
314
  stat1=cublasAlloc(dim*dim,sizeof(devPtrA[0]),(void**)&devPtrA);
315
  stat2=cublasAlloc(dim,sizeof(devPtrX[0]),(void**)&devPtrX);
316
  stat3=cublasAlloc(dim,sizeof(devPtrY[0]),(void**)&devPtrY);
317

    
318
  if ((stat1 != CUBLAS_STATUS_SUCCESS) || 
319
      (stat2 != CUBLAS_STATUS_SUCCESS) ||
320
      (stat3 != CUBLAS_STATUS_SUCCESS)) {
321
    wrapperError ("Dtrsv", CUBLAS_WRAPPER_ERROR_ALLOC);
322
    cublasFree (devPtrA);
323
    cublasFree (devPtrX);
324
    cublasFree (devPtrY);
325
    return 1;
326
  }
327

    
328
  stat1=cublasSetMatrix(dim,dim,sizeof(A[0]),A,dim,devPtrA,dim);
329
  stat2=cublasSetVector(dim,sizeof(X[0]),X,incx,devPtrX,incx);
330
  stat3=cublasSetVector(dim,sizeof(Y[0]),Y,incx,devPtrY,incx);
331
  
332
  if ((stat1 != CUBLAS_STATUS_SUCCESS) ||
333
      (stat2 != CUBLAS_STATUS_SUCCESS) ||
334
      (stat3 != CUBLAS_STATUS_SUCCESS)) {
335
    wrapperError ("Dtrsv", CUBLAS_WRAPPER_ERROR_SET);
336
    cublasFree (devPtrA);
337
    cublasFree (devPtrX);
338
    cublasFree (devPtrY);
339
    return 1;
340
  }
341

    
342
  /* Get third timer after memory operation */
343
  gettimeofday(&tv3, &tz);
344

    
345
  for (i=0;i<RUNS;i++)
346
    {
347
#ifdef DOUBLE
348

    
349
      printVectorGPU(dim,devPtrX,"X","Roots");
350

    
351
      /* Multiply Y <- A.X */
352
      cublasDgemv(trans,dim,dim,alpha,devPtrA,dim,
353
                  devPtrX,incx,beta,devPtrY,incx);
354

    
355
      printVectorGPU(dim,devPtrY,"Y","Results");
356

    
357
      /* Solve linear system A.X=Y : Y <- A-1.Y */
358
      cublasDtrsv(uplo,trans,diag,dim,devPtrA,dim,devPtrY,incx);
359

    
360
      printVectorGPU(dim,devPtrY,"Y","Solutions");
361

    
362
      /* Estimate the difference between X and Y : Y <- -Y+X */
363
      cublasDaxpy(dim,beta2,devPtrY,incx,devPtrX,incx);
364

    
365
      printVectorGPU(dim,devPtrX,"X","Errors");
366

    
367
      /* Estimate the second checker */
368
/*       checksA[i]=(double)cublasDnrm2(dim,devPtrX,incx); */
369

    
370
      /* Swap vector X and Y */
371
      cublasDswap(dim,devPtrX,incx,devPtrY,incx);
372

    
373
#else
374

    
375
      printVectorGPU(dim,devPtrX,"X","Roots");
376

    
377
      /* Multiply Y <- A.X */
378
      cublasSgemv(trans,dim,dim,alpha,devPtrA,dim,
379
                  devPtrX,incx,beta,devPtrY,incx);
380

    
381
      printVectorGPU(dim,devPtrY,"Y","Results");
382

    
383
      /* Solve linear system Y <- A-1.Y */
384
      cublasStrsv(uplo,trans,diag,dim,devPtrA,dim,devPtrY,incx);
385

    
386
      printVectorGPU(dim,devPtrY,"Y","Solutions");
387

    
388
      /* Add vectors X and -Y */
389
      cublasSaxpy(dim,beta2,devPtrY,incx,devPtrX,incx);
390

    
391
      printVectorGPU(dim,devPtrX,"X","Errors");
392

    
393
      /* Estimate the second checker */
394
/*       checksA[i]=(double)cublasSnrm2(dim,devPtrX,incx); */
395

    
396
      /* Swap vector X and Y */
397
      cublasSswap(dim,devPtrX,incx,devPtrY,incx);
398

    
399
#endif
400
  
401
    }
402

    
403
  stat1=cublasGetMatrix(dim,dim,sizeof(A[0]),devPtrA,dim,A,dim);
404
  stat2=cublasGetVector(dim,sizeof(X[0]),devPtrX,incx,X,incx);
405
  stat3=cublasGetVector(dim,sizeof(Y[0]),devPtrY,incx,Y,incx);
406
  
407
  cublasFree (devPtrA);
408
  cublasFree (devPtrX);
409
  cublasFree (devPtrY);
410
  
411
  if ((stat1 != CUBLAS_STATUS_SUCCESS) ||
412
      (stat2 != CUBLAS_STATUS_SUCCESS) ||
413
      (stat3 != CUBLAS_STATUS_SUCCESS)) {
414
    wrapperError ("LinearSystem", CUBLAS_WRAPPER_ERROR_GET);
415
  }
416
  
417
  /* Get fourth timer after memory free */
418
  gettimeofday(&tv4, &tz);
419

    
420
#elif THUNKING
421
  
422
  /* Order is Row : Have to swap uplo='U' and trans='N' */
423
  char uplo='L',trans='T',diag='N';
424
  printf("Using CuBLAS/Thunking: %i iterations for %ix%i matrix\n",
425
         RUNS,dim,dim);
426

    
427
  for (i=0;i<RUNS;i++)
428
    {
429
#ifdef DOUBLE
430
      
431
      printVector(dim,X,"X","Roots");
432
      
433
      /* Multiply A by X as Y <- A.X */
434
      CUBLAS_DGEMV(&trans,&dim,&dim,&alpha,A,&dim,X,&incx,&beta,Y,&incx);
435
      
436
      printVector(dim,Y,"Y","Results");
437

    
438
      /* Solve linear system */
439
      CUBLAS_DTRSV(&uplo,&trans,&diag,&dim,A,&dim,Y,&incx);
440
      
441
      printVector(dim,Y,"Y","Solutions");
442

    
443
      /* Compare the roots X and Y */
444
      CUBLAS_DAXPY(&dim,&beta2,Y,&incx,X,&incx);
445

    
446
      printVector(dim,X,"X","Errors");
447

    
448
      /* Store the checker of errors */
449
/*       checksA[i]=(double)CUBLAS_DNRM2(&dim,X,&incx); */
450

    
451
      /* Swap vector X and Y */
452
      CUBLAS_DSWAP(&dim,X,&incx,Y,&incx);
453
#else
454

    
455
      printVector(dim,X,"X","Roots");
456
      
457
      /* Multiply A by X as Y <- A.X */
458
      CUBLAS_SGEMV(&trans,&dim,&dim,&alpha,A,&dim,X,&incx,&beta,Y,&incx);
459
      
460
      printVector(dim,Y,"Y","Results");
461

    
462
      /* Solve linear system */
463
      CUBLAS_STRSV(&uplo,&trans,&diag,&dim,A,&dim,Y,&incx);
464
      
465
      printVector(dim,Y,"Y","Solutions");
466

    
467
      /* Compare the roots X and Y */
468
      CUBLAS_SAXPY(&dim,&beta2,Y,&incx,X,&incx);
469

    
470
      printVector(dim,X,"X","Errors");
471

    
472
      /* Store the checker of errors */
473
/*       checksA[i]=(double)CUBLAS_SNRM2(&dim,X,&incx); */
474

    
475
      /* Swap vector X and Y */
476
      CUBLAS_SSWAP(&dim,X,&incx,Y,&incx);
477
#endif
478

    
479
#ifdef PRINT
480
      printf("Iteration %i, checker is %2.5f and error is %2.10f\n",
481
             i,checksA[i],fabs(checksB[i]-checksA[i])/factor);
482
#endif
483
    }
484

    
485
#elif FBLAS
486
  
487
  /* Order is Row : Have to swap uplo='U' and trans='N' */
488
  char uplo='L',trans='T',diag='N';
489
  
490
  printf("Using FBLAS: %i iterations for %ix%i matrix\n",
491
         RUNS,dim,dim);
492
  
493
  for (i=0;i<RUNS;i++)
494
    {
495
#ifdef DOUBLE
496
      
497
      printVector(dim,X,"X","Roots");
498
      
499
      /* Multiply A by X as Y <- A.X */
500
      dgemv_(&trans,&dim,&dim,&alpha,A,&dim,X,&incx,&beta,Y,&incx);
501
      
502
      printVector(dim,Y,"Y","Results");
503
      
504
      /* Solve linear system */
505
      dtrsv_(&uplo,&trans,&diag,&dim,A,&dim,Y,&incx);
506
      
507
      printVector(dim,Y,"Y","Solutions");
508
      
509
      /* Compare the roots X and Y */
510
      daxpy_(&dim,&beta2,Y,&incx,X,&incx);
511
      
512
      printVector(dim,X,"X","Errors");
513
      
514
      /* Store the checker of errors */
515
/*       dnrm2_(&dim,X,&incx,&checksA[i]); */
516
            
517
      /* Swap vector X and Y */
518
      dswap_(&dim,X,&incx,Y,&incx);
519

    
520
#else
521

    
522
      printVector(dim,X,"X","Roots");
523
      
524
      /* Multiply A by X as Y <- A.X */
525
      sgemv_(&trans,&dim,&dim,&alpha,A,&dim,X,&incx,&beta,Y,&incx);
526
      
527
      printVector(dim,Y,"Y","Results");
528

    
529
      /* Solve linear system */
530
      strsv_(&uplo,&trans,&diag,&dim,A,&dim,Y,&incx);
531
      
532
      printVector(dim,Y,"Y","Solutions");
533

    
534
      /* Compare the roots X and Y */
535
      saxpy_(&dim,&beta2,Y,&incx,X,&incx);
536

    
537
      printVector(dim,X,"X","Errors");
538

    
539
      /* Store the checker of errors */
540
/*       snrm2_(&dim,X,&incx,&checksA[i]); */
541

    
542
      /* Swap vector X and Y */
543
      sswap_(&dim,X,&incx,Y,&incx);
544
#endif
545

    
546
    }
547

    
548
#elif ACML
549
  
550
  /* Order is Row : Have to swap uplo='U' and trans='N' */
551
  char uplo='L',trans='T',diag='N';
552
  
553
  printf("Using ACML: %i iterations for %ix%i matrix\n",
554
         RUNS,dim,dim);
555
  
556
  for (i=0;i<RUNS;i++)
557
    {
558
#ifdef DOUBLE
559
      
560
      printVector(dim,X,"X","Roots");
561
      
562
      /* Multiply A by X as Y <- A.X */
563
      dgemv(trans,dim,dim,alpha,A,dim,X,incx,beta,Y,incx);
564
      
565
      printVector(dim,Y,"Y","Results");
566
      
567
      /* Solve linear system */
568
      dtrsv(uplo,trans,diag,dim,A,dim,Y,incx);
569
      
570
      printVector(dim,Y,"Y","Solutions");
571
      
572
      /* Compare the roots X and Y */
573
      daxpy(dim,beta2,Y,incx,X,incx);
574
      
575
      printVector(dim,X,"X","Errors");
576
      
577
      /* Store the checker of errors */
578
/*       dnrm2_(&dim,X,&incx,&checksA[i]); */
579
            
580
      /* Swap vector X and Y */
581
      dswap(dim,X,incx,Y,incx);
582

    
583
#else
584

    
585
      printVector(dim,X,"X","Roots");
586
      
587
      /* Multiply A by X as Y <- A.X */
588
      sgemv(trans,dim,dim,alpha,A,dim,X,incx,beta,Y,incx);
589
      
590
      printVector(dim,Y,"Y","Results");
591

    
592
      /* Solve linear system */
593
      strsv(uplo,trans,diag,dim,A,dim,Y,incx);
594
      
595
      printVector(dim,Y,"Y","Solutions");
596

    
597
      /* Compare the roots X and Y */
598
      saxpy(dim,beta2,Y,incx,X,incx);
599

    
600
      printVector(dim,X,"X","Errors");
601

    
602
      /* Store the checker of errors */
603
/*       snrm2_(&dim,X,&incx,&checksA[i]); */
604

    
605
      /* Swap vector X and Y */
606
      sswap(dim,X,incx,Y,incx);
607
#endif
608

    
609
    }
610

    
611
#elif GSL
612

    
613
  printf("Using GSL: %i iterations for %ix%i matrix\n",RUNS,dim,dim);
614

    
615
  /* 
616
     RowMajor : Matrix is read row by row
617
     Upper : the no null elements are on top
618
     NoTrans : no transposition before estimation
619
     NonUnit : Matrix is not unit
620
   */
621

    
622
  for (i=0;i<RUNS;i++)
623
    {  
624

    
625
#ifdef DOUBLE
626

    
627
      printVector(dim,X,"X","Roots");
628

    
629
      /* Multiply A by X as Y <- A.X */
630
      cblas_dgemv(CblasRowMajor,CblasNoTrans,
631
                  dim,dim,alpha,A,dim,X,incx,beta,Y,incx);
632

    
633
      printVector(dim,Y,"Y","Results");
634

    
635
      /* Solve linear system : Y <- A-1.Y */
636
      cblas_dtrsv(CblasRowMajor,CblasUpper,CblasNoTrans,CblasNonUnit,
637
                  dim,A,dim,Y,incx);
638

    
639
      printVector(dim,Y,"Y","Solutions");
640
      
641
      cblas_daxpy(dim,beta2,Y,incx,X,incx);
642

    
643
      printVector(dim,X,"X","Errors");
644

    
645
      /* Store the checker of errors */
646
/*       checksA[i]=(double)cblas_dnrm2(dim,X,incx); */
647

    
648
      cblas_dswap(dim,X,incx,Y,incx);
649
      
650
#else
651

    
652
      printVector(dim,X,"X","Roots");
653

    
654
      /* Multiply A by X as Y <- A.X */
655
      cblas_sgemv(CblasRowMajor,CblasNoTrans,
656
                  dim,dim,alpha,A,dim,X,incx,beta,Y,incx);
657

    
658
      printVector(dim,Y,"Y","Results");
659

    
660
      /* Solve linear system : Y <- A-1.Y */
661
      cblas_strsv(CblasRowMajor,CblasUpper,CblasNoTrans,CblasNonUnit,
662
                  dim,A,dim,Y,incx);
663

    
664
      printVector(dim,Y,"Y","Solutions");
665
      
666
      cblas_saxpy(dim,beta2,Y,incx,X,incx);
667

    
668
      printVector(dim,X,"X","Errors");
669

    
670
      /* Store the checker of errors */
671
/*       checksA[i]=(double)cblas_snrm2(dim,X,incx); */
672

    
673
      cblas_sswap(dim,X,incx,Y,incx);
674
      
675
#endif
676
      
677
    }
678
#else
679

    
680
  printf("Using CBLAS: %i iterations for %ix%i matrix\n",RUNS,dim,dim);
681

    
682
  /* 
683
     RowMajor : Matrix is read row bu row
684
     Upper : the no null elements are on top
685
     NoTrans : no transposition before estimation
686
     NonUnit : Matrix is not unit
687
   */
688

    
689
  for (i=0;i<RUNS;i++)
690
    {  
691

    
692
#ifdef DOUBLE
693

    
694
      printVector(dim,X,"X","Roots");
695

    
696
      /* Multiply A by X as Y <- A.X */
697
      cblas_dgemv(CblasRowMajor,CblasNoTrans,
698
                  dim,dim,alpha,A,dim,X,incx,beta,Y,incx);
699

    
700
      printVector(dim,Y,"Y","Results");
701

    
702
      /* Solve linear system : Y <- A-1.Y */
703
      cblas_dtrsv(CblasRowMajor,CblasUpper,CblasNoTrans,CblasNonUnit,
704
                  dim,A,dim,Y,incx);
705

    
706
      printVector(dim,Y,"Y","Solutions");
707
      
708
      cblas_daxpy(dim,beta2,Y,incx,X,incx);
709

    
710
      printVector(dim,X,"X","Errors");
711

    
712
      /* Store the checker of errors */
713
/*       checksA[i]=(double)cblas_dnrm2(dim,X,incx); */
714

    
715
      cblas_dswap(dim,X,incx,Y,incx);
716
      
717
#else
718

    
719
      printVector(dim,X,"X","Roots");
720

    
721
      /* Multiply A by X as Y <- A.X */
722
      cblas_sgemv(CblasRowMajor,CblasNoTrans,
723
                  dim,dim,alpha,A,dim,X,incx,beta,Y,incx);
724

    
725
      printVector(dim,Y,"Y","Results");
726

    
727
      /* Solve linear system : Y <- A-1.Y */
728
      cblas_strsv(CblasRowMajor,CblasUpper,CblasNoTrans,CblasNonUnit,
729
                  dim,A,dim,Y,incx);
730

    
731
      printVector(dim,Y,"Y","Solutions");
732
      
733
      cblas_saxpy(dim,beta2,Y,incx,X,incx);
734

    
735
      printVector(dim,X,"X","Errors");
736

    
737
      /* Store the checker of errors */
738
/*       checksA[i]=(double)cblas_snrm2(dim,X,incx); */
739

    
740
      cblas_sswap(dim,X,incx,Y,incx);
741
      
742
#endif
743

    
744
    }
745
#endif
746
  putchar('\n');
747

    
748
  /* Get second timer after launching */
749
  gettimeofday(&tv2, &tz);
750

    
751
#ifdef CUBLAS
752
  double memoryIn,memoryOut;
753

    
754
  memoryIn=(double)((tv3.tv_sec-tv1.tv_sec) * 1000000L +        \
755
                    (tv3.tv_usec-tv1.tv_usec))/1000000.;  
756

    
757
  memoryOut=(double)((tv2.tv_sec-tv4.tv_sec) * 1000000L +        \
758
                    (tv2.tv_usec-tv4.tv_usec))/1000000.;  
759

    
760
  duration=(double)((tv4.tv_sec-tv3.tv_sec) * 1000000L +        \
761
                    (tv4.tv_usec-tv3.tv_usec))/1000000./RUNS;  
762

    
763
  printf("Duration of memory allocation : %2.10f s\n",memoryIn);
764
  printf("Duration of memory free : %2.10f s\n",memoryOut);
765
#else
766
  duration=(double)((tv2.tv_sec-tv1.tv_sec) * 1000000L +        \
767
                    (tv2.tv_usec-tv1.tv_usec))/1000000./RUNS;  
768

    
769
#endif
770

    
771
  printf("Duration of each cycle : %2.10f s\n",duration);
772

    
773
  printResults(RUNS,checksA,"C","Errors cumulated");
774

    
775
  putchar('\n');
776

    
777
  /*
778
#ifdef PRINT
779
  for (i=0; i<dim; i++) {
780
    for (j=0; j<dim; j++) printf("A[%i,%i]=%1.5f ", i,j,A[i*dim+j]);
781
    putchar('\n');
782
  }
783

784
  for (i=0; i<dim; i++) {
785
    printf("X[%i]=%2.5f",i,X[i]);
786
    putchar('\n');
787
  }
788
  putchar('\n');
789
  for (i=0; i<dim; i++) {
790
    printf("Y[%i]=%2.5f",i,Y[i]);
791
    putchar('\n');
792
  }
793
#endif
794
  */
795

    
796
  return 0;
797
}
798

    
799
int main(int argc,char **argv)
800
{
801
  if ((argc==1)||
802
      (strcmp(argv[1],"-h")==0)||
803
      (strcmp(argv[1],"--help")==0))
804
    {
805
      printf("\nPerforms a bench using BLAS library implementation:\n\n"
806
             "\t#1 Size on triangular system\n"
807
             "\t#2 Number of iterations\n\n");
808
    }
809
  else if ((atoi(argv[1])>=2)&&
810
           (atoi(argv[2])>=1))
811
    {
812
      bench(atoi(argv[1]),atoi(argv[2]));
813
    }
814

    
815
  return 0;
816
}