Statistiques
| Révision :

root / BLAS / xTRSV / xTRSV.c @ 7

Historique | Voir | Annoter | Télécharger (17,49 ko)

1
/* 
2
   Performs a linear system solving of random generated system
3
   Estimates a test
4

5
   Matrix is triangular
6
   
7
   Thanks for help from aurel32@debian.org
8
*/
9

    
10
#include <stdio.h>
11
#include <math.h>
12
#include <stdlib.h>
13
#include <sys/time.h>
14
#include <string.h>
15

    
16
#ifdef CUBLAS
17
#include <cublas.h>
18
#define CUBLAS_WRAPPER_ERROR_NOERR      0
19
#define CUBLAS_WRAPPER_ERROR_ALLOC      1
20
#define CUBLAS_WRAPPER_ERROR_SET        2
21
#define CUBLAS_WRAPPER_ERROR_GET        3
22
#define CUBLAS_WRAPPER_ERROR_STUB       4
23
#elif THUNKING
24
#include <cublas.h>
25
#include "fortran_common.h"
26
#include "fortran_thunking.h"
27
#elif FBLAS
28
#include <cblas_f77.h>
29
#elif GSL
30
#include <gsl_cblas.h>
31
#elif ACML
32
#include <acml.h>
33
#include <acml_mv.h>
34
#else
35
#include <cblas.h>
36
#endif
37

    
38
#ifdef DOUBLE
39
#define LENGTH double
40
#else
41
#define LENGTH float
42
#endif
43

    
44
#ifdef FBLAS
45

    
46
#ifdef DOUBLE
47

    
48
void dtrsv_( FCHAR, FCHAR, FCHAR, FINT, const double *, FINT, double *, FINT);
49

    
50
void dgemv_(FCHAR, FINT, FINT, const double *, const double *, FINT, 
51
               const double *, FINT, const double *, double *, FINT);
52

    
53
void dswap_( FINT, double *, FINT, double *, FINT);
54

    
55
void daxpy_( FINT, const double *, const double *, FINT, double *, FINT);
56

    
57
void dnrm2_( FINT, const double *, FINT, double *);
58

    
59
#else
60

    
61
void strsv_( FCHAR, FCHAR, FCHAR, FINT, const float *, FINT, float *, FINT);
62

    
63
void sgemv_(FCHAR, FINT, FINT, const float *, const float *, FINT, 
64
               const float *, FINT, const float *, float *, FINT);
65

    
66
void sswap_( FINT, float *, FINT, float *, FINT);
67

    
68
void saxpy_( FINT, const float *, const float *, FINT, float *, FINT);
69

    
70
void snrm2_( FINT, const float *, FINT, float *);
71

    
72
#endif
73

    
74
#endif
75

    
76
/* Matrix with only defined triangular terms */
77
/* Even if there are 0 in matrix, must be defined at all ! */
78

    
79
/* Get from fortran.c */
80

    
81
#ifdef CUBLAS
82
static char *errMsg[5] = 
83
{
84
    "no error",
85
    "allocation error",
86
    "setVector/setMatrix error",
87
    "getVector/getMatrix error",
88
    "not implemented"
89
};
90

    
91
static void wrapperError (const char *funcName, int error)
92
{
93
    printf ("cublas%s wrapper: %s\n", funcName, errMsg[error]);
94
    fflush (stdout);
95
}
96
#endif
97

    
98
int printVector(const int dimVector,const LENGTH *dataVector,
99
                char *nameVector,char *mesgVector)
100
{
101
#ifndef QUIET
102

    
103
  int i;
104
  printf("\n%s of %s, size %i:\n",mesgVector,nameVector,dimVector);
105
  for (i=0;i<dimVector;i++)
106
    {
107
      printf("%s[%i]=%2.10e\n",nameVector,i,dataVector[i]);
108
    }
109
#endif
110

    
111
  return 0;
112
}
113
  
114
int printResults(const int dimVector,const LENGTH *dataVector,
115
                 char *nameVector,char *mesgVector)
116
{
117
#ifdef RESULTS
118
  int i;
119

    
120
  printf("\n%s of %s, size %i:\n",mesgVector,nameVector,dimVector);
121
  for (i=0;i<dimVector;i++)
122
    {
123
      printf("%s[%i]=%2.10e\n",nameVector,i,dataVector[i]);
124
    }
125
#endif
126
  return 0;
127
}
128
  
129
#ifdef CUBLAS
130
int printVectorGPU(const int dimVector,const LENGTH *dataVector,
131
                   char *nameVector,char *mesgVector)
132
{
133
#ifndef QUIET
134
  int i;
135
  cublasStatus stat;
136
  LENGTH *P=0;
137
  int incx=1;
138

    
139
  P=malloc(dimVector*sizeof(LENGTH));
140
  
141
  stat=cublasGetVector(dimVector,sizeof(P[0]),dataVector,incx,P,incx);
142

    
143
  if (stat != CUBLAS_STATUS_SUCCESS) {
144
    wrapperError ("ToGet", CUBLAS_WRAPPER_ERROR_GET);
145
  }  
146

    
147
  printf("\n%s of %s, size %i:\n",mesgVector,nameVector,dimVector);
148
  for (i=0;i<dimVector;i++)
149
    {
150
      printf("%s[%i]=%2.10e\n",nameVector,i,P[i]);
151
    }
152

    
153
  free(P);  
154
#endif
155

    
156
  return 0;
157
}
158
#endif
159

    
160
int bench(int dim,int RUNS)
161
{
162
  /*
163
  int dim=1000;
164
  int RUNS=100;
165
  */
166
  int incx=1;
167
#ifdef PRINT
168
  LENGTH factor=1.;
169
#endif
170

    
171
  LENGTH alpha=1.,beta=0.,beta2=-1.;
172
  LENGTH *A,*X,*Y;
173

    
174
  /* checkBefore checkAfter checks */
175
  LENGTH *checksA,*checksB;
176

    
177
  int i=0, j=0;
178

    
179
  double duration;
180

    
181
  struct timeval tv1,tv2;
182
  struct timezone tz;
183

    
184
  /* Create 1 Matrix and 2 Vectors of dimension dim  */
185

    
186
  A=malloc(dim*dim*sizeof(LENGTH));
187
  X=malloc(dim*sizeof(LENGTH));
188
  Y=malloc(dim*sizeof(LENGTH));
189

    
190
  /* Create 2 vectors for checker Before and After */
191

    
192
  checksA=malloc(RUNS*sizeof(double));
193
  checksB=malloc(RUNS*sizeof(double));
194

    
195
  /* Initialize elements with random numbers */
196
  /* Initialize the seed for rand() */
197
  /* srand(time()); */
198

    
199
#ifdef UNIT
200
  /* Fill the matrix and vector with random numbers */
201
  for (i=0; i<dim; i++) {
202
    for (j=0; j<dim; j++) 
203
      if (j>=i)
204
        {
205
          /* Normalization is necessary to avoid problems */
206
          A[i*dim+j]=1.;
207
        }
208
      else
209
        {
210
           A[i*dim+j]=0.;
211
        }
212
    X[i]=1;
213
  }
214
#else
215
  for (i=0; i<dim; i++) {
216
    for (j=0; j<dim; j++) 
217
      if (j>i)
218
        {
219
          /* Normalization is necessary to avoid problems */
220
          A[i*dim+j]=(LENGTH)rand()/(RAND_MAX+1.)
221
            *(LENGTH)(i+1.)/(LENGTH)(j+1.);
222
        }
223
      else if (j==i)
224
        {
225
           A[i*dim+j]=1.;
226
        }
227
      else
228
        {
229
           A[i*dim+j]=0.;
230
        }
231
    X[i]=(LENGTH)rand()/(RAND_MAX+1.);
232
  }
233
#endif
234

    
235
  /* Print the matrix */
236

    
237
#ifdef QUIET
238
#else
239
  for (i=0; i<dim; i++) {
240
    for (j=0; j<dim; j++) printf("A[%i,%i]=%1.5f ", i,j,A[i*dim+j]);
241
    printf("\tX[%i]=%1.5f ", i,X[i]);
242
    putchar('\n');
243
  }
244
  putchar('\n');
245
#endif
246

    
247
  /* Get first timer before launching */
248
  gettimeofday(&tv1, &tz);
249

    
250
  /* Compute with CuBLAS library  */
251
#ifdef CUBLAS
252
  LENGTH *devPtrA=0, *devPtrX=0, *devPtrY=0;
253
  cublasStatus stat1, stat2, stat3;
254
  struct timeval tv3,tv4;
255

    
256
  /* Order is Row */
257
  /* Have to swap uplo and trans */
258
  char uplo='L',trans='T',diag='N';
259

    
260
  printf("Using CuBLAS: %i iterations for %ix%i matrix\n",
261
         RUNS,dim,dim);
262

    
263
  stat1=cublasAlloc(dim*dim,sizeof(devPtrA[0]),(void**)&devPtrA);
264
  stat2=cublasAlloc(dim,sizeof(devPtrX[0]),(void**)&devPtrX);
265
  stat3=cublasAlloc(dim,sizeof(devPtrY[0]),(void**)&devPtrY);
266

    
267
  if ((stat1 != CUBLAS_STATUS_SUCCESS) || 
268
      (stat2 != CUBLAS_STATUS_SUCCESS) ||
269
      (stat3 != CUBLAS_STATUS_SUCCESS)) {
270
    wrapperError ("Dtrsv", CUBLAS_WRAPPER_ERROR_ALLOC);
271
    cublasFree (devPtrA);
272
    cublasFree (devPtrX);
273
    cublasFree (devPtrY);
274
    return 1;
275
  }
276

    
277
  stat1=cublasSetMatrix(dim,dim,sizeof(A[0]),A,dim,devPtrA,dim);
278
  stat2=cublasSetVector(dim,sizeof(X[0]),X,incx,devPtrX,incx);
279
  stat3=cublasSetVector(dim,sizeof(Y[0]),Y,incx,devPtrY,incx);
280
  
281
  if ((stat1 != CUBLAS_STATUS_SUCCESS) ||
282
      (stat2 != CUBLAS_STATUS_SUCCESS) ||
283
      (stat3 != CUBLAS_STATUS_SUCCESS)) {
284
    wrapperError ("Dtrsv", CUBLAS_WRAPPER_ERROR_SET);
285
    cublasFree (devPtrA);
286
    cublasFree (devPtrX);
287
    cublasFree (devPtrY);
288
    return 1;
289
  }
290

    
291
  /* Get third timer after memory operation */
292
  gettimeofday(&tv3, &tz);
293

    
294
  for (i=0;i<RUNS;i++)
295
    {
296
#ifdef DOUBLE
297

    
298
      printVectorGPU(dim,devPtrX,"X","Roots");
299

    
300
      /* Multiply Y <- A.X */
301
      cublasDgemv(trans,dim,dim,alpha,devPtrA,dim,
302
                  devPtrX,incx,beta,devPtrY,incx);
303

    
304
      printVectorGPU(dim,devPtrY,"Y","Results");
305

    
306
      /* Solve linear system A.X=Y : Y <- A-1.Y */
307
      cublasDtrsv(uplo,trans,diag,dim,devPtrA,dim,devPtrY,incx);
308

    
309
      printVectorGPU(dim,devPtrY,"Y","Solutions");
310

    
311
      /* Estimate the difference between X and Y : Y <- -Y+X */
312
      cublasDaxpy(dim,beta2,devPtrY,incx,devPtrX,incx);
313

    
314
      printVectorGPU(dim,devPtrX,"X","Errors");
315

    
316
      /* Estimate the second checker */
317
/*       checksA[i]=(double)cublasDnrm2(dim,devPtrX,incx); */
318

    
319
      /* Swap vector X and Y */
320
      cublasDswap(dim,devPtrX,incx,devPtrY,incx);
321

    
322
#else
323

    
324
      printVectorGPU(dim,devPtrX,"X","Roots");
325

    
326
      /* Multiply Y <- A.X */
327
      cublasSgemv(trans,dim,dim,alpha,devPtrA,dim,
328
                  devPtrX,incx,beta,devPtrY,incx);
329

    
330
      printVectorGPU(dim,devPtrY,"Y","Results");
331

    
332
      /* Solve linear system Y <- A-1.Y */
333
      cublasStrsv(uplo,trans,diag,dim,devPtrA,dim,devPtrY,incx);
334

    
335
      printVectorGPU(dim,devPtrY,"Y","Solutions");
336

    
337
      /* Add vectors X and -Y */
338
      cublasSaxpy(dim,beta2,devPtrY,incx,devPtrX,incx);
339

    
340
      printVectorGPU(dim,devPtrX,"X","Errors");
341

    
342
      /* Estimate the second checker */
343
/*       checksA[i]=(double)cublasSnrm2(dim,devPtrX,incx); */
344

    
345
      /* Swap vector X and Y */
346
      cublasSswap(dim,devPtrX,incx,devPtrY,incx);
347

    
348
#endif
349
  
350
    }
351

    
352
  stat1=cublasGetMatrix(dim,dim,sizeof(A[0]),devPtrA,dim,A,dim);
353
  stat2=cublasGetVector(dim,sizeof(X[0]),devPtrX,incx,X,incx);
354
  stat3=cublasGetVector(dim,sizeof(Y[0]),devPtrY,incx,Y,incx);
355
  
356
  cublasFree (devPtrA);
357
  cublasFree (devPtrX);
358
  cublasFree (devPtrY);
359
  
360
  if ((stat1 != CUBLAS_STATUS_SUCCESS) ||
361
      (stat2 != CUBLAS_STATUS_SUCCESS) ||
362
      (stat3 != CUBLAS_STATUS_SUCCESS)) {
363
    wrapperError ("LinearSystem", CUBLAS_WRAPPER_ERROR_GET);
364
  }
365
  
366
  /* Get fourth timer after memory free */
367
  gettimeofday(&tv4, &tz);
368

    
369
#elif THUNKING
370
  
371
  /* Order is Row : Have to swap uplo='U' and trans='N' */
372
  char uplo='L',trans='T',diag='N';
373
  printf("Using CuBLAS/Thunking: %i iterations for %ix%i matrix\n",
374
         RUNS,dim,dim);
375

    
376
  for (i=0;i<RUNS;i++)
377
    {
378
#ifdef DOUBLE
379
      
380
      printVector(dim,X,"X","Roots");
381
      
382
      /* Multiply A by X as Y <- A.X */
383
      CUBLAS_DGEMV(&trans,&dim,&dim,&alpha,A,&dim,X,&incx,&beta,Y,&incx);
384
      
385
      printVector(dim,Y,"Y","Results");
386

    
387
      /* Solve linear system */
388
      CUBLAS_DTRSV(&uplo,&trans,&diag,&dim,A,&dim,Y,&incx);
389
      
390
      printVector(dim,Y,"Y","Solutions");
391

    
392
      /* Compare the roots X and Y */
393
      CUBLAS_DAXPY(&dim,&beta2,Y,&incx,X,&incx);
394

    
395
      printVector(dim,X,"X","Errors");
396

    
397
      /* Store the checker of errors */
398
/*       checksA[i]=(double)CUBLAS_DNRM2(&dim,X,&incx); */
399

    
400
      /* Swap vector X and Y */
401
      CUBLAS_DSWAP(&dim,X,&incx,Y,&incx);
402
#else
403

    
404
      printVector(dim,X,"X","Roots");
405
      
406
      /* Multiply A by X as Y <- A.X */
407
      CUBLAS_SGEMV(&trans,&dim,&dim,&alpha,A,&dim,X,&incx,&beta,Y,&incx);
408
      
409
      printVector(dim,Y,"Y","Results");
410

    
411
      /* Solve linear system */
412
      CUBLAS_STRSV(&uplo,&trans,&diag,&dim,A,&dim,Y,&incx);
413
      
414
      printVector(dim,Y,"Y","Solutions");
415

    
416
      /* Compare the roots X and Y */
417
      CUBLAS_SAXPY(&dim,&beta2,Y,&incx,X,&incx);
418

    
419
      printVector(dim,X,"X","Errors");
420

    
421
      /* Store the checker of errors */
422
/*       checksA[i]=(double)CUBLAS_SNRM2(&dim,X,&incx); */
423

    
424
      /* Swap vector X and Y */
425
      CUBLAS_SSWAP(&dim,X,&incx,Y,&incx);
426
#endif
427

    
428
#ifdef PRINT
429
      printf("Iteration %i, checker is %2.5f and error is %2.10f\n",
430
             i,checksA[i],fabs(checksB[i]-checksA[i])/factor);
431
#endif
432
    }
433

    
434
#elif FBLAS
435
  
436
  /* Order is Row : Have to swap uplo='U' and trans='N' */
437
  char uplo='L',trans='T',diag='N';
438
  
439
  printf("Using FBLAS: %i iterations for %ix%i matrix\n",
440
         RUNS,dim,dim);
441
  
442
  for (i=0;i<RUNS;i++)
443
    {
444
#ifdef DOUBLE
445
      
446
      printVector(dim,X,"X","Roots");
447
      
448
      /* Multiply A by X as Y <- A.X */
449
      dgemv_(&trans,&dim,&dim,&alpha,A,&dim,X,&incx,&beta,Y,&incx);
450
      
451
      printVector(dim,Y,"Y","Results");
452
      
453
      /* Solve linear system */
454
      dtrsv_(&uplo,&trans,&diag,&dim,A,&dim,Y,&incx);
455
      
456
      printVector(dim,Y,"Y","Solutions");
457
      
458
      /* Compare the roots X and Y */
459
      daxpy_(&dim,&beta2,Y,&incx,X,&incx);
460
      
461
      printVector(dim,X,"X","Errors");
462
      
463
      /* Store the checker of errors */
464
/*       dnrm2_(&dim,X,&incx,&checksA[i]); */
465
            
466
      /* Swap vector X and Y */
467
      dswap_(&dim,X,&incx,Y,&incx);
468

    
469
#else
470

    
471
      printVector(dim,X,"X","Roots");
472
      
473
      /* Multiply A by X as Y <- A.X */
474
      sgemv_(&trans,&dim,&dim,&alpha,A,&dim,X,&incx,&beta,Y,&incx);
475
      
476
      printVector(dim,Y,"Y","Results");
477

    
478
      /* Solve linear system */
479
      strsv_(&uplo,&trans,&diag,&dim,A,&dim,Y,&incx);
480
      
481
      printVector(dim,Y,"Y","Solutions");
482

    
483
      /* Compare the roots X and Y */
484
      saxpy_(&dim,&beta2,Y,&incx,X,&incx);
485

    
486
      printVector(dim,X,"X","Errors");
487

    
488
      /* Store the checker of errors */
489
/*       snrm2_(&dim,X,&incx,&checksA[i]); */
490

    
491
      /* Swap vector X and Y */
492
      sswap_(&dim,X,&incx,Y,&incx);
493
#endif
494

    
495
    }
496

    
497
#elif ACML
498
  
499
  /* Order is Row : Have to swap uplo='U' and trans='N' */
500
  char uplo='L',trans='T',diag='N';
501
  
502
  printf("Using ACML: %i iterations for %ix%i matrix\n",
503
         RUNS,dim,dim);
504
  
505
  for (i=0;i<RUNS;i++)
506
    {
507
#ifdef DOUBLE
508
      
509
      printVector(dim,X,"X","Roots");
510
      
511
      /* Multiply A by X as Y <- A.X */
512
      dgemv(trans,dim,dim,alpha,A,dim,X,incx,beta,Y,incx);
513
      
514
      printVector(dim,Y,"Y","Results");
515
      
516
      /* Solve linear system */
517
      dtrsv(uplo,trans,diag,dim,A,dim,Y,incx);
518
      
519
      printVector(dim,Y,"Y","Solutions");
520
      
521
      /* Compare the roots X and Y */
522
      daxpy(dim,beta2,Y,incx,X,incx);
523
      
524
      printVector(dim,X,"X","Errors");
525
      
526
      /* Store the checker of errors */
527
/*       dnrm2_(&dim,X,&incx,&checksA[i]); */
528
            
529
      /* Swap vector X and Y */
530
      dswap(dim,X,incx,Y,incx);
531

    
532
#else
533

    
534
      printVector(dim,X,"X","Roots");
535
      
536
      /* Multiply A by X as Y <- A.X */
537
      sgemv(trans,dim,dim,alpha,A,dim,X,incx,beta,Y,incx);
538
      
539
      printVector(dim,Y,"Y","Results");
540

    
541
      /* Solve linear system */
542
      strsv(uplo,trans,diag,dim,A,dim,Y,incx);
543
      
544
      printVector(dim,Y,"Y","Solutions");
545

    
546
      /* Compare the roots X and Y */
547
      saxpy(dim,beta2,Y,incx,X,incx);
548

    
549
      printVector(dim,X,"X","Errors");
550

    
551
      /* Store the checker of errors */
552
/*       snrm2_(&dim,X,&incx,&checksA[i]); */
553

    
554
      /* Swap vector X and Y */
555
      sswap(dim,X,incx,Y,incx);
556
#endif
557

    
558
    }
559

    
560
#elif GSL
561

    
562
  printf("Using GSL: %i iterations for %ix%i matrix\n",RUNS,dim,dim);
563

    
564
  /* 
565
     RowMajor : Matrix is read row by row
566
     Upper : the no null elements are on top
567
     NoTrans : no transposition before estimation
568
     NonUnit : Matrix is not unit
569
   */
570

    
571
  for (i=0;i<RUNS;i++)
572
    {  
573

    
574
#ifdef DOUBLE
575

    
576
      printVector(dim,X,"X","Roots");
577

    
578
      /* Multiply A by X as Y <- A.X */
579
      cblas_dgemv(CblasRowMajor,CblasNoTrans,
580
                  dim,dim,alpha,A,dim,X,incx,beta,Y,incx);
581

    
582
      printVector(dim,Y,"Y","Results");
583

    
584
      /* Solve linear system : Y <- A-1.Y */
585
      cblas_dtrsv(CblasRowMajor,CblasUpper,CblasNoTrans,CblasNonUnit,
586
                  dim,A,dim,Y,incx);
587

    
588
      printVector(dim,Y,"Y","Solutions");
589
      
590
      cblas_daxpy(dim,beta2,Y,incx,X,incx);
591

    
592
      printVector(dim,X,"X","Errors");
593

    
594
      /* Store the checker of errors */
595
/*       checksA[i]=(double)cblas_dnrm2(dim,X,incx); */
596

    
597
      cblas_dswap(dim,X,incx,Y,incx);
598
      
599
#else
600

    
601
      printVector(dim,X,"X","Roots");
602

    
603
      /* Multiply A by X as Y <- A.X */
604
      cblas_sgemv(CblasRowMajor,CblasNoTrans,
605
                  dim,dim,alpha,A,dim,X,incx,beta,Y,incx);
606

    
607
      printVector(dim,Y,"Y","Results");
608

    
609
      /* Solve linear system : Y <- A-1.Y */
610
      cblas_strsv(CblasRowMajor,CblasUpper,CblasNoTrans,CblasNonUnit,
611
                  dim,A,dim,Y,incx);
612

    
613
      printVector(dim,Y,"Y","Solutions");
614
      
615
      cblas_saxpy(dim,beta2,Y,incx,X,incx);
616

    
617
      printVector(dim,X,"X","Errors");
618

    
619
      /* Store the checker of errors */
620
/*       checksA[i]=(double)cblas_snrm2(dim,X,incx); */
621

    
622
      cblas_sswap(dim,X,incx,Y,incx);
623
      
624
#endif
625
      
626
    }
627
#else
628

    
629
  printf("Using CBLAS: %i iterations for %ix%i matrix\n",RUNS,dim,dim);
630

    
631
  /* 
632
     RowMajor : Matrix is read row bu row
633
     Upper : the no null elements are on top
634
     NoTrans : no transposition before estimation
635
     NonUnit : Matrix is not unit
636
   */
637

    
638
  for (i=0;i<RUNS;i++)
639
    {  
640

    
641
#ifdef DOUBLE
642

    
643
      printVector(dim,X,"X","Roots");
644

    
645
      /* Multiply A by X as Y <- A.X */
646
      cblas_dgemv(CblasRowMajor,CblasNoTrans,
647
                  dim,dim,alpha,A,dim,X,incx,beta,Y,incx);
648

    
649
      printVector(dim,Y,"Y","Results");
650

    
651
      /* Solve linear system : Y <- A-1.Y */
652
      cblas_dtrsv(CblasRowMajor,CblasUpper,CblasNoTrans,CblasNonUnit,
653
                  dim,A,dim,Y,incx);
654

    
655
      printVector(dim,Y,"Y","Solutions");
656
      
657
      cblas_daxpy(dim,beta2,Y,incx,X,incx);
658

    
659
      printVector(dim,X,"X","Errors");
660

    
661
      /* Store the checker of errors */
662
/*       checksA[i]=(double)cblas_dnrm2(dim,X,incx); */
663

    
664
      cblas_dswap(dim,X,incx,Y,incx);
665
      
666
#else
667

    
668
      printVector(dim,X,"X","Roots");
669

    
670
      /* Multiply A by X as Y <- A.X */
671
      cblas_sgemv(CblasRowMajor,CblasNoTrans,
672
                  dim,dim,alpha,A,dim,X,incx,beta,Y,incx);
673

    
674
      printVector(dim,Y,"Y","Results");
675

    
676
      /* Solve linear system : Y <- A-1.Y */
677
      cblas_strsv(CblasRowMajor,CblasUpper,CblasNoTrans,CblasNonUnit,
678
                  dim,A,dim,Y,incx);
679

    
680
      printVector(dim,Y,"Y","Solutions");
681
      
682
      cblas_saxpy(dim,beta2,Y,incx,X,incx);
683

    
684
      printVector(dim,X,"X","Errors");
685

    
686
      /* Store the checker of errors */
687
/*       checksA[i]=(double)cblas_snrm2(dim,X,incx); */
688

    
689
      cblas_sswap(dim,X,incx,Y,incx);
690
      
691
#endif
692

    
693
    }
694
#endif
695
  putchar('\n');
696

    
697
  /* Get second timer after launching */
698
  gettimeofday(&tv2, &tz);
699

    
700
#ifdef CUBLAS
701
  double memoryIn,memoryOut;
702

    
703
  memoryIn=(double)((tv3.tv_sec-tv1.tv_sec) * 1000000L +        \
704
                    (tv3.tv_usec-tv1.tv_usec))/1000000.;  
705

    
706
  memoryOut=(double)((tv2.tv_sec-tv4.tv_sec) * 1000000L +        \
707
                    (tv2.tv_usec-tv4.tv_usec))/1000000.;  
708

    
709
  duration=(double)((tv4.tv_sec-tv3.tv_sec) * 1000000L +        \
710
                    (tv4.tv_usec-tv3.tv_usec))/1000000./RUNS;  
711

    
712
  printf("Duration of memory allocation : %2.10f s\n",memoryIn);
713
  printf("Duration of memory free : %2.10f s\n",memoryOut);
714
#else
715
  duration=(double)((tv2.tv_sec-tv1.tv_sec) * 1000000L +        \
716
                    (tv2.tv_usec-tv1.tv_usec))/1000000./RUNS;  
717

    
718
#endif
719

    
720
  printf("Duration of each cycle : %2.10f s\n",duration);
721

    
722
  printResults(RUNS,checksA,"C","Errors cumulated");
723

    
724
  putchar('\n');
725

    
726
  /*
727
#ifdef PRINT
728
  for (i=0; i<dim; i++) {
729
    for (j=0; j<dim; j++) printf("A[%i,%i]=%1.5f ", i,j,A[i*dim+j]);
730
    putchar('\n');
731
  }
732

733
  for (i=0; i<dim; i++) {
734
    printf("X[%i]=%2.5f",i,X[i]);
735
    putchar('\n');
736
  }
737
  putchar('\n');
738
  for (i=0; i<dim; i++) {
739
    printf("Y[%i]=%2.5f",i,Y[i]);
740
    putchar('\n');
741
  }
742
#endif
743
  */
744

    
745
  return 0;
746
}
747

    
748
int main(int argc,char **argv)
749
{
750
  if ((argc==1)||
751
      (strcmp(argv[1],"-h")==0)||
752
      (strcmp(argv[1],"--help")==0))
753
    {
754
      printf("\nPerforms a bench using BLAS library implementation:\n\n"
755
             "\t#1 Size on triangular system\n"
756
             "\t#2 Number of iterations\n\n");
757
    }
758
  else if ((atoi(argv[1])>=2)&&
759
           (atoi(argv[2])>=1))
760
    {
761
      bench(atoi(argv[1]),atoi(argv[2]));
762
    }
763

    
764
  return 0;
765
}