Statistiques
| Révision :

root / BLAS / xTRSV / xTRSV.c @ 251

Historique | Voir | Annoter | Télécharger (17,48 ko)

1
/* 
2
   Performs a linear system solving of random generated system
3
   Estimates a test
4

5
   Matrix is triangular
6
   
7
   Thanks for help from aurel32@debian.org
8
*/
9

    
10
#include <stdio.h>
11
#include <math.h>
12
#include <stdlib.h>
13
#include <sys/time.h>
14
#include <string.h>
15

    
16
#ifdef CUBLAS
17
#include <cublas.h>
18
#define CUBLAS_WRAPPER_ERROR_NOERR      0
19
#define CUBLAS_WRAPPER_ERROR_ALLOC      1
20
#define CUBLAS_WRAPPER_ERROR_SET        2
21
#define CUBLAS_WRAPPER_ERROR_GET        3
22
#define CUBLAS_WRAPPER_ERROR_STUB       4
23
#elif THUNKING
24
#include <cublas.h>
25
#include "fortran_common.h"
26
#include "fortran_thunking.h"
27
#elif FBLAS
28
#include <f77blas.h>
29
#elif GSL
30
#include <gsl_cblas.h>
31
#elif ACML
32
#include <acml.h>
33
#else
34
#include <cblas.h>
35
// #include <blaswrap.h>
36
#endif
37

    
38
#ifdef FP64
39
#define LENGTH double
40
#else
41
#define LENGTH float
42
#endif
43

    
44
/* Matrix with only defined triangular terms */
45
/* Even if there are 0 in matrix, must be defined at all ! */
46

    
47
/* Get from fortran.c */
48

    
49
#ifdef CUBLAS
50
static char *errMsg[5] = 
51
{
52
    "no error",
53
    "allocation error",
54
    "setVector/setMatrix error",
55
    "getVector/getMatrix error",
56
    "not implemented"
57
};
58

    
59
static void wrapperError (const char *funcName, int error)
60
{
61
    printf ("cublas%s wrapper: %s\n", funcName, errMsg[error]);
62
    fflush (stdout);
63
}
64
#endif
65

    
66
int printVector(const int dimVector,const LENGTH *dataVector,
67
                char *nameVector,char *mesgVector)
68
{
69
#ifndef QUIET
70

    
71
  int i;
72
  printf("\n%s of %s, size %i:\n",mesgVector,nameVector,dimVector);
73
  for (i=0;i<dimVector;i++)
74
    {
75
      printf("%s[%i]=%2.10e\n",nameVector,i,dataVector[i]);
76
    }
77
#endif
78

    
79
  return 0;
80
}
81
  
82
int printResults(const int dimVector,const LENGTH *dataVector,
83
                 char *nameVector,char *mesgVector)
84
{
85
#ifdef RESULTS
86
  int i;
87

    
88
  printf("\n%s of %s, size %i:\n",mesgVector,nameVector,dimVector);
89
  for (i=0;i<dimVector;i++)
90
    {
91
      printf("%s[%i]=%2.10e\n",nameVector,i,dataVector[i]);
92
    }
93
#endif
94
  return 0;
95
}
96
  
97
#ifdef CUBLAS
98
int printVectorGPU(const int dimVector,const LENGTH *dataVector,
99
                   char *nameVector,char *mesgVector)
100
{
101
#ifndef QUIET
102
  int i;
103
  cublasStatus stat;
104
  LENGTH *P=0;
105
  int incx=1;
106

    
107
  P=malloc(dimVector*sizeof(LENGTH));
108
  
109
  stat=cublasGetVector(dimVector,sizeof(P[0]),dataVector,incx,P,incx);
110

    
111
  if (stat != CUBLAS_STATUS_SUCCESS) {
112
    wrapperError ("ToGet", CUBLAS_WRAPPER_ERROR_GET);
113
  }  
114

    
115
  printf("\n%s of %s, size %i:\n",mesgVector,nameVector,dimVector);
116
  for (i=0;i<dimVector;i++)
117
    {
118
      printf("%s[%i]=%2.10e\n",nameVector,i,P[i]);
119
    }
120

    
121
  free(P);  
122
#endif
123

    
124
  return 0;
125
}
126
#endif
127

    
128
int bench(int dim,int RUNS)
129
{
130
  /*
131
  int dim=1000;
132
  int RUNS=100;
133
  */
134
  int incx=1;
135
#ifdef PRINT
136
  LENGTH factor=1.;
137
#endif
138

    
139
  LENGTH alpha=1.,beta=0.,beta2=-1.;
140
  LENGTH *A,*X,*Y;
141

    
142
  /* checkBefore checkAfter checks */
143
  LENGTH *checksA,*checksB;
144

    
145
  int i=0, j=0;
146

    
147
  double duration;
148

    
149
  struct timeval tv1,tv2;
150
  struct timezone tz;
151

    
152
  /* Create 1 Matrix and 2 Vectors of dimension dim  */
153

    
154
  A=malloc(dim*dim*sizeof(LENGTH));
155
  X=malloc(dim*sizeof(LENGTH));
156
  Y=malloc(dim*sizeof(LENGTH));
157

    
158
  /* Create 2 vectors for checker Before and After */
159

    
160
  checksA=malloc(RUNS*sizeof(double));
161
  checksB=malloc(RUNS*sizeof(double));
162

    
163
  /* Initialize elements with random numbers */
164
  /* Initialize the seed for rand() */
165
  /* srand(time()); */
166

    
167
#ifdef UNIT
168
  /* Fill the matrix and vector with random numbers */
169
  for (i=0; i<dim; i++) {
170
    for (j=0; j<dim; j++) 
171
      if (j>=i)
172
        {
173
          /* Normalization is necessary to avoid problems */
174
          A[i*dim+j]=1.;
175
        }
176
      else
177
        {
178
           A[i*dim+j]=0.;
179
        }
180
    X[i]=1;
181
  }
182
#else
183
  for (i=0; i<dim; i++) {
184
    for (j=0; j<dim; j++) 
185
      if (j>i)
186
        {
187
          /* Normalization is necessary to avoid problems */
188
          /* A[i*dim+j]=(LENGTH)rand()/(RAND_MAX+1.) */
189
          /*   *(LENGTH)(i+1.)/(LENGTH)(j+1.); */
190
          A[i*dim+j]=(LENGTH)rand()/(RAND_MAX+1.)
191
            /(LENGTH)(dim-j);
192
        }
193
      else if (j==i)
194
        {
195
           A[i*dim+j]=1.;
196
        }
197
      else
198
        {
199
           A[i*dim+j]=0.;
200
        }
201
    X[i]=(LENGTH)rand()/(RAND_MAX+1.);
202
  }
203
#endif
204

    
205
  /* Print the matrix */
206

    
207
#ifdef QUIET
208
#else
209
  for (i=0; i<dim; i++) {
210
    for (j=0; j<dim; j++) printf("A[%i,%i]=%1.5f ", i,j,A[i*dim+j]);
211
    printf("\tX[%i]=%1.5f ", i,X[i]);
212
    putchar('\n');
213
  }
214
  putchar('\n');
215
#endif
216

    
217
  /* Get first timer before launching */
218
  gettimeofday(&tv1, &tz);
219

    
220
  /* Compute with CuBLAS library  */
221
#ifdef CUBLAS
222
  LENGTH *devPtrA=0, *devPtrX=0, *devPtrY=0;
223
  cublasStatus stat1, stat2, stat3;
224
  struct timeval tv3,tv4;
225

    
226
  /* Order is Row */
227
  /* Have to swap uplo and trans */
228
  char uplo='L',trans='T',diag='N';
229

    
230
  printf("Using CuBLAS: %i iterations for %ix%i matrix\n",
231
         RUNS,dim,dim);
232

    
233
  stat1=cublasAlloc(dim*dim,sizeof(devPtrA[0]),(void**)&devPtrA);
234
  stat2=cublasAlloc(dim,sizeof(devPtrX[0]),(void**)&devPtrX);
235
  stat3=cublasAlloc(dim,sizeof(devPtrY[0]),(void**)&devPtrY);
236

    
237
  if ((stat1 != CUBLAS_STATUS_SUCCESS) || 
238
      (stat2 != CUBLAS_STATUS_SUCCESS) ||
239
      (stat3 != CUBLAS_STATUS_SUCCESS)) {
240
    wrapperError ("Dtrsv", CUBLAS_WRAPPER_ERROR_ALLOC);
241
    cublasFree (devPtrA);
242
    cublasFree (devPtrX);
243
    cublasFree (devPtrY);
244
    return 1;
245
  }
246

    
247
  stat1=cublasSetMatrix(dim,dim,sizeof(A[0]),A,dim,devPtrA,dim);
248
  stat2=cublasSetVector(dim,sizeof(X[0]),X,incx,devPtrX,incx);
249
  stat3=cublasSetVector(dim,sizeof(Y[0]),Y,incx,devPtrY,incx);
250
  
251
  if ((stat1 != CUBLAS_STATUS_SUCCESS) ||
252
      (stat2 != CUBLAS_STATUS_SUCCESS) ||
253
      (stat3 != CUBLAS_STATUS_SUCCESS)) {
254
    wrapperError ("Dtrsv", CUBLAS_WRAPPER_ERROR_SET);
255
    cublasFree (devPtrA);
256
    cublasFree (devPtrX);
257
    cublasFree (devPtrY);
258
    return 1;
259
  }
260

    
261
  /* Get third timer after memory operation */
262
  gettimeofday(&tv3, &tz);
263

    
264
  for (i=0;i<RUNS;i++)
265
    {
266
#ifdef FP64
267

    
268
      printVectorGPU(dim,devPtrX,"X","Roots");
269

    
270
      /* Multiply Y <- A.X */
271
      cublasDgemv(trans,dim,dim,alpha,devPtrA,dim,
272
                  devPtrX,incx,beta,devPtrY,incx);
273

    
274
      printVectorGPU(dim,devPtrY,"Y","Results");
275

    
276
      /* Solve linear system A.X=Y : Y <- A-1.Y */
277
      cublasDtrsv(uplo,trans,diag,dim,devPtrA,dim,devPtrY,incx);
278

    
279
      printVectorGPU(dim,devPtrY,"Y","Solutions");
280

    
281
      /* Estimate the difference between X and Y : Y <- -Y+X */
282
      cublasDaxpy(dim,beta2,devPtrY,incx,devPtrX,incx);
283

    
284
      printVectorGPU(dim,devPtrX,"X","Errors");
285

    
286
      /* Estimate the second checker */
287
      checksA[i]=(double)cublasDnrm2(dim,devPtrX,incx);
288

    
289
      /* Swap vector X and Y */
290
      cublasDswap(dim,devPtrX,incx,devPtrY,incx);
291

    
292
#else
293

    
294
      printVectorGPU(dim,devPtrX,"X","Roots");
295

    
296
      /* Multiply Y <- A.X */
297
      cublasSgemv(trans,dim,dim,alpha,devPtrA,dim,
298
                  devPtrX,incx,beta,devPtrY,incx);
299

    
300
      printVectorGPU(dim,devPtrY,"Y","Results");
301

    
302
      /* Solve linear system Y <- A-1.Y */
303
      cublasStrsv(uplo,trans,diag,dim,devPtrA,dim,devPtrY,incx);
304

    
305
      printVectorGPU(dim,devPtrY,"Y","Solutions");
306

    
307
      /* Add vectors X and -Y */
308
      cublasSaxpy(dim,beta2,devPtrY,incx,devPtrX,incx);
309

    
310
      printVectorGPU(dim,devPtrX,"X","Errors");
311

    
312
      /* Estimate the second checker */
313
      checksA[i]=(double)cublasSnrm2(dim,devPtrX,incx);
314

    
315
      /* Swap vector X and Y */
316
      cublasSswap(dim,devPtrX,incx,devPtrY,incx);
317

    
318
#endif
319
  
320
    }
321

    
322
  stat1=cublasGetMatrix(dim,dim,sizeof(A[0]),devPtrA,dim,A,dim);
323
  stat2=cublasGetVector(dim,sizeof(X[0]),devPtrX,incx,X,incx);
324
  stat3=cublasGetVector(dim,sizeof(Y[0]),devPtrY,incx,Y,incx);
325
  
326
  cublasFree (devPtrA);
327
  cublasFree (devPtrX);
328
  cublasFree (devPtrY);
329
  
330
  if ((stat1 != CUBLAS_STATUS_SUCCESS) ||
331
      (stat2 != CUBLAS_STATUS_SUCCESS) ||
332
      (stat3 != CUBLAS_STATUS_SUCCESS)) {
333
    wrapperError ("LinearSystem", CUBLAS_WRAPPER_ERROR_GET);
334
  }
335
  
336
  /* Get fourth timer after memory free */
337
  gettimeofday(&tv4, &tz);
338

    
339
#elif THUNKING
340
  
341
  /* Order is Row : Have to swap uplo='U' and trans='N' */
342
  char uplo='L',trans='T',diag='N';
343
  printf("Using CuBLAS/Thunking: %i iterations for %ix%i matrix\n",
344
         RUNS,dim,dim);
345

    
346
  for (i=0;i<RUNS;i++)
347
    {
348
#ifdef FP64
349
      
350
      printVector(dim,X,"X","Roots");
351
      
352
      /* Multiply A by X as Y <- A.X */
353
      CUBLAS_DGEMV(&trans,&dim,&dim,&alpha,A,&dim,X,&incx,&beta,Y,&incx);
354
      
355
      printVector(dim,Y,"Y","Results");
356

    
357
      /* Solve linear system */
358
      CUBLAS_DTRSV(&uplo,&trans,&diag,&dim,A,&dim,Y,&incx);
359
      
360
      printVector(dim,Y,"Y","Solutions");
361

    
362
      /* Compare the roots X and Y */
363
      CUBLAS_DAXPY(&dim,&beta2,Y,&incx,X,&incx);
364

    
365
      printVector(dim,X,"X","Errors");
366

    
367
      /* Store the checker of errors */
368
      checksA[i]=(double)CUBLAS_DNRM2(&dim,X,&incx);
369

    
370
      /* Swap vector X and Y */
371
      CUBLAS_DSWAP(&dim,X,&incx,Y,&incx);
372
#else
373

    
374
      printVector(dim,X,"X","Roots");
375
      
376
      /* Multiply A by X as Y <- A.X */
377
      CUBLAS_SGEMV(&trans,&dim,&dim,&alpha,A,&dim,X,&incx,&beta,Y,&incx);
378
      
379
      printVector(dim,Y,"Y","Results");
380

    
381
      /* Solve linear system */
382
      CUBLAS_STRSV(&uplo,&trans,&diag,&dim,A,&dim,Y,&incx);
383
      
384
      printVector(dim,Y,"Y","Solutions");
385

    
386
      /* Compare the roots X and Y */
387
      CUBLAS_SAXPY(&dim,&beta2,Y,&incx,X,&incx);
388

    
389
      printVector(dim,X,"X","Errors");
390

    
391
      /* Store the checker of errors */
392
      checksA[i]=(double)CUBLAS_SNRM2(&dim,X,&incx);
393

    
394
      /* Swap vector X and Y */
395
      CUBLAS_SSWAP(&dim,X,&incx,Y,&incx);
396
#endif
397

    
398
#ifdef PRINT
399
      printf("Iteration %i, checker is %2.5f and error is %2.10f\n",
400
             i,checksA[i],fabs(checksB[i]-checksA[i])/factor);
401
#endif
402
    }
403

    
404
#elif FBLAS
405
  
406
  /* Order is Row : Have to swap uplo='U' and trans='N' */
407
  char uplo='L',trans='T',diag='N';
408
  
409
  printf("Using FBLAS: %i iterations for %ix%i matrix\n",
410
         RUNS,dim,dim);
411
  
412
  for (i=0;i<RUNS;i++)
413
    {
414
#ifdef FP64
415
      
416
      printVector(dim,X,"X","Initial roots");
417
      
418
      /* /\* Multiply A by X as Y <- A.X *\/ */
419
      /* dgemv_(&trans,&dim,&dim,&alpha,A,&dim,X,&incx,&beta,Y,&incx); */
420
      
421
      /* printVector(dim,Y,"Y<-A.X","Estimated results"); */
422
      
423
      /* /\* Solve linear system *\/ */
424
      /* dtrsv_(&uplo,&trans,&diag,&dim,A,&dim,Y,&incx); */
425
      
426
      /* printVector(dim,Y,"X","Solutions from A.X=Y"); */
427
      
428
      /* /\* Compare the roots X and Y *\/ */
429
      /* daxpy_(&dim,&beta2,Y,&incx,X,&incx); */
430
      
431
      /* printVector(dim,X,"X","Differences initial and estimated"); */
432
      
433
      /* /\* Store the checker of errors *\/ */
434
      /* dnrm2_(&dim,X,&incx,&checksA[i]); */
435
            
436
      /* /\* Swap vector X and Y *\/ */
437
      /* dswap_(&dim,X,&incx,Y,&incx); */
438

    
439
      /* Multiply A by X as Y <- A.X */
440
      dgemv_(&trans,&dim,&dim,&alpha,A,&dim,X,&incx,&beta,Y,&incx);
441
      
442
      printVector(dim,Y,"Y<-A.X","Estimated results");
443
      
444
      /* Solve linear system */
445
      dtrsv_(&uplo,&trans,&diag,&dim,A,&dim,Y,&incx);
446
      
447
      printVector(dim,Y,"X","Solutions from A.X=Y");
448
      
449
      /* Compare the roots X and Y */
450
      daxpy_(&dim,&beta2,Y,&incx,X,&incx);
451
      
452
      printVector(dim,X,"X","Differences initial and estimated");
453
      
454
      /* Store the checker of errors */
455
      checksA[i]=(double)dnrm2_(&dim,X,&incx);
456
            
457
      /* Swap vector X and Y */
458
      dswap_(&dim,X,&incx,Y,&incx);
459

    
460
#else
461

    
462
      printVector(dim,X,"X","Roots");
463
      
464
      /* Multiply A by X as Y <- A.X */
465
      sgemv_(&trans,&dim,&dim,&alpha,A,&dim,X,&incx,&beta,Y,&incx);
466
      
467
      printVector(dim,Y,"Y","Results");
468

    
469
      /* Solve linear system */
470
      strsv_(&uplo,&trans,&diag,&dim,A,&dim,Y,&incx);
471
      
472
      printVector(dim,Y,"Y","Solutions");
473

    
474
      /* Compare the roots X and Y */
475
      saxpy_(&dim,&beta2,Y,&incx,X,&incx);
476

    
477
      printVector(dim,X,"X","Errors");
478

    
479
      /* Store the checker of errors */
480
      checksA[i]=(LENGTH)snrm2_(&dim,X,&incx);
481

    
482
      /* Swap vector X and Y */
483
      sswap_(&dim,X,&incx,Y,&incx);
484
#endif
485

    
486
    }
487

    
488
#elif ACML
489
  
490
  /* Order is Row : Have to swap uplo='U' and trans='N' */
491
  char uplo='L',trans='T',diag='N';
492
  
493
  printf("Using ACML: %i iterations for %ix%i matrix\n",
494
         RUNS,dim,dim);
495
  
496
  for (i=0;i<RUNS;i++)
497
    {
498
#ifdef FP64
499
      
500
      printVector(dim,X,"X","Roots");
501
      
502
      /* Multiply A by X as Y <- A.X */
503
      dgemv(trans,dim,dim,alpha,A,dim,X,incx,beta,Y,incx);
504
      
505
      printVector(dim,Y,"Y","Results");
506
      
507
      /* Solve linear system */
508
      dtrsv(uplo,trans,diag,dim,A,dim,Y,incx);
509
      
510
      printVector(dim,Y,"Y","Solutions");
511
      
512
      /* Compare the roots X and Y */
513
      daxpy(dim,beta2,Y,incx,X,incx);
514
      
515
      printVector(dim,X,"X","Errors");
516
      
517
      /* Store the checker of errors */
518
      dnrm2_(&dim,X,&incx,&checksA[i]);
519
            
520
      /* Swap vector X and Y */
521
      dswap(dim,X,incx,Y,incx);
522

    
523
#else
524

    
525
      printVector(dim,X,"X","Roots");
526
      
527
      /* Multiply A by X as Y <- A.X */
528
      sgemv(trans,dim,dim,alpha,A,dim,X,incx,beta,Y,incx);
529
      
530
      printVector(dim,Y,"Y","Results");
531

    
532
      /* Solve linear system */
533
      strsv(uplo,trans,diag,dim,A,dim,Y,incx);
534
      
535
      printVector(dim,Y,"Y","Solutions");
536

    
537
      /* Compare the roots X and Y */
538
      saxpy(dim,beta2,Y,incx,X,incx);
539

    
540
      printVector(dim,X,"X","Errors");
541

    
542
      /* Store the checker of errors */
543
      snrm2_(&dim,X,&incx,&checksA[i]);
544

    
545
      /* Swap vector X and Y */
546
      sswap(dim,X,incx,Y,incx);
547
#endif
548

    
549
    }
550

    
551
#elif GSL
552

    
553
  printf("Using GSL: %i iterations for %ix%i matrix\n",RUNS,dim,dim);
554

    
555
  /* 
556
     RowMajor : Matrix is read row by row
557
     Upper : the no null elements are on top
558
     NoTrans : no transposition before estimation
559
     NonUnit : Matrix is not unit
560
   */
561

    
562
  for (i=0;i<RUNS;i++)
563
    {  
564

    
565
#ifdef FP64
566

    
567
      printVector(dim,X,"X","Roots");
568

    
569
      /* Multiply A by X as Y <- A.X */
570
      cblas_dgemv(CblasRowMajor,CblasNoTrans,
571
                  dim,dim,alpha,A,dim,X,incx,beta,Y,incx);
572

    
573
      printVector(dim,Y,"Y","Results");
574

    
575
      /* Solve linear system : Y <- A-1.Y */
576
      cblas_dtrsv(CblasRowMajor,CblasUpper,CblasNoTrans,CblasNonUnit,
577
                  dim,A,dim,Y,incx);
578

    
579
      printVector(dim,Y,"Y","Solutions");
580
      
581
      cblas_daxpy(dim,beta2,Y,incx,X,incx);
582

    
583
      printVector(dim,X,"X","Errors");
584

    
585
      /* Store the checker of errors */
586
      checksA[i]=(double)cblas_dnrm2(dim,X,incx);
587

    
588
      cblas_dswap(dim,X,incx,Y,incx);
589
      
590
#else
591

    
592
      printVector(dim,X,"X","Roots");
593

    
594
      /* Multiply A by X as Y <- A.X */
595
      cblas_sgemv(CblasRowMajor,CblasNoTrans,
596
                  dim,dim,alpha,A,dim,X,incx,beta,Y,incx);
597

    
598
      printVector(dim,Y,"Y","Results");
599

    
600
      /* Solve linear system : Y <- A-1.Y */
601
      cblas_strsv(CblasRowMajor,CblasUpper,CblasNoTrans,CblasNonUnit,
602
                  dim,A,dim,Y,incx);
603

    
604
      printVector(dim,Y,"Y","Solutions");
605
      
606
      cblas_saxpy(dim,beta2,Y,incx,X,incx);
607

    
608
      printVector(dim,X,"X","Errors");
609

    
610
      /* Store the checker of errors */
611
      checksA[i]=(double)cblas_snrm2(dim,X,incx);
612

    
613
      cblas_sswap(dim,X,incx,Y,incx);
614
      
615
#endif
616
      
617
    }
618
#else
619

    
620
  printf("Using CBLAS: %i iterations for %ix%i matrix\n",RUNS,dim,dim);
621

    
622
  /* 
623
     RowMajor : Matrix is read row bu row
624
     Upper : the no null elements are on top
625
     NoTrans : no transposition before estimation
626
     NonUnit : Matrix is not unit
627
   */
628

    
629
  for (i=0;i<RUNS;i++)
630
    {  
631

    
632
#ifdef FP64
633

    
634
      printVector(dim,X,"X","Roots");
635

    
636
      /* Multiply A by X as Y <- A.X */
637
      cblas_dgemv(CblasRowMajor,CblasNoTrans,
638
                  dim,dim,alpha,A,dim,X,incx,beta,Y,incx);
639

    
640
      printVector(dim,Y,"Y","Results");
641

    
642
      /* Solve linear system : Y <- A-1.Y */
643
      cblas_dtrsv(CblasRowMajor,CblasUpper,CblasNoTrans,CblasNonUnit,
644
                  dim,A,dim,Y,incx);
645

    
646
      printVector(dim,Y,"Y","Solutions");
647
      
648
      cblas_daxpy(dim,beta2,Y,incx,X,incx);
649

    
650
      printVector(dim,X,"X","Errors");
651

    
652
      /* Store the checker of errors */
653
      checksA[i]=(double)cblas_dnrm2(dim,X,incx);
654

    
655
      cblas_dswap(dim,X,incx,Y,incx);
656
      
657
#else
658

    
659
      printVector(dim,X,"X","Roots");
660

    
661
      /* Multiply A by X as Y <- A.X */
662
      cblas_sgemv(CblasRowMajor,CblasNoTrans,
663
                  dim,dim,alpha,A,dim,X,incx,beta,Y,incx);
664

    
665
      printVector(dim,Y,"Y","Results");
666

    
667
      /* Solve linear system : Y <- A-1.Y */
668
      cblas_strsv(CblasRowMajor,CblasUpper,CblasNoTrans,CblasNonUnit,
669
                  dim,A,dim,Y,incx);
670

    
671
      printVector(dim,Y,"Y","Solutions");
672
      
673
      cblas_saxpy(dim,beta2,Y,incx,X,incx);
674

    
675
      printVector(dim,X,"X","Errors");
676

    
677
      /* Store the checker of errors */
678
      checksA[i]=(double)cblas_snrm2(dim,X,incx);
679
      
680
      cblas_sswap(dim,X,incx,Y,incx);
681
      
682
#endif
683

    
684
    }
685
#endif
686
  putchar('\n');
687

    
688
  /* Get second timer after launching */
689
  gettimeofday(&tv2, &tz);
690

    
691
#ifdef CUBLAS
692
  double memoryIn,memoryOut;
693

    
694
  memoryIn=(double)((tv3.tv_sec-tv1.tv_sec) * 1000000L +        \
695
                    (tv3.tv_usec-tv1.tv_usec))/1000000.;  
696

    
697
  memoryOut=(double)((tv2.tv_sec-tv4.tv_sec) * 1000000L +        \
698
                    (tv2.tv_usec-tv4.tv_usec))/1000000.;  
699

    
700
  duration=(double)((tv4.tv_sec-tv3.tv_sec) * 1000000L +        \
701
                    (tv4.tv_usec-tv3.tv_usec))/1000000./RUNS;  
702

    
703
  printf("Duration of memory allocation : %2.10f s\n",memoryIn);
704
  printf("Duration of memory free : %2.10f s\n",memoryOut);
705
#else
706
  duration=(double)((tv2.tv_sec-tv1.tv_sec) * 1000000L +        \
707
                    (tv2.tv_usec-tv1.tv_usec))/1000000./RUNS;  
708

    
709
#endif
710

    
711
  printf("Duration of each cycle : %2.10f s\n",duration);
712

    
713
  printResults(RUNS,checksA,"C","Errors cumulated");
714

    
715
  putchar('\n');
716

    
717
  /*
718
#ifdef PRINT
719
  for (i=0; i<dim; i++) {
720
    for (j=0; j<dim; j++) printf("A[%i,%i]=%1.5f ", i,j,A[i*dim+j]);
721
    putchar('\n');
722
  }
723

724
  for (i=0; i<dim; i++) {
725
    printf("X[%i]=%2.5f",i,X[i]);
726
    putchar('\n');
727
  }
728
  putchar('\n');
729
  for (i=0; i<dim; i++) {
730
    printf("Y[%i]=%2.5f",i,Y[i]);
731
    putchar('\n');
732
  }
733
#endif
734
  */
735

    
736
  return 0;
737
}
738

    
739
int main(int argc,char **argv)
740
{
741
  if ((argc==1)||
742
      (strcmp(argv[1],"-h")==0)||
743
      (strcmp(argv[1],"--help")==0))
744
    {
745
      printf("\nPerforms a bench using BLAS library implementation:\n\n"
746
             "\t#1 Size on triangular system\n"
747
             "\t#2 Number of iterations\n\n");
748
    }
749
  else if ((atoi(argv[1])>=2)&&
750
           (atoi(argv[2])>=1))
751
    {
752
      bench(atoi(argv[1]),atoi(argv[2]));
753
    }
754

    
755
  return 0;
756
}