Révision 1

BLAS/xTRSV/patch_thunking.h (revision 1)
1
41c41
2
< #define CUBLAS_FORTRAN_COMPILER CUBLAS_G95
3
---
4
> #define CUBLAS_FORTRAN_COMPILER CUBLAS_INTEL_FORTRAN
BLAS/xTRSV/Makefile (revision 1)
1
# Builds the xTRSV benchmark against several BLAS back ends.
#
# Every back-end target (cblas, fblas, gsl, cublas, thunking, gotoblas, acml)
# compiles $(SOURCE) twice and produces two executables:
#   $(BASE)_SP_<target>   single precision (-DFLOAT)
#   $(BASE)_DP_<target>   double precision (-DDOUBLE)

SOURCE=xTRSV.c
BASE=$(SOURCE:.c=)

CC=gcc
CFLAGS=-Wall -O3
# LDFLAGS holds linker options only. Libraries live in LDLIBS, which is placed
# after the sources on every link line: a library named before the objects
# that use it may be skipped (static archives, --as-needed linkers).
LDFLAGS=
LDLIBS=-lm
CUDADIR=/opt/cuda
CUDASRC=$(CUDADIR)/src
THUNKING=fortran_thunking.c
CUDASRCINC=fortran_common.h
CUDAINC=$(CUDADIR)/include
CUDALIB=$(CUDADIR)/lib64

PATCHTHUNKING=patch_thunking.h

GSLINC=/usr/include/gsl

GOTO2=/opt/GotoBLAS2

ACML=/opt/acml
ACMLINC=$(ACML)/gfortran64_mp/include
ACMLLIB=$(ACML)/gfortran64_mp/lib

EXECUTABLE=cblas fblas gsl cublas thunking gotoblas acml

#FORMAT=DOUBLE
FORMAT=FLOAT

#DIRECTIVES=-D$(FORMAT) -DPRINT -DUNIT
#DIRECTIVES=-D$(FORMAT) -DUNIT -DRESULTS -DQUIET
DIRECTIVES=-DQUIET -DUNIT

# None of these targets creates a file bearing its own name.
.PHONY: all clean $(EXECUTABLE)

all: $(EXECUTABLE)

cblas: $(SOURCE)
	$(CC) $(CFLAGS) $(DIRECTIVES) -DFLOAT -DCBLAS $(LDFLAGS) \
		$(SOURCE) -lcblas $(LDLIBS) -o $(BASE)_SP_$@
	$(CC) $(CFLAGS) $(DIRECTIVES) -DDOUBLE -DCBLAS $(LDFLAGS) \
		$(SOURCE) -lcblas $(LDLIBS) -o $(BASE)_DP_$@

gotoblas: $(SOURCE)
	$(CC) $(CFLAGS) $(DIRECTIVES) -DFLOAT -DCBLAS $(LDFLAGS) \
		$(SOURCE) $(GOTO2)/libgoto2.a -lpthread $(LDLIBS) -o $(BASE)_SP_$@
	$(CC) $(CFLAGS) $(DIRECTIVES) -DDOUBLE -DCBLAS $(LDFLAGS) \
		$(SOURCE) $(GOTO2)/libgoto2.a -lpthread $(LDLIBS) -o $(BASE)_DP_$@

acml: $(SOURCE)
	$(CC) -I$(ACMLINC) $(CFLAGS) $(DIRECTIVES) -DFLOAT -DACML $(LDFLAGS) \
		$(SOURCE) -L$(ACMLLIB) -lacml_mp -lacml_mv \
		-lgomp -lgfortran -lpthread $(LDLIBS) -o $(BASE)_SP_$@
	$(CC) -I$(ACMLINC) $(CFLAGS) $(DIRECTIVES) -DDOUBLE -DACML $(LDFLAGS) \
		$(SOURCE) -L$(ACMLLIB) -lacml_mp -lacml_mv \
		-lgomp -lgfortran -lpthread $(LDLIBS) -o $(BASE)_DP_$@

fblas: $(SOURCE)
	$(CC) $(CFLAGS) $(DIRECTIVES) -DFLOAT -DFBLAS $(LDFLAGS) \
		$(SOURCE) -lf77blas $(LDLIBS) -o $(BASE)_SP_$@
	$(CC) $(CFLAGS) $(DIRECTIVES) -DDOUBLE -DFBLAS $(LDFLAGS) \
		$(SOURCE) -lf77blas $(LDLIBS) -o $(BASE)_DP_$@

gsl: $(SOURCE)
	$(CC) -I$(GSLINC) $(CFLAGS) $(DIRECTIVES) -DFLOAT -DGSL $(LDFLAGS) \
		$(SOURCE) -lgslcblas $(LDLIBS) -o $(BASE)_SP_$@
	$(CC) -I$(GSLINC) $(CFLAGS) $(DIRECTIVES) -DDOUBLE -DGSL $(LDFLAGS) \
		$(SOURCE) -lgslcblas $(LDLIBS) -o $(BASE)_DP_$@

cublas: $(SOURCE)
	$(CC) -I$(CUDAINC) -L$(CUDALIB) $(CFLAGS) -DCUBLAS -DFLOAT $(LDFLAGS) \
		$(DIRECTIVES) $(SOURCE) -lcublas $(LDLIBS) -o $(BASE)_SP_$@
	$(CC) -I$(CUDAINC) -L$(CUDALIB) $(CFLAGS) -DCUBLAS -DDOUBLE $(LDFLAGS) \
		$(DIRECTIVES) $(SOURCE) -lcublas $(LDLIBS) -o $(BASE)_DP_$@

thunking: $(SOURCE)
# Copy the CUBLAS thunking wrapper sources from the CUDA tree.
# The copy is refreshed on every run, so the patch below always
# applies to a pristine header.
	cp $(CUDASRC)/$(THUNKING) $(CUDASRC)/$(THUNKING:.c=.h) \
		$(CUDASRC)/$(CUDASRCINC) .
# Patch Thunking prototypes to compile on Debian Lenny
	patch $(CUDASRCINC) $(PATCHTHUNKING)
	$(CC) -I$(CUDAINC) $(CFLAGS) -c $(THUNKING)
	$(CC) -I$(CUDAINC) -L$(CUDALIB) $(CFLAGS) -DTHUNKING \
		$(LDFLAGS) $(DIRECTIVES) $(SOURCE) -DFLOAT \
		$(THUNKING:.c=.o) -lcublas $(LDLIBS) -o $(BASE)_SP_$@
	$(CC) -I$(CUDAINC) -L$(CUDALIB) $(CFLAGS) -DTHUNKING \
		$(LDFLAGS) $(DIRECTIVES) $(SOURCE) -DDOUBLE \
		$(THUNKING:.c=.o) -lcublas $(LDLIBS) -o $(BASE)_DP_$@

# Removes the executables, the copied thunking sources/objects, editor
# backups and the patched copy of $(CUDASRCINC). No prerequisite: cleaning
# must work even when $(SOURCE) is absent.
clean:
	find . -name "$(BASE)_*" -exec rm -f {} \;
	find . -name "$(THUNKING:.c=)*" -exec rm -f {} \;
	find . -name "*~" -exec rm -f {} \;
	find . -name "$(CUDASRCINC)" -exec rm -f {} \;
BLAS/xTRSV/xTRSV.c (revision 1)
1
/*
   Benchmarks the solving of a randomly generated linear system
   with several BLAS implementations.

   The matrix is triangular.

   Thanks for help from aurel32@debian.org
*/
9

  
10
#include <stdio.h>
11
#include <math.h>
12
#include <stdlib.h>
13
#include <sys/time.h>
14
#include <string.h>
15

  
16
#ifdef CUBLAS
17
#include <cublas.h>
18
#define CUBLAS_WRAPPER_ERROR_NOERR      0
19
#define CUBLAS_WRAPPER_ERROR_ALLOC      1
20
#define CUBLAS_WRAPPER_ERROR_SET        2
21
#define CUBLAS_WRAPPER_ERROR_GET        3
22
#define CUBLAS_WRAPPER_ERROR_STUB       4
23
#elif THUNKING
24
#include <cublas.h>
25
#elif FBLAS
26
#include <cblas_f77.h>
27
#elif GSL
28
#include <gsl_cblas.h>
29
#elif ACML
30
#include <acml.h>
31
#include <acml_mv.h>
32
#else
33
#include <cblas.h>
34
#endif
35

  
36
#ifdef DOUBLE
37
#define LENGTH double
38
#else
39
#define LENGTH float
40
#endif
41

  
42
#ifdef THUNKING
43
/* WARNING !
44
Prototypes from fortran.c functions used MUST be defined here !
45
*/
46
#include "fortran_thunking.h"
47

  
48
/*
49
#ifdef DOUBLE
50

  
51
void CUBLAS_DCOPY (const int *n, const double *x, const int *incx, double *y,
52
                   const int *incy);
53

  
54
double CUBLAS_DNRM2 (const int *dim, const double *X, const int *incx);
55

  
56
void CUBLAS_DTRSV (const char *uplo, const char *trans, const char *diag,
57
                   const int *n, const double *A, const int *lda, double *x,
58
                   const int *incx);
59

  
60
void CUBLAS_DGEMV (const char *trans, const int *m, const int *n,
61
                   const double *alpha, const double *A, const int *lda,
62
                   const double *x, const int *incx, const double *beta,
63
                   double *y, const int *incy);
64

  
65
void CUBLAS_DSWAP (const int *n, double *x, const int *incx, double *y,
66
                   const int *incy);
67

  
68
void CUBLAS_DAXPY (const int *n, const double *alpha, const double *x, 
69
                   const int *incx, double *y, const int *incy);
70

  
71
#else
72
void CUBLAS_SCOPY (const int *n, const float *x, const int *incx, float *y,
73
                   const int *incy);
74

  
75
float CUBLAS_SNRM2 (const int *dim, const float *X, const int *incx);
76

  
77
void CUBLAS_STRSV (const char *uplo, const char *trans, const char *diag,
78
                   const int *n, const float *A, const int *lda, float *x,
79
                   const int *incx);
80

  
81
void CUBLAS_SGEMV (const char *trans, const int *m, const int *n,
82
                   const float *alpha, const float *A, const int *lda,
83
                   const float *x, const int *incx, const float *beta,
84
                   float *y, const int *incy);
85

  
86
void CUBLAS_SSWAP (const int *n, float *x, const int *incx, float *y,
87
                   const int *incy);
88

  
89
void CUBLAS_SAXPY (const int *n, const float *alpha, const float *x, 
90
                   const int *incx, float *y, const int *incy);
91

  
92
#endif
93
*/
94

  
95
#elif FBLAS
96

  
97
#ifdef DOUBLE
98

  
99
void dtrsv_( FCHAR, FCHAR, FCHAR, FINT, const double *, FINT, double *, FINT);
100

  
101
void dgemv_(FCHAR, FINT, FINT, const double *, const double *, FINT, 
102
	       const double *, FINT, const double *, double *, FINT);
103

  
104
void dswap_( FINT, double *, FINT, double *, FINT);
105

  
106
void daxpy_( FINT, const double *, const double *, FINT, double *, FINT);
107

  
108
void dnrm2_( FINT, const double *, FINT, double *);
109

  
110
#else
111

  
112
void strsv_( FCHAR, FCHAR, FCHAR, FINT, const float *, FINT, float *, FINT);
113

  
114
void sgemv_(FCHAR, FINT, FINT, const float *, const float *, FINT, 
115
	       const float *, FINT, const float *, float *, FINT);
116

  
117
void sswap_( FINT, float *, FINT, float *, FINT);
118

  
119
void saxpy_( FINT, const float *, const float *, FINT, float *, FINT);
120

  
121
void snrm2_( FINT, const float *, FINT, float *);
122

  
123
#endif
124

  
125
#endif
126

  
127
/* Matrix with only defined triangular terms */
128
/* Even if there are 0 in matrix, must be defined at all ! */
129

  
130
/* Get from fortran.c */
131

  
132
#ifdef CUBLAS
133
/* Messages indexed by the CUBLAS_WRAPPER_ERROR_* codes. */
static const char *const errMsg[] =
{
    "no error",
    "allocation error",
    "setVector/setMatrix error",
    "getVector/getMatrix error",
    "not implemented"
};

/*
 * Report a wrapper error on stdout.
 *
 * funcName : suffix printed right after "cublas" in the message
 * error    : one of the CUBLAS_WRAPPER_ERROR_* codes; a value outside the
 *            message table is reported as "unknown error" instead of
 *            indexing past the end of the table
 */
static void wrapperError (const char *funcName, int error)
{
    const int count = (int)(sizeof (errMsg) / sizeof (errMsg[0]));
    const char *message = "unknown error";

    if ((error >= 0) && (error < count)) {
        message = errMsg[error];
    }

    printf ("cublas%s wrapper: %s\n", funcName, message);
    fflush (stdout);
}
147
#endif
148

  
149
/*
 * Print a host vector on stdout, one element per line.
 *
 * dimVector  : number of elements to print
 * dataVector : elements to print
 * nameVector : name shown in the header and in front of every element
 * mesgVector : description shown in the header
 *
 * Nothing is printed when QUIET is defined. Always returns 0.
 */
int printVector(const int dimVector,const LENGTH *dataVector,
                char *nameVector,char *mesgVector)
{
#ifndef QUIET
  int idx=0;

  printf("\n%s of %s, size %i:\n",mesgVector,nameVector,dimVector);
  while (idx<dimVector)
    {
      printf("%s[%i]=%2.10e\n",nameVector,idx,dataVector[idx]);
      idx++;
    }
#endif

  return 0;
}
164
  
165
/*
 * Print a vector of results on stdout, one element per line.
 *
 * dimVector  : number of elements to print
 * dataVector : elements to print
 * nameVector : name shown in the header and in front of every element
 * mesgVector : description shown in the header
 *
 * Nothing is printed unless RESULTS is defined. Always returns 0.
 */
int printResults(const int dimVector,const LENGTH *dataVector,
                 char *nameVector,char *mesgVector)
{
#ifdef RESULTS
  int idx=0;

  printf("\n%s of %s, size %i:\n",mesgVector,nameVector,dimVector);
  while (idx<dimVector)
    {
      printf("%s[%i]=%2.10e\n",nameVector,idx,dataVector[idx]);
      idx++;
    }
#endif
  return 0;
}
179
  
180
#ifdef CUBLAS
181
int printVectorGPU(const int dimVector,const LENGTH *dataVector,
182
		   char *nameVector,char *mesgVector)
183
{
184
#ifndef QUIET
185
  int i;
186
  cublasStatus stat;
187
  LENGTH *P=0;
188
  int incx=1;
189

  
190
  P=malloc(dimVector*sizeof(LENGTH));
191
  
192
  stat=cublasGetVector(dimVector,sizeof(P[0]),dataVector,incx,P,incx);
193

  
194
  if (stat != CUBLAS_STATUS_SUCCESS) {
195
    wrapperError ("ToGet", CUBLAS_WRAPPER_ERROR_GET);
196
  }  
197

  
198
  printf("\n%s of %s, size %i:\n",mesgVector,nameVector,dimVector);
199
  for (i=0;i<dimVector;i++)
200
    {
201
      printf("%s[%i]=%2.10e\n",nameVector,i,P[i]);
202
    }
203

  
204
  free(P);  
205
#endif
206

  
207
  return 0;
208
}
209
#endif
210

  
211
int bench(int dim,int RUNS)
212
{
213
  /*
214
  int dim=1000;
215
  int RUNS=100;
216
  */
217
  int incx=1;
218
#ifdef PRINT
219
  LENGTH factor=1.;
220
#endif
221

  
222
  LENGTH alpha=1.,beta=0.,beta2=-1.;
223
  LENGTH *A,*X,*Y;
224

  
225
  /* checkBefore checkAfter checks */
226
  LENGTH *checksA,*checksB;
227

  
228
  int i=0, j=0;
229

  
230
  double duration;
231

  
232
  struct timeval tv1,tv2;
233
  struct timezone tz;
234

  
235
  /* Create 1 Matrix and 2 Vectors of dimension dim  */
236

  
237
  A=malloc(dim*dim*sizeof(LENGTH));
238
  X=malloc(dim*sizeof(LENGTH));
239
  Y=malloc(dim*sizeof(LENGTH));
240

  
241
  /* Create 2 vectors for checker Before and After */
242

  
243
  checksA=malloc(RUNS*sizeof(double));
244
  checksB=malloc(RUNS*sizeof(double));
245

  
246
  /* Initialize elements with random numbers */
247
  /* Initialize the seed for rand() */
248
  /* srand(time()); */
249

  
250
#ifdef UNIT
251
  /* Fill the matrix and vector with random numbers */
252
  for (i=0; i<dim; i++) {
253
    for (j=0; j<dim; j++) 
254
      if (j>=i)
255
	{
256
	  /* Normalization is necessary to avoid problems */
257
	  A[i*dim+j]=1.;
258
	}
259
      else
260
	{
261
	   A[i*dim+j]=0.;
262
	}
263
    X[i]=1;
264
  }
265
#else
266
  for (i=0; i<dim; i++) {
267
    for (j=0; j<dim; j++) 
268
      if (j>i)
269
	{
270
	  /* Normalization is necessary to avoid problems */
271
	  A[i*dim+j]=(LENGTH)rand()/(RAND_MAX+1.)
272
	    *(LENGTH)(i+1.)/(LENGTH)(j+1.);
273
	}
274
      else if (j==i)
275
	{
276
	   A[i*dim+j]=1.;
277
	}
278
      else
279
	{
280
	   A[i*dim+j]=0.;
281
	}
282
    X[i]=(LENGTH)rand()/(RAND_MAX+1.);
283
  }
284
#endif
285

  
286
  /* Print the matrix */
287

  
288
#ifdef QUIET
289
#else
290
  for (i=0; i<dim; i++) {
291
    for (j=0; j<dim; j++) printf("A[%i,%i]=%1.5f ", i,j,A[i*dim+j]);
292
    printf("\tX[%i]=%1.5f ", i,X[i]);
293
    putchar('\n');
294
  }
295
  putchar('\n');
296
#endif
297

  
298
  /* Get first timer before launching */
299
  gettimeofday(&tv1, &tz);
300

  
301
  /* Compute with CuBLAS library  */
302
#ifdef CUBLAS
303
  LENGTH *devPtrA=0, *devPtrX=0, *devPtrY=0;
304
  cublasStatus stat1, stat2, stat3;
305
  struct timeval tv3,tv4;
306

  
307
  /* Order is Row */
308
  /* Have to swap uplo and trans */
309
  char uplo='L',trans='T',diag='N';
310

  
311
  printf("Using CuBLAS: %i iterations for %ix%i matrix\n",
312
	 RUNS,dim,dim);
313

  
314
  stat1=cublasAlloc(dim*dim,sizeof(devPtrA[0]),(void**)&devPtrA);
315
  stat2=cublasAlloc(dim,sizeof(devPtrX[0]),(void**)&devPtrX);
316
  stat3=cublasAlloc(dim,sizeof(devPtrY[0]),(void**)&devPtrY);
317

  
318
  if ((stat1 != CUBLAS_STATUS_SUCCESS) || 
319
      (stat2 != CUBLAS_STATUS_SUCCESS) ||
320
      (stat3 != CUBLAS_STATUS_SUCCESS)) {
321
    wrapperError ("Dtrsv", CUBLAS_WRAPPER_ERROR_ALLOC);
322
    cublasFree (devPtrA);
323
    cublasFree (devPtrX);
324
    cublasFree (devPtrY);
325
    return 1;
326
  }
327

  
328
  stat1=cublasSetMatrix(dim,dim,sizeof(A[0]),A,dim,devPtrA,dim);
329
  stat2=cublasSetVector(dim,sizeof(X[0]),X,incx,devPtrX,incx);
330
  stat3=cublasSetVector(dim,sizeof(Y[0]),Y,incx,devPtrY,incx);
331
  
332
  if ((stat1 != CUBLAS_STATUS_SUCCESS) ||
333
      (stat2 != CUBLAS_STATUS_SUCCESS) ||
334
      (stat3 != CUBLAS_STATUS_SUCCESS)) {
335
    wrapperError ("Dtrsv", CUBLAS_WRAPPER_ERROR_SET);
336
    cublasFree (devPtrA);
337
    cublasFree (devPtrX);
338
    cublasFree (devPtrY);
339
    return 1;
340
  }
341

  
342
  /* Get third timer after memory operation */
343
  gettimeofday(&tv3, &tz);
344

  
345
  for (i=0;i<RUNS;i++)
346
    {
347
#ifdef DOUBLE
348

  
349
      printVectorGPU(dim,devPtrX,"X","Roots");
350

  
351
      /* Multiply Y <- A.X */
352
      cublasDgemv(trans,dim,dim,alpha,devPtrA,dim,
353
		  devPtrX,incx,beta,devPtrY,incx);
354

  
355
      printVectorGPU(dim,devPtrY,"Y","Results");
356

  
357
      /* Solve linear system A.X=Y : Y <- A-1.Y */
358
      cublasDtrsv(uplo,trans,diag,dim,devPtrA,dim,devPtrY,incx);
359

  
360
      printVectorGPU(dim,devPtrY,"Y","Solutions");
361

  
362
      /* Estimate the difference between X and Y : Y <- -Y+X */
363
      cublasDaxpy(dim,beta2,devPtrY,incx,devPtrX,incx);
364

  
365
      printVectorGPU(dim,devPtrX,"X","Errors");
366

  
367
      /* Estimate the second checker */
368
/*       checksA[i]=(double)cublasDnrm2(dim,devPtrX,incx); */
369

  
370
      /* Swap vector X and Y */
371
      cublasDswap(dim,devPtrX,incx,devPtrY,incx);
372

  
373
#else
374

  
375
      printVectorGPU(dim,devPtrX,"X","Roots");
376

  
377
      /* Multiply Y <- A.X */
378
      cublasSgemv(trans,dim,dim,alpha,devPtrA,dim,
379
		  devPtrX,incx,beta,devPtrY,incx);
380

  
381
      printVectorGPU(dim,devPtrY,"Y","Results");
382

  
383
      /* Solve linear system Y <- A-1.Y */
384
      cublasStrsv(uplo,trans,diag,dim,devPtrA,dim,devPtrY,incx);
385

  
386
      printVectorGPU(dim,devPtrY,"Y","Solutions");
387

  
388
      /* Add vectors X and -Y */
389
      cublasSaxpy(dim,beta2,devPtrY,incx,devPtrX,incx);
390

  
391
      printVectorGPU(dim,devPtrX,"X","Errors");
392

  
393
      /* Estimate the second checker */
394
/*       checksA[i]=(double)cublasSnrm2(dim,devPtrX,incx); */
395

  
396
      /* Swap vector X and Y */
397
      cublasSswap(dim,devPtrX,incx,devPtrY,incx);
398

  
399
#endif
400
  
401
    }
402

  
403
  stat1=cublasGetMatrix(dim,dim,sizeof(A[0]),devPtrA,dim,A,dim);
404
  stat2=cublasGetVector(dim,sizeof(X[0]),devPtrX,incx,X,incx);
405
  stat3=cublasGetVector(dim,sizeof(Y[0]),devPtrY,incx,Y,incx);
406
  
407
  cublasFree (devPtrA);
408
  cublasFree (devPtrX);
409
  cublasFree (devPtrY);
410
  
411
  if ((stat1 != CUBLAS_STATUS_SUCCESS) ||
412
      (stat2 != CUBLAS_STATUS_SUCCESS) ||
413
      (stat3 != CUBLAS_STATUS_SUCCESS)) {
414
    wrapperError ("LinearSystem", CUBLAS_WRAPPER_ERROR_GET);
415
  }
416
  
417
  /* Get fourth timer after memory free */
418
  gettimeofday(&tv4, &tz);
419

  
420
#elif THUNKING
421
  
422
  /* Order is Row : Have to swap uplo='U' and trans='N' */
423
  char uplo='L',trans='T',diag='N';
424
  printf("Using CuBLAS/Thunking: %i iterations for %ix%i matrix\n",
425
	 RUNS,dim,dim);
426

  
427
  for (i=0;i<RUNS;i++)
428
    {
429
#ifdef DOUBLE
430
      
431
      printVector(dim,X,"X","Roots");
432
      
433
      /* Multiply A by X as Y <- A.X */
434
      CUBLAS_DGEMV(&trans,&dim,&dim,&alpha,A,&dim,X,&incx,&beta,Y,&incx);
435
      
436
      printVector(dim,Y,"Y","Results");
437

  
438
      /* Solve linear system */
439
      CUBLAS_DTRSV(&uplo,&trans,&diag,&dim,A,&dim,Y,&incx);
440
      
441
      printVector(dim,Y,"Y","Solutions");
442

  
443
      /* Compare the roots X and Y */
444
      CUBLAS_DAXPY(&dim,&beta2,Y,&incx,X,&incx);
445

  
446
      printVector(dim,X,"X","Errors");
447

  
448
      /* Store the checker of errors */
449
/*       checksA[i]=(double)CUBLAS_DNRM2(&dim,X,&incx); */
450

  
451
      /* Swap vector X and Y */
452
      CUBLAS_DSWAP(&dim,X,&incx,Y,&incx);
453
#else
454

  
455
      printVector(dim,X,"X","Roots");
456
      
457
      /* Multiply A by X as Y <- A.X */
458
      CUBLAS_SGEMV(&trans,&dim,&dim,&alpha,A,&dim,X,&incx,&beta,Y,&incx);
459
      
460
      printVector(dim,Y,"Y","Results");
461

  
462
      /* Solve linear system */
463
      CUBLAS_STRSV(&uplo,&trans,&diag,&dim,A,&dim,Y,&incx);
464
      
465
      printVector(dim,Y,"Y","Solutions");
466

  
467
      /* Compare the roots X and Y */
468
      CUBLAS_SAXPY(&dim,&beta2,Y,&incx,X,&incx);
469

  
470
      printVector(dim,X,"X","Errors");
471

  
472
      /* Store the checker of errors */
473
/*       checksA[i]=(double)CUBLAS_SNRM2(&dim,X,&incx); */
474

  
475
      /* Swap vector X and Y */
476
      CUBLAS_SSWAP(&dim,X,&incx,Y,&incx);
477
#endif
478

  
479
#ifdef PRINT
480
      printf("Iteration %i, checker is %2.5f and error is %2.10f\n",
481
	     i,checksA[i],fabs(checksB[i]-checksA[i])/factor);
482
#endif
483
    }
484

  
485
#elif FBLAS
486
  
487
  /* Order is Row : Have to swap uplo='U' and trans='N' */
488
  char uplo='L',trans='T',diag='N';
489
  
490
  printf("Using FBLAS: %i iterations for %ix%i matrix\n",
491
	 RUNS,dim,dim);
492
  
493
  for (i=0;i<RUNS;i++)
494
    {
495
#ifdef DOUBLE
496
      
497
      printVector(dim,X,"X","Roots");
498
      
499
      /* Multiply A by X as Y <- A.X */
500
      dgemv_(&trans,&dim,&dim,&alpha,A,&dim,X,&incx,&beta,Y,&incx);
501
      
502
      printVector(dim,Y,"Y","Results");
503
      
504
      /* Solve linear system */
505
      dtrsv_(&uplo,&trans,&diag,&dim,A,&dim,Y,&incx);
506
      
507
      printVector(dim,Y,"Y","Solutions");
508
      
509
      /* Compare the roots X and Y */
510
      daxpy_(&dim,&beta2,Y,&incx,X,&incx);
511
      
512
      printVector(dim,X,"X","Errors");
513
      
514
      /* Store the checker of errors */
515
/*       dnrm2_(&dim,X,&incx,&checksA[i]); */
516
            
517
      /* Swap vector X and Y */
518
      dswap_(&dim,X,&incx,Y,&incx);
519

  
520
#else
521

  
522
      printVector(dim,X,"X","Roots");
523
      
524
      /* Multiply A by X as Y <- A.X */
525
      sgemv_(&trans,&dim,&dim,&alpha,A,&dim,X,&incx,&beta,Y,&incx);
526
      
527
      printVector(dim,Y,"Y","Results");
528

  
529
      /* Solve linear system */
530
      strsv_(&uplo,&trans,&diag,&dim,A,&dim,Y,&incx);
531
      
532
      printVector(dim,Y,"Y","Solutions");
533

  
534
      /* Compare the roots X and Y */
535
      saxpy_(&dim,&beta2,Y,&incx,X,&incx);
536

  
537
      printVector(dim,X,"X","Errors");
538

  
539
      /* Store the checker of errors */
540
/*       snrm2_(&dim,X,&incx,&checksA[i]); */
541

  
542
      /* Swap vector X and Y */
543
      sswap_(&dim,X,&incx,Y,&incx);
544
#endif
545

  
546
    }
547

  
548
#elif ACML
549
  
550
  /* Order is Row : Have to swap uplo='U' and trans='N' */
551
  char uplo='L',trans='T',diag='N';
552
  
553
  printf("Using ACML: %i iterations for %ix%i matrix\n",
554
	 RUNS,dim,dim);
555
  
556
  for (i=0;i<RUNS;i++)
557
    {
558
#ifdef DOUBLE
559
      
560
      printVector(dim,X,"X","Roots");
561
      
562
      /* Multiply A by X as Y <- A.X */
563
      dgemv(trans,dim,dim,alpha,A,dim,X,incx,beta,Y,incx);
564
      
565
      printVector(dim,Y,"Y","Results");
566
      
567
      /* Solve linear system */
568
      dtrsv(uplo,trans,diag,dim,A,dim,Y,incx);
569
      
570
      printVector(dim,Y,"Y","Solutions");
571
      
572
      /* Compare the roots X and Y */
573
      daxpy(dim,beta2,Y,incx,X,incx);
574
      
575
      printVector(dim,X,"X","Errors");
576
      
577
      /* Store the checker of errors */
578
/*       dnrm2_(&dim,X,&incx,&checksA[i]); */
579
            
580
      /* Swap vector X and Y */
581
      dswap(dim,X,incx,Y,incx);
582

  
583
#else
584

  
585
      printVector(dim,X,"X","Roots");
586
      
587
      /* Multiply A by X as Y <- A.X */
588
      sgemv(trans,dim,dim,alpha,A,dim,X,incx,beta,Y,incx);
589
      
590
      printVector(dim,Y,"Y","Results");
591

  
592
      /* Solve linear system */
593
      strsv(uplo,trans,diag,dim,A,dim,Y,incx);
594
      
595
      printVector(dim,Y,"Y","Solutions");
596

  
597
      /* Compare the roots X and Y */
598
      saxpy(dim,beta2,Y,incx,X,incx);
599

  
600
      printVector(dim,X,"X","Errors");
601

  
602
      /* Store the checker of errors */
603
/*       snrm2_(&dim,X,&incx,&checksA[i]); */
604

  
605
      /* Swap vector X and Y */
606
      sswap(dim,X,incx,Y,incx);
607
#endif
608

  
609
    }
610

  
611
#elif GSL
612

  
613
  printf("Using GSL: %i iterations for %ix%i matrix\n",RUNS,dim,dim);
614

  
615
  /* 
616
     RowMajor : Matrix is read row by row
617
     Upper : the no null elements are on top
618
     NoTrans : no transposition before estimation
619
     NonUnit : Matrix is not unit
620
   */
621

  
622
  for (i=0;i<RUNS;i++)
623
    {  
624

  
625
#ifdef DOUBLE
626

  
627
      printVector(dim,X,"X","Roots");
628

  
629
      /* Multiply A by X as Y <- A.X */
630
      cblas_dgemv(CblasRowMajor,CblasNoTrans,
631
		  dim,dim,alpha,A,dim,X,incx,beta,Y,incx);
632

  
633
      printVector(dim,Y,"Y","Results");
634

  
635
      /* Solve linear system : Y <- A-1.Y */
636
      cblas_dtrsv(CblasRowMajor,CblasUpper,CblasNoTrans,CblasNonUnit,
637
		  dim,A,dim,Y,incx);
638

  
639
      printVector(dim,Y,"Y","Solutions");
640
      
641
      cblas_daxpy(dim,beta2,Y,incx,X,incx);
642

  
643
      printVector(dim,X,"X","Errors");
644

  
645
      /* Store the checker of errors */
646
/*       checksA[i]=(double)cblas_dnrm2(dim,X,incx); */
647

  
648
      cblas_dswap(dim,X,incx,Y,incx);
649
      
650
#else
651

  
652
      printVector(dim,X,"X","Roots");
653

  
654
      /* Multiply A by X as Y <- A.X */
655
      cblas_sgemv(CblasRowMajor,CblasNoTrans,
656
		  dim,dim,alpha,A,dim,X,incx,beta,Y,incx);
657

  
658
      printVector(dim,Y,"Y","Results");
659

  
660
      /* Solve linear system : Y <- A-1.Y */
661
      cblas_strsv(CblasRowMajor,CblasUpper,CblasNoTrans,CblasNonUnit,
662
		  dim,A,dim,Y,incx);
663

  
664
      printVector(dim,Y,"Y","Solutions");
665
      
666
      cblas_saxpy(dim,beta2,Y,incx,X,incx);
667

  
668
      printVector(dim,X,"X","Errors");
669

  
670
      /* Store the checker of errors */
671
/*       checksA[i]=(double)cblas_snrm2(dim,X,incx); */
672

  
673
      cblas_sswap(dim,X,incx,Y,incx);
674
      
675
#endif
676
      
677
    }
678
#else
679

  
680
  printf("Using CBLAS: %i iterations for %ix%i matrix\n",RUNS,dim,dim);
681

  
682
  /* 
683
     RowMajor : Matrix is read row bu row
684
     Upper : the no null elements are on top
685
     NoTrans : no transposition before estimation
686
     NonUnit : Matrix is not unit
687
   */
688

  
689
  for (i=0;i<RUNS;i++)
690
    {  
691

  
692
#ifdef DOUBLE
693

  
694
      printVector(dim,X,"X","Roots");
695

  
696
      /* Multiply A by X as Y <- A.X */
697
      cblas_dgemv(CblasRowMajor,CblasNoTrans,
698
		  dim,dim,alpha,A,dim,X,incx,beta,Y,incx);
699

  
700
      printVector(dim,Y,"Y","Results");
701

  
702
      /* Solve linear system : Y <- A-1.Y */
703
      cblas_dtrsv(CblasRowMajor,CblasUpper,CblasNoTrans,CblasNonUnit,
704
		  dim,A,dim,Y,incx);
705

  
706
      printVector(dim,Y,"Y","Solutions");
707
      
708
      cblas_daxpy(dim,beta2,Y,incx,X,incx);
709

  
710
      printVector(dim,X,"X","Errors");
711

  
712
      /* Store the checker of errors */
713
/*       checksA[i]=(double)cblas_dnrm2(dim,X,incx); */
714

  
715
      cblas_dswap(dim,X,incx,Y,incx);
716
      
717
#else
718

  
719
      printVector(dim,X,"X","Roots");
720

  
721
      /* Multiply A by X as Y <- A.X */
722
      cblas_sgemv(CblasRowMajor,CblasNoTrans,
723
		  dim,dim,alpha,A,dim,X,incx,beta,Y,incx);
724

  
725
      printVector(dim,Y,"Y","Results");
726

  
727
      /* Solve linear system : Y <- A-1.Y */
728
      cblas_strsv(CblasRowMajor,CblasUpper,CblasNoTrans,CblasNonUnit,
729
		  dim,A,dim,Y,incx);
730

  
731
      printVector(dim,Y,"Y","Solutions");
732
      
733
      cblas_saxpy(dim,beta2,Y,incx,X,incx);
734

  
735
      printVector(dim,X,"X","Errors");
736

  
737
      /* Store the checker of errors */
738
/*       checksA[i]=(double)cblas_snrm2(dim,X,incx); */
739

  
740
      cblas_sswap(dim,X,incx,Y,incx);
741
      
742
#endif
743

  
744
    }
745
#endif
746
  putchar('\n');
747

  
748
  /* Get second timer after launching */
749
  gettimeofday(&tv2, &tz);
750

  
751
#ifdef CUBLAS
752
  double memoryIn,memoryOut;
753

  
754
  memoryIn=(double)((tv3.tv_sec-tv1.tv_sec) * 1000000L +	\
755
		    (tv3.tv_usec-tv1.tv_usec))/1000000.;  
756

  
757
  memoryOut=(double)((tv2.tv_sec-tv4.tv_sec) * 1000000L +	\
758
		    (tv2.tv_usec-tv4.tv_usec))/1000000.;  
759

  
760
  duration=(double)((tv4.tv_sec-tv3.tv_sec) * 1000000L +	\
761
		    (tv4.tv_usec-tv3.tv_usec))/1000000./RUNS;  
762

  
763
  printf("Duration of memory allocation : %2.10f s\n",memoryIn);
764
  printf("Duration of memory free : %2.10f s\n",memoryOut);
765
#else
766
  duration=(double)((tv2.tv_sec-tv1.tv_sec) * 1000000L +	\
767
		    (tv2.tv_usec-tv1.tv_usec))/1000000./RUNS;  
768

  
769
#endif
770

  
771
  printf("Duration of each cycle : %2.10f s\n",duration);
772

  
773
  printResults(RUNS,checksA,"C","Errors cumulated");
774

  
775
  putchar('\n');
776

  
777
  /*
778
#ifdef PRINT
779
  for (i=0; i<dim; i++) {
780
    for (j=0; j<dim; j++) printf("A[%i,%i]=%1.5f ", i,j,A[i*dim+j]);
781
    putchar('\n');
782
  }
783

  
784
  for (i=0; i<dim; i++) {
785
    printf("X[%i]=%2.5f",i,X[i]);
786
    putchar('\n');
787
  }
788
  putchar('\n');
789
  for (i=0; i<dim; i++) {
790
    printf("Y[%i]=%2.5f",i,Y[i]);
791
    putchar('\n');
792
  }
793
#endif
794
  */
795

  
796
  return 0;
797
}
798

  
799
/*
 * Entry point.
 *
 * argv[1] : size of the triangular system (at least 2)
 * argv[2] : number of iterations (at least 1)
 *
 * Without argument, or with -h / --help, the usage is printed and 0 is
 * returned. When an argument is missing or out of range the usage is
 * printed and 1 is returned. Otherwise the status of bench() is returned.
 */
int main(int argc,char **argv)
{
  const char *usage=
    "\nPerforms a bench using BLAS library implementation:\n\n"
    "\t#1 Size on triangular system\n"
    "\t#2 Number of iterations\n\n";

  if ((argc<2)||
      (strcmp(argv[1],"-h")==0)||
      (strcmp(argv[1],"--help")==0))
    {
      printf("%s",usage);
      return 0;
    }

  /* argv[2] may only be read once argc says it exists */
  if ((argc<3)||
      (atoi(argv[1])<2)||
      (atoi(argv[2])<1))
    {
      printf("%s",usage);
      return 1;
    }

  return bench(atoi(argv[1]),atoi(argv[2]));
}
BLAS/xTRSV/bench.sh (revision 1)
1
#!/bin/sh
#
# Runs the benchmark executables for growing system sizes and stores, for
# every back end, one "size<TAB>duration" line per size in a file under /tmp.
# The cublas file holds three durations per line: cycle, memory allocation
# and memory free.

# Label used in the names of the result files
BENCH=lesson11

# Base name of the executables: the Makefile builds
# <PROGRAM>_<SP|DP>_<back end>
PROGRAM=xTRSV

# Number of iterations of every run
NUMBER=100

# First size, also used as the increment
SIZE=1000

# Last size
MAX=32000

# Precision of the executables to run: SP or DP
FORMAT=DP

OUT_CBLAS=/tmp/${BENCH}_${FORMAT}_cblas.out
OUT_FBLAS=/tmp/${BENCH}_${FORMAT}_fblas.out
OUT_GSL=/tmp/${BENCH}_${FORMAT}_gsl.out
OUT_THUNKING=/tmp/${BENCH}_${FORMAT}_thunking.out
OUT_CUBLAS=/tmp/${BENCH}_${FORMAT}_cublas.out

# Print the "Duration" value reported by the back end $1 for the current size
duration_of () {
    "./${PROGRAM}_${FORMAT}_$1" "$SIZE" "$NUMBER" | grep Duration | awk -F: '{ print $2 }' | awk '{ print  $1 }'
}

# Start every result file with a single empty line
echo > "$OUT_CBLAS"
echo > "$OUT_FBLAS"
echo > "$OUT_GSL"
echo > "$OUT_THUNKING"
echo > "$OUT_CUBLAS"

while [ "$SIZE" -le "$MAX" ]
do

    CBLAS=$(duration_of cblas)

    FBLAS=$(duration_of fblas)

    GSL=$(duration_of gsl)

    THUNKING=$(duration_of thunking)

    # The cublas executable prints three durations, in this order: memory
    # allocation, memory free, cycle. They are stored as cycle, allocation,
    # free.
    CUBLAS=$("./${PROGRAM}_${FORMAT}_cublas" "$SIZE" "$NUMBER" | grep Duration | awk -F: '{ print $2 }' | tr "\n" " " | awk '{ print  $5"\t"$1"\t"$3 }')

    # printf is used because "echo -e" is not portable under /bin/sh
    printf '%s\t%s\n' "$SIZE" "$CBLAS" >> "$OUT_CBLAS"
    printf '%s\t%s\n' "$SIZE" "$FBLAS" >> "$OUT_FBLAS"
    printf '%s\t%s\n' "$SIZE" "$GSL" >> "$OUT_GSL"
    printf '%s\t%s\n' "$SIZE" "$THUNKING" >> "$OUT_THUNKING"
    printf '%s\t%s\n' "$SIZE" "$CUBLAS" >> "$OUT_CUBLAS"

    SIZE=$((SIZE+1000))

done
0 47

  
BLAS/xTRSV/fortran_common.h.orig (revision 1)
1
/*
2
 * Copyright 1993-2011 NVIDIA Corporation.  All rights reserved.
3
 *
4
 * NOTICE TO LICENSEE:
5
 *
6
 * This source code and/or documentation ("Licensed Deliverables") are
7
 * subject to NVIDIA intellectual property rights under U.S. and
8
 * international Copyright laws.
9
 *
10
 * These Licensed Deliverables contained herein is PROPRIETARY and
11
 * CONFIDENTIAL to NVIDIA and is being provided under the terms and
12
 * conditions of a form of NVIDIA software license agreement by and
13
 * between NVIDIA and Licensee ("License Agreement") or electronically
14
 * accepted by Licensee.  Notwithstanding any terms or conditions to
15
 * the contrary in the License Agreement, reproduction or disclosure
16
 * of the Licensed Deliverables to any third party without the express
17
 * written consent of NVIDIA is prohibited.
18
 *
19
 * NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE
20
 * LICENSE AGREEMENT, NVIDIA MAKES NO REPRESENTATION ABOUT THE
21
 * SUITABILITY OF THESE LICENSED DELIVERABLES FOR ANY PURPOSE.  IT IS
22
 * PROVIDED "AS IS" WITHOUT EXPRESS OR IMPLIED WARRANTY OF ANY KIND.
23
 * NVIDIA DISCLAIMS ALL WARRANTIES WITH REGARD TO THESE LICENSED
24
 * DELIVERABLES, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY,
25
 * NONINFRINGEMENT, AND FITNESS FOR A PARTICULAR PURPOSE.
26
 * NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE
27
 * LICENSE AGREEMENT, IN NO EVENT SHALL NVIDIA BE LIABLE FOR ANY
28
 * SPECIAL, INDIRECT, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, OR ANY
29
 * DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
30
 * WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
31
 * ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE
32
 * OF THESE LICENSED DELIVERABLES.
33
 *
34
 * U.S. Government End Users.  These Licensed Deliverables are a
35
 * "commercial item" as that term is defined at 48 C.F.R. 2.101 (OCT
36
 * 1995), consisting of "commercial computer software" and "commercial
37
 * computer software documentation" as such terms are used in 48
38
 * C.F.R. 12.212 (SEPT 1995) and is provided to the U.S. Government
39
 * only as a commercial end item.  Consistent with 48 C.F.R.12.212 and
40
 * 48 C.F.R. 227.7202-1 through 227.7202-4 (JUNE 1995), all
41
 * U.S. Government End Users acquire the Licensed Deliverables with
42
 * only those rights set forth herein.
43
 *
44
 * Any use of the Licensed Deliverables in individual and commercial
45
 * software must include, in the user documentation and internal
46
 * comments to the code, the above Disclaimer and U.S. Government End
47
 * Users Notice.
48
 */
49
 
50
#define CUBLAS_G77              1
51
#define CUBLAS_INTEL_FORTRAN    2
52
#define CUBLAS_G95              3
53

  
54
/* Default to g95 on Linux and Apple platforms, and Intel Fortran on Win32 */
55
#if defined(_WIN32)
56
#define CUBLAS_FORTRAN_COMPILER CUBLAS_INTEL_FORTRAN
57
#elif defined(__linux)
58
#define CUBLAS_FORTRAN_COMPILER CUBLAS_G95
59
#elif defined(__APPLE__)
60
#define CUBLAS_FORTRAN_COMPILER CUBLAS_G95
61
#define RETURN_COMPLEX   1
62
#else
63
#error unsupported platform
64
#endif
65

  
66
#if (CUBLAS_FORTRAN_COMPILER==CUBLAS_G77) || (CUBLAS_FORTRAN_COMPILER==CUBLAS_G95)
67
/* NOTE: Fortran sources must be built with -fno-second-underscore when using g77.
68
 *       The g77 invocation must not use -fno-f2c, which forces return type
69
 *       conventions different from the ones used below.
70
 */
71
#define CUBLAS_INIT             cublas_init_
72
#define CUBLAS_SHUTDOWN         cublas_shutdown_
73
#define CUBLAS_ALLOC            cublas_alloc_
74
#define CUBLAS_FREE             cublas_free_
75
#define CUBLAS_SET_VECTOR       cublas_set_vector_
76
#define CUBLAS_GET_VECTOR       cublas_get_vector_
77
#define CUBLAS_SET_MATRIX       cublas_set_matrix_
78
#define CUBLAS_GET_MATRIX       cublas_get_matrix_
79
#define CUBLAS_GET_ERROR        cublas_get_error_
80
#define CUBLAS_XERBLA           cublas_xerbla_
81
#define CUBLAS_ISAMAX           cublas_isamax_
82
#define CUBLAS_ISAMIN           cublas_isamin_
83
#define CUBLAS_SASUM            cublas_sasum_
84
#define CUBLAS_SAXPY            cublas_saxpy_
85
#define CUBLAS_SCOPY            cublas_scopy_
86
#define CUBLAS_SDOT             cublas_sdot_
87
#define CUBLAS_SNRM2            cublas_snrm2_
88
#define CUBLAS_SROT             cublas_srot_
89
#define CUBLAS_SROTG            cublas_srotg_
90
#define CUBLAS_SROTM            cublas_srotm_
91
#define CUBLAS_SROTMG           cublas_srotmg_
92
#define CUBLAS_SSCAL            cublas_sscal_
93
#define CUBLAS_SSWAP            cublas_sswap_
94
#define CUBLAS_CAXPY            cublas_caxpy_
95
#define CUBLAS_CCOPY            cublas_ccopy_
96
#define CUBLAS_CROT             cublas_crot_
97
#define CUBLAS_CROTG            cublas_crotg_
98
#define CUBLAS_CSCAL            cublas_cscal_
99
#define CUBLAS_CSROT            cublas_csrot_
100
#define CUBLAS_CSSCAL           cublas_csscal_
101
#define CUBLAS_CSWAP            cublas_cswap_
102
#define CUBLAS_CTRMV            cublas_ctrmv_
103
#define CUBLAS_CDOTU            cublas_cdotu_
104
#define CUBLAS_CDOTC            cublas_cdotc_
105
#define CUBLAS_ICAMAX           cublas_icamax_
106
#define CUBLAS_SCASUM           cublas_scasum_
107
#define CUBLAS_SCNRM2           cublas_scnrm2_
108
#define CUBLAS_SGBMV            cublas_sgbmv_
109
#define CUBLAS_SGEMV            cublas_sgemv_
110
#define CUBLAS_SGER             cublas_sger_
111
#define CUBLAS_SSBMV            cublas_ssbmv_
112
#define CUBLAS_SSPMV            cublas_sspmv_
113
#define CUBLAS_SSPR             cublas_sspr_
114
#define CUBLAS_SSPR2            cublas_sspr2_
115
#define CUBLAS_SSYMV            cublas_ssymv_
116
#define CUBLAS_SSYR             cublas_ssyr_
117
#define CUBLAS_SSYR2            cublas_ssyr2_
118
#define CUBLAS_STBMV            cublas_stbmv_
119
#define CUBLAS_STBSV            cublas_stbsv_
120
#define CUBLAS_STPMV            cublas_stpmv_
121
#define CUBLAS_STPSV            cublas_stpsv_
122
#define CUBLAS_STRMV            cublas_strmv_
123
#define CUBLAS_STRSV            cublas_strsv_
124
#define CUBLAS_SGEMM            cublas_sgemm_
125
#define CUBLAS_SSYMM            cublas_ssymm_
126
#define CUBLAS_SSYR2K           cublas_ssyr2k_
127
#define CUBLAS_SSYRK            cublas_ssyrk_
128
#define CUBLAS_STRMM            cublas_strmm_
129
#define CUBLAS_STRSM            cublas_strsm_
130
#define CUBLAS_CGEMM            cublas_cgemm_
131
#define CUBLAS_CHEMM            cublas_chemm_
132
#define CUBLAS_CSYMM            cublas_csymm_
133
#define CUBLAS_CTRMM            cublas_ctrmm_
134
#define CUBLAS_CTRSM            cublas_ctrsm_
135
#define CUBLAS_CHERK            cublas_cherk_
136
#define CUBLAS_CSYRK            cublas_csyrk_
137
#define CUBLAS_CHER2K           cublas_cher2k_
138
#define CUBLAS_CSYR2K           cublas_csyr2k_
139
#define CUBLAS_IDAMAX           cublas_idamax_
140
#define CUBLAS_IDAMIN           cublas_idamin_
141
#define CUBLAS_DASUM            cublas_dasum_
142
#define CUBLAS_DAXPY            cublas_daxpy_
143
#define CUBLAS_DCOPY            cublas_dcopy_
144
#define CUBLAS_DDOT             cublas_ddot_
145
#define CUBLAS_DNRM2            cublas_dnrm2_
146
#define CUBLAS_DROT             cublas_drot_
147
#define CUBLAS_DROTG            cublas_drotg_
148
#define CUBLAS_DROTM            cublas_drotm_
149
#define CUBLAS_DROTMG           cublas_drotmg_
150
#define CUBLAS_DSCAL            cublas_dscal_
151
#define CUBLAS_DSWAP            cublas_dswap_
152
#define CUBLAS_ZAXPY            cublas_zaxpy_
153
#define CUBLAS_ZCOPY            cublas_zcopy_
154
#define CUBLAS_ZROT             cublas_zrot_
155
#define CUBLAS_ZROTG            cublas_zrotg_
156
#define CUBLAS_ZSCAL            cublas_zscal_
157
#define CUBLAS_ZDROT            cublas_zdrot_
158
#define CUBLAS_ZDSCAL           cublas_zdscal_
159
#define CUBLAS_ZSWAP            cublas_zswap_
160
#define CUBLAS_ZDOTU            cublas_zdotu_
161
#define CUBLAS_ZDOTC            cublas_zdotc_
162
#define CUBLAS_IZAMAX           cublas_izamax_
163
#define CUBLAS_DZASUM           cublas_dzasum_
164
#define CUBLAS_DZNRM2           cublas_dznrm2_
165
#define CUBLAS_DGBMV            cublas_dgbmv_
166
#define CUBLAS_DGEMV            cublas_dgemv_
167
#define CUBLAS_ZGEMV            cublas_zgemv_
168
#define CUBLAS_DGER             cublas_dger_
169
#define CUBLAS_DSBMV            cublas_dsbmv_
170
#define CUBLAS_DSPMV            cublas_dspmv_
171
#define CUBLAS_DSPR             cublas_dspr_
172
#define CUBLAS_DSPR2            cublas_dspr2_
173
#define CUBLAS_DSYMV            cublas_dsymv_
174
#define CUBLAS_DSYR             cublas_dsyr_
175
#define CUBLAS_DSYR2            cublas_dsyr2_
176
#define CUBLAS_DTBMV            cublas_dtbmv_
177
#define CUBLAS_DTBSV            cublas_dtbsv_
178
#define CUBLAS_DTPMV            cublas_dtpmv_
179
#define CUBLAS_DTPSV            cublas_dtpsv_
180
#define CUBLAS_DTRMV            cublas_dtrmv_
181
#define CUBLAS_DTRSV            cublas_dtrsv_
182
#define CUBLAS_DGEMM            cublas_dgemm_
183
#define CUBLAS_DSYMM            cublas_dsymm_
184
#define CUBLAS_DSYR2K           cublas_dsyr2k_
185
#define CUBLAS_DSYRK            cublas_dsyrk_
186
#define CUBLAS_ZSYRK            cublas_zsyrk_
187
#define CUBLAS_DTRMM            cublas_dtrmm_
188
#define CUBLAS_DTRSM            cublas_dtrsm_
189
#define CUBLAS_ZGEMM            cublas_zgemm_
190
#define CUBLAS_ZHEMM            cublas_zhemm_
191
#define CUBLAS_ZSYMM            cublas_zsymm_
192
#define CUBLAS_ZTRMM            cublas_ztrmm_
193
#define CUBLAS_ZTRSM            cublas_ztrsm_
194
#define CUBLAS_ZHERK            cublas_zherk_
195
#define CUBLAS_ZSYRK            cublas_zsyrk_
196
#define CUBLAS_ZHER2K           cublas_zher2k_
197
#define CUBLAS_ZSYR2K           cublas_zsyr2k_
198

  
199
#define  CUBLAS_CGEMV           cublas_cgemv_
200
#define  CUBLAS_CGBMV           cublas_cgbmv_
201
#define  CUBLAS_CHEMV           cublas_chemv_
202
#define  CUBLAS_CHBMV           cublas_chbmv_
203
#define  CUBLAS_CHPMV           cublas_chpmv_
204
#define  CUBLAS_CTBMV           cublas_ctbmv_
205
#define  CUBLAS_CTPMV           cublas_ctpmv_
206
#define  CUBLAS_CTRSV           cublas_ctrsv_
207
#define  CUBLAS_CTBSV           cublas_ctbsv_
208
#define  CUBLAS_CTPSV           cublas_ctpsv_
209
#define  CUBLAS_CGERC           cublas_cgerc_
210
#define  CUBLAS_CGERU           cublas_cgeru_
211
#define  CUBLAS_CHPR            cublas_chpr_
212
#define  CUBLAS_CHPR2           cublas_chpr2_
213
#define  CUBLAS_CHER            cublas_cher_
214
#define  CUBLAS_CHER2           cublas_cher2_
215

  
216
// stubs for zblat2
217
#define CUBLAS_ZGBMV           cublas_zgbmv_
218
#define CUBLAS_ZHEMV           cublas_zhemv_
219
#define CUBLAS_ZHBMV           cublas_zhbmv_
220
#define CUBLAS_ZHPMV           cublas_zhpmv_
221
#define CUBLAS_ZTRMV           cublas_ztrmv_
222
#define CUBLAS_ZTBMV           cublas_ztbmv_
223
#define CUBLAS_ZTPMV           cublas_ztpmv_
224
#define CUBLAS_ZTRSV           cublas_ztrsv_
225
#define CUBLAS_ZTBSV           cublas_ztbsv_
226
#define CUBLAS_ZTPSV           cublas_ztpsv_
227
#define CUBLAS_ZGERC           cublas_zgerc_
228
#define CUBLAS_ZGERU           cublas_zgeru_
229
#define CUBLAS_ZHER            cublas_zher_
230
#define CUBLAS_ZHPR            cublas_zhpr_
231
#define CUBLAS_ZHER2           cublas_zher2_
232
#define CUBLAS_ZHPR2           cublas_zhpr2_
233

  
234
#elif CUBLAS_FORTRAN_COMPILER==CUBLAS_INTEL_FORTRAN
235

  
236
#define CUBLAS_INIT             CUBLAS_INIT 
237
#define CUBLAS_SHUTDOWN         CUBLAS_SHUTDOWN
238
#define CUBLAS_ALLOC            CUBLAS_ALLOC
239
#define CUBLAS_FREE             CUBLAS_FREE
240
#define CUBLAS_SET_VECTOR       CUBLAS_SET_VECTOR
241
#define CUBLAS_GET_VECTOR       CUBLAS_GET_VECTOR
242
#define CUBLAS_SET_MATRIX       CUBLAS_SET_MATRIX
243
#define CUBLAS_GET_MATRIX       CUBLAS_GET_MATRIX
244
#define CUBLAS_GET_ERROR        CUBLAS_GET_ERROR
245
#define CUBLAS_XERBLA           CUBLAS_XERBLA
246
#define CUBLAS_ISAMAX           CUBLAS_ISAMAX
247
#define CUBLAS_ISAMIN           CUBLAS_ISAMIN
248
#define CUBLAS_SASUM            CUBLAS_SASUM
249
#define CUBLAS_SAXPY            CUBLAS_SAXPY
250
#define CUBLAS_SCOPY            CUBLAS_SCOPY
251
#define CUBLAS_SDOT             CUBLAS_SDOT
252
#define CUBLAS_SNRM2            CUBLAS_SNRM2
253
#define CUBLAS_SROT             CUBLAS_SROT
254
#define CUBLAS_SROTG            CUBLAS_SROTG
255
#define CUBLAS_SROTM            CUBLAS_SROTM
256
#define CUBLAS_SROTMG           CUBLAS_SROTMG
257
#define CUBLAS_SSCAL            CUBLAS_SSCAL
258
#define CUBLAS_SSWAP            CUBLAS_SSWAP
259
#define CUBLAS_CAXPY            CUBLAS_CAXPY
260
#define CUBLAS_CCOPY            CUBLAS_CCOPY
261
#define CUBLAS_ZCOPY            CUBLAS_ZCOPY
262
#define CUBLAS_CROT             CUBLAS_CROT
263
#define CUBLAS_CROTG            CUBLAS_CROTG
264
#define CUBLAS_CSCAL            CUBLAS_CSCAL
265
#define CUBLAS_CSROT            CUBLAS_CSROT
266
#define CUBLAS_CSSCAL           CUBLAS_CSSCAL
267
#define CUBLAS_CSWAP            CUBLAS_CSWAP 
268
#define CUBLAS_ZSWAP            CUBLAS_ZSWAP 
269
#define CUBLAS_CTRMV            CUBLAS_CTRMV 
270
#define CUBLAS_CDOTU            CUBLAS_CDOTU
271
#define CUBLAS_CDOTC            CUBLAS_CDOTC
272
#define CUBLAS_ICAMAX           CUBLAS_ICAMAX
273
#define CUBLAS_SCASUM           CUBLAS_SCASUM
274
#define CUBLAS_SCNRM2           CUBLAS_SCNRM2
275
#define CUBLAS_SGBMV            CUBLAS_SGBMV
276
#define CUBLAS_SGEMV            CUBLAS_SGEMV
277
#define CUBLAS_SGER             CUBLAS_SGER
278
#define CUBLAS_SSBMV            CUBLAS_SSBMV
279
#define CUBLAS_SSPMV            CUBLAS_SSPMV
280
#define CUBLAS_SSPR             CUBLAS_SSPR
281
#define CUBLAS_SSPR2            CUBLAS_SSPR2
282
#define CUBLAS_SSYMV            CUBLAS_SSYMV
283
#define CUBLAS_SSYR             CUBLAS_SSYR
284
#define CUBLAS_SSYR2            CUBLAS_SSYR2
285
#define CUBLAS_STBMV            CUBLAS_STBMV
286
#define CUBLAS_STBSV            CUBLAS_STBSV
287
#define CUBLAS_STPMV            CUBLAS_STPMV
288
#define CUBLAS_STPSV            CUBLAS_STPSV
289
#define CUBLAS_STRMV            CUBLAS_STRMV
290
#define CUBLAS_STRSV            CUBLAS_STRSV
291
#define CUBLAS_SGEMM            CUBLAS_SGEMM
292
#define CUBLAS_SSYMM            CUBLAS_SSYMM
293
#define CUBLAS_SSYR2K           CUBLAS_SSYR2K
294
#define CUBLAS_SSYRK            CUBLAS_SSYRK
295
#define CUBLAS_STRMM            CUBLAS_STRMM
296
#define CUBLAS_STRSM            CUBLAS_STRSM
297
#define CUBLAS_CGEMM            CUBLAS_CGEMM
298
#define CUBLAS_CHEMM            CUBLAS_CHEMM
299
#define CUBLAS_CSYMM            CUBLAS_CSYMM
300
#define CUBLAS_CTRMM            CUBLAS_CTRMM
301
#define CUBLAS_CTRSM            CUBLAS_CTRSM
302
#define CUBLAS_CHERK            CUBLAS_CHERK
303
#define CUBLAS_CSYRK            CUBLAS_CSYRK
304
#define CUBLAS_CHER2K           CUBLAS_CHER2K
305
#define CUBLAS_CSYR2K           CUBLAS_CSYR2K
306
#define CUBLAS_IDAMAX           CUBLAS_IDAMAX
307
#define CUBLAS_IDAMIN           CUBLAS_IDAMIN
308
#define CUBLAS_DASUM            CUBLAS_DASUM
309
#define CUBLAS_DAXPY            CUBLAS_DAXPY
310
#define CUBLAS_DCOPY            CUBLAS_DCOPY
311
#define CUBLAS_DDOT             CUBLAS_DDOT
312
#define CUBLAS_DNRM2            CUBLAS_DNRM2
313
#define CUBLAS_DROT             CUBLAS_DROT
314
#define CUBLAS_DROTG            CUBLAS_DROTG
315
#define CUBLAS_DROTM            CUBLAS_DROTM
316
#define CUBLAS_DROTMG           CUBLAS_DROTMG
317
#define CUBLAS_DSCAL            CUBLAS_DSCAL
318
#define CUBLAS_DSWAP            CUBLAS_DSWAP
319
#define CUBLAS_ZAXPY            CUBLAS_ZAXPY
320
#define CUBLAS_ZCOPY            CUBLAS_ZCOPY
321
#define CUBLAS_ZROT             CUBLAS_ZROT
322
#define CUBLAS_ZROTG            CUBLAS_ZROTG
323
#define CUBLAS_ZSCAL            CUBLAS_ZSCAL
324
#define CUBLAS_ZDROT            CUBLAS_ZDROT
325
#define CUBLAS_ZDSCAL           CUBLAS_ZDSCAL
326
#define CUBLAS_ZSWAP            CUBLAS_ZSWAP 
327
#define CUBLAS_ZDOTU            CUBLAS_ZDOTU
328
#define CUBLAS_ZDOTC            CUBLAS_ZDOTC
329
#define CUBLAS_IZAMAX           CUBLAS_IZAMAX
330
#define CUBLAS_DZASUM           CUBLAS_DZASUM
331
#define CUBLAS_DZNRM2           CUBLAS_DZNRM2
332
#define CUBLAS_DGBMV            CUBLAS_DGBMV
333
#define CUBLAS_DGEMV            CUBLAS_DGEMV
334
#define CUBLAS_ZGEMV            CUBLAS_ZGEMV
335
#define CUBLAS_DGER             CUBLAS_DGER
336
#define CUBLAS_DSBMV            CUBLAS_DSBMV
337
#define CUBLAS_DSPMV            CUBLAS_DSPMV
338
#define CUBLAS_DSPR             CUBLAS_DSPR
339
#define CUBLAS_DSPR2            CUBLAS_DSPR2
340
#define CUBLAS_DSYMV            CUBLAS_DSYMV
341
#define CUBLAS_DSYR             CUBLAS_DSYR
342
#define CUBLAS_DSYR2            CUBLAS_DSYR2
343
#define CUBLAS_DTBMV            CUBLAS_DTBMV
344
#define CUBLAS_DTBSV            CUBLAS_DTBSV
345
#define CUBLAS_DTPMV            CUBLAS_DTPMV
346
#define CUBLAS_DTPSV            CUBLAS_DTPSV
347
#define CUBLAS_DTRMV            CUBLAS_DTRMV
348
#define CUBLAS_DTRSV            CUBLAS_DTRSV
349
#define CUBLAS_DGEMM            CUBLAS_DGEMM
350
#define CUBLAS_DSYMM            CUBLAS_DSYMM
351
#define CUBLAS_DSYR2K           CUBLAS_DSYR2K
352
/* The g77/g95 branch maps DSYRK immediately before ZSYRK at this position;
 * this branch only had ZSYRK here, leaving CUBLAS_DSYRK without an entry
 * when the Intel Fortran convention is selected.
 */
#define CUBLAS_DSYRK            CUBLAS_DSYRK
#define CUBLAS_ZSYRK            CUBLAS_ZSYRK
353
#define CUBLAS_DTRMM            CUBLAS_DTRMM
354
#define CUBLAS_DTRSM            CUBLAS_DTRSM
355
#define CUBLAS_ZGEMM            CUBLAS_ZGEMM
356
#define CUBLAS_ZHEMM            CUBLAS_ZHEMM
357
#define CUBLAS_ZSYMM            CUBLAS_ZSYMM
358
#define CUBLAS_ZTRMM            CUBLAS_ZTRMM
359
#define CUBLAS_ZTRSM            CUBLAS_ZTRSM
360
#define CUBLAS_ZHERK            CUBLAS_ZHERK
361
#define CUBLAS_ZSYRK            CUBLAS_ZSYRK
362
#define CUBLAS_ZHER2K           CUBLAS_ZHER2K
363
#define CUBLAS_ZSYR2K           CUBLAS_ZSYR2K
364

  
365
#define  CUBLAS_CGEMV           CUBLAS_CGEMV
366
#define  CUBLAS_CGBMV           CUBLAS_CGBMV
367
#define  CUBLAS_CHEMV           CUBLAS_CHEMV
368
#define  CUBLAS_CHBMV           CUBLAS_CHBMV
369
#define  CUBLAS_CHPMV           CUBLAS_CHPMV
370
#define  CUBLAS_CTBMV           CUBLAS_CTBMV
371
#define  CUBLAS_CTPMV           CUBLAS_CTPMV
372
#define  CUBLAS_CTRSV           CUBLAS_CTRSV
373
#define  CUBLAS_CTBSV           CUBLAS_CTBSV
374
#define  CUBLAS_CTPSV           CUBLAS_CTPSV
375
#define  CUBLAS_CGERC           CUBLAS_CGERC
376
#define  CUBLAS_CGERU           CUBLAS_CGERU
377
#define  CUBLAS_CHPR            CUBLAS_CHPR
/* The g77/g95 branch also maps CHPR2, CHER and CHER2 right after CHPR; they
 * were missing from this branch, so define them here too so the names exist
 * under either compiler selection.
 */
#define  CUBLAS_CHPR2           CUBLAS_CHPR2
#define  CUBLAS_CHER            CUBLAS_CHER
#define  CUBLAS_CHER2           CUBLAS_CHER2
378

  
379

  
380
// stubs for zblat2
381
#define CUBLAS_ZGBMV           CUBLAS_ZGBMV
382
#define CUBLAS_ZHEMV           CUBLAS_ZHEMV
383
#define CUBLAS_ZHBMV           CUBLAS_ZHBMV
384
#define CUBLAS_ZHPMV           CUBLAS_ZHPMV
385
#define CUBLAS_ZTRMV           CUBLAS_ZTRMV
386
#define CUBLAS_ZTBMV           CUBLAS_ZTBMV
387
#define CUBLAS_ZTPMV           CUBLAS_ZTPMV
388
#define CUBLAS_ZTRSV           CUBLAS_ZTRSV
389
#define CUBLAS_ZTBSV           CUBLAS_ZTBSV
390
#define CUBLAS_ZTPSV           CUBLAS_ZTPSV
391
#define CUBLAS_ZGERC           CUBLAS_ZGERC
392
#define CUBLAS_ZGERU           CUBLAS_ZGERU
393
#define CUBLAS_ZHER            CUBLAS_ZHER
394
#define CUBLAS_ZHPR            CUBLAS_ZHPR
395
#define CUBLAS_ZHER2           CUBLAS_ZHER2
396
#define CUBLAS_ZHPR2           CUBLAS_ZHPR2
397

  
398
#else
399
#error unsupported Fortran compiler
400
#endif
BLAS/xGEMM/patch_thunking.h (revision 1)
1
41c41
2
< #define CUBLAS_FORTRAN_COMPILER CUBLAS_G95
3
---
4
> #define CUBLAS_FORTRAN_COMPILER CUBLAS_INTEL_FORTRAN
BLAS/xGEMM/Makefile (revision 1)
1
# Benchmark source file; every visible rule compiles it directly into an executable.
SOURCE=xGEMM.c
2

  
3
# C compiler and flags shared by all rules.
CC=gcc
4
CFLAGS=-Wall -O3
5
# NOTE: holds a library (-lm), not a linker flag; it is passed to every compile/link command.
LDFLAGS=-lm
6
# CUDA locations and file names; not referenced by the rules visible in this part
# (presumably used by the cublas/thunking rules further down - TODO confirm).
CUDADIR=/opt/cuda
7
CUDASRC=$(CUDADIR)/src
8
THUNKING=fortran_thunking.c
9
CUDASRCINC=fortran_common.h
10
CUDAINC=$(CUDADIR)/include
11
CUDALIB=$(CUDADIR)/lib64
12

  
13
PATCHTHUNKING=patch_thunking.h
14

  
15
# Include directory passed with -I by the gsl rule.
GSLINC=/usr/include/gsl
16

  
17
# Directory containing libgoto2.a, linked by the gotoblas rule.
GOTO2=/opt/GotoBLAS2
18

  
19
# ACML root; ACMLINC/ACMLLIB are passed with -I/-L by the acml rule.
ACML=/opt/acml
20
ACMLINC=$(ACML)/gfortran64_mp/include
21
ACMLLIB=$(ACML)/gfortran64_mp/lib
22

  
23

  
24
# Targets built by `all`.
EXECUTABLE=cblas fblas gsl cublas thunking gotoblas acml
25

  
26
# FORMAT is only referenced by the commented-out DIRECTIVES alternatives below;
# the active DIRECTIVES does not use it and the visible rules pass -DFLOAT / -DDOUBLE
# explicitly.
FORMAT=DOUBLE
27
#FORMAT=FLOAT
28

  
29
# Preprocessor defines passed to every compile command.
#DIRECTIVES=-D$(FORMAT) -DPRINT -DUNIT
30
#DIRECTIVES=-D$(FORMAT) -DUNIT -DRESULTS -DQUIET
31
DIRECTIVES=-DUNIT -DQUIET
32

  
33
# Default goal: build every benchmark variant listed in EXECUTABLE.
# Declared phony because no file named `all` is ever produced.
.PHONY: all
all: $(EXECUTABLE)
34

  
35
# cblas: compile $(SOURCE) with -DCBLAS and link it against -lcblas, once with
# -DFLOAT (output suffix _SP_cblas) and once with -DDOUBLE (output suffix _DP_cblas).
# The target is phony: the recipe never creates a file named `cblas`.
# $(LDFLAGS) carries -lm, a library; the linker resolves libraries in command-line
# order, so it is placed after the source and after -lcblas.
.PHONY: cblas
cblas: $(SOURCE)
	$(CC) $(CFLAGS) $(DIRECTIVES) -DFLOAT -DCBLAS \
		$(SOURCE) -lcblas $(LDFLAGS) -o $(SOURCE:.c=)_SP_$@
	$(CC) $(CFLAGS) $(DIRECTIVES) -DDOUBLE -DCBLAS \
		$(SOURCE) -lcblas $(LDFLAGS) -o $(SOURCE:.c=)_DP_$@
42

  
43
# gotoblas: compile $(SOURCE) with -DCBLAS and link it against the static archive
# $(GOTO2)/libgoto2.a plus -lpthread, once with -DFLOAT (_SP_gotoblas) and once
# with -DDOUBLE (_DP_gotoblas).
# The target is phony: the recipe never creates a file named `gotoblas`.
# $(LDFLAGS) carries -lm, a library; it must follow the archive so that symbols
# the archive leaves undefined can still be resolved from it.
.PHONY: gotoblas
gotoblas: $(SOURCE)
	$(CC) $(CFLAGS) $(DIRECTIVES) -DFLOAT -DCBLAS \
		$(SOURCE) $(GOTO2)/libgoto2.a -lpthread $(LDFLAGS) -o $(SOURCE:.c=)_SP_$@
	$(CC) $(CFLAGS) $(DIRECTIVES) -DDOUBLE -DCBLAS \
		$(SOURCE) $(GOTO2)/libgoto2.a -lpthread $(LDFLAGS) -o $(SOURCE:.c=)_DP_$@
50

  
51
# acml: compile $(SOURCE) with -DACML using headers from $(ACMLINC) and link it
# against -lacml_mp -lacml_mv from $(ACMLLIB) plus -lgomp -lgfortran -lpthread,
# once with -DFLOAT (_SP_acml) and once with -DDOUBLE (_DP_acml).
# The target is phony: the recipe never creates a file named `acml`.
# $(LDFLAGS) carries -lm, a library; the linker resolves libraries in command-line
# order, so it is placed after the source and the other libraries.
.PHONY: acml
acml: $(SOURCE)
	$(CC) -I$(ACMLINC) $(CFLAGS) $(DIRECTIVES) -DFLOAT -DACML \
		$(SOURCE) -L$(ACMLLIB) -lacml_mp -lacml_mv \
		-lgomp -lgfortran -lpthread $(LDFLAGS) -o $(SOURCE:.c=)_SP_$@
	$(CC) -I$(ACMLINC) $(CFLAGS) $(DIRECTIVES) -DDOUBLE -DACML \
		$(SOURCE) -L$(ACMLLIB) -lacml_mp -lacml_mv \
		-lgomp -lgfortran -lpthread $(LDFLAGS) -o $(SOURCE:.c=)_DP_$@
60

  
61
# fblas: compile $(SOURCE) with -DFBLAS and link it against -lf77blas, once with
# -DFLOAT (_SP_fblas) and once with -DDOUBLE (_DP_fblas).
# The target is phony: the recipe never creates a file named `fblas`.
# $(LDFLAGS) carries -lm, a library; the linker resolves libraries in command-line
# order, so it is placed after the source and after -lf77blas.
.PHONY: fblas
fblas: $(SOURCE)
	$(CC) $(CFLAGS) $(DIRECTIVES) -DFLOAT -DFBLAS \
		$(SOURCE) -lf77blas $(LDFLAGS) -o $(SOURCE:.c=)_SP_$@
	$(CC) $(CFLAGS) $(DIRECTIVES) -DDOUBLE -DFBLAS \
		$(SOURCE) -lf77blas $(LDFLAGS) -o $(SOURCE:.c=)_DP_$@
68

  
69

  
70
# gsl: compile $(SOURCE) with -DGSL using headers from $(GSLINC) and link it
# against -lgslcblas, once with -DFLOAT (_SP_gsl) and once with -DDOUBLE (_DP_gsl).
# The target is phony: the recipe never creates a file named `gsl`.
# $(LDFLAGS) carries -lm, a library; the linker resolves libraries in command-line
# order, so it is placed after the source and after -lgslcblas.
.PHONY: gsl
gsl: $(SOURCE)
	$(CC) -I$(GSLINC) $(CFLAGS) $(DIRECTIVES) -DFLOAT -DGSL \
		$(SOURCE) -lgslcblas $(LDFLAGS) -o $(SOURCE:.c=)_SP_$@
	$(CC) -I$(GSLINC) $(CFLAGS) $(DIRECTIVES) -DDOUBLE -DGSL \
		$(SOURCE) -lgslcblas $(LDFLAGS) -o $(SOURCE:.c=)_DP_$@
79

  
80
cublas: $(SOURCE)
81

  
... Ce différentiel a été tronqué car il excède la taille maximale pouvant être affichée.

Formats disponibles : Unified diff