Révision 251
BLAS/xTRSV/Makefile (revision 251) | ||
---|---|---|
1 |
# Makefile designed for Debian Buster |
|
2 |
|
|
1 | 3 |
SOURCE=xTRSV.c |
2 | 4 |
|
3 | 5 |
CC=gcc |
4 | 6 |
FC=gfortran |
5 | 7 |
CFLAGS=-Wall -O3 |
6 | 8 |
LDFLAGS=-lm |
7 |
CUDASRC=/usr/share/doc/nvidia-cuda-doc/examples/ |
|
9 |
|
|
10 |
CUDAINC?=/usr/include |
|
11 |
CUDALIB?=/usr/lib/x86_64-linux-gnu/ |
|
12 |
CUDASRC?=/usr/share/doc/nvidia-cuda-doc/examples/ |
|
8 | 13 |
THUNKING=fortran_thunking.c |
9 | 14 |
CUDASRCINC=fortran_common.h |
10 |
CUDAINC=/usr/include |
|
11 |
CUDALIB=/usr/lib/x86_64-linux-gnu/ |
|
12 | 15 |
|
13 | 16 |
GSLINC=/usr/include/gsl |
14 | 17 |
|
15 |
GOTO2=/opt/GotoBLAS2 |
|
18 |
# OPENBLAS=/usr/lib/x86_64-linux-gnu |
|
19 |
# ATLAS=/usr/lib/x86_64-linux-gnu |
|
16 | 20 |
|
17 |
OPENBLAS=/usr/lib |
|
18 |
|
|
19 |
ATLAS=/usr/lib/atlas-base |
|
20 |
|
|
21 | 21 |
ACML=/opt/acml |
22 | 22 |
ACMLINC=$(ACML)/gfortran64_mp/include |
23 | 23 |
ACMLLIB=$(ACML)/gfortran64_mp/lib |
24 | 24 |
|
25 |
#EXECUTABLE=cblas fblas gsl cublas thunking gotoblas acml openblas
|
|
25 |
#EXECUTABLE=cblas fblas gsl cublas thunking acml openblas |
|
26 | 26 |
EXECUTABLE=cblas fblas gsl cublas thunking openblas |
27 | 27 |
|
28 |
#FORMAT=DOUBLE |
|
29 |
FORMAT=FLOAT |
|
28 |
# FORMAT defines the precision of floating-point numbers |
|
29 |
# FP32 for single precision (32 bits) |
|
30 |
# FP64 for double precision (64 bits) |
|
31 |
#FORMAT=FP64 |
|
32 |
FORMAT=FP32 |
|
30 | 33 |
|
31 | 34 |
#DIRECTIVES=-D$(FORMAT) -DPRINT -DUNIT |
32 | 35 |
#DIRECTIVES=-D$(FORMAT) -DUNIT -DRESULTS -DQUIET |
... | ... | |
37 | 40 |
|
38 | 41 |
cblas: $(SOURCE) |
39 | 42 |
|
40 |
$(CC) $(CFLAGS) $(DIRECTIVES) -DFLOAT -DCBLAS $(LDFLAGS) \
|
|
41 |
$(SOURCE) $(ATLAS)/libcblas.a $(ATLAS)/libatlas.a -o $(SOURCE:.c=)_SP_$@
|
|
43 |
$(CC) $(CFLAGS) $(DIRECTIVES) -DFP32 -DCBLAS $(LDFLAGS) \
|
|
44 |
$(SOURCE) -latlas -lcblas -o $(SOURCE:.c=)_SP_$@
|
|
42 | 45 |
|
43 |
$(CC) $(CFLAGS) $(DIRECTIVES) -DDOUBLE -DCBLAS $(LDFLAGS) \
|
|
44 |
$(SOURCE) $(ATLAS)/libcblas.a $(ATLAS)/libatlas.a -o $(SOURCE:.c=)_DP_$@
|
|
46 |
$(CC) $(CFLAGS) $(DIRECTIVES) -DFP64 -DCBLAS $(LDFLAGS) \
|
|
47 |
$(SOURCE) -latlas -lcblas -o $(SOURCE:.c=)_DP_$@
|
|
45 | 48 |
|
46 | 49 |
openblas: $(SOURCE) |
47 | 50 |
|
48 |
$(CC) $(CFLAGS) $(DIRECTIVES) -DFLOAT -DCBLAS $(LDFLAGS) \
|
|
49 |
$(SOURCE) $(OPENBLAS)/libopenblas.a -lpthread -o $(SOURCE:.c=)_SP_$@
|
|
51 |
$(CC) $(CFLAGS) $(DIRECTIVES) -DFP32 -DCBLAS $(LDFLAGS) \
|
|
52 |
$(SOURCE) -lopenblas -lpthread -o $(SOURCE:.c=)_SP_$@
|
|
50 | 53 |
|
51 |
$(CC) $(CFLAGS) $(DIRECTIVES) -DDOUBLE -DCBLAS $(LDFLAGS) \
|
|
52 |
$(SOURCE) $(OPENBLAS)/libopenblas.a -lpthread -o $(SOURCE:.c=)_DP_$@
|
|
54 |
$(CC) $(CFLAGS) $(DIRECTIVES) -DFP64 -DCBLAS $(LDFLAGS) \
|
|
55 |
$(SOURCE) -lopenblas -lpthread -o $(SOURCE:.c=)_DP_$@
|
|
53 | 56 |
|
54 | 57 |
acml: $(SOURCE) |
55 | 58 |
|
56 |
$(CC) -I$(ACMLINC) $(CFLAGS) $(DIRECTIVES) -DFLOAT -DACML $(LDFLAGS) \
|
|
59 |
$(CC) -I$(ACMLINC) $(CFLAGS) $(DIRECTIVES) -DFP32 -DACML $(LDFLAGS) \
|
|
57 | 60 |
$(SOURCE) -L$(ACMLLIB) -lacml_mp -lacml_mv \ |
58 | 61 |
-lgomp -lgfortran -lpthread -o $(SOURCE:.c=)_SP_$@ |
59 | 62 |
|
60 |
$(CC) -I$(ACMLINC) $(CFLAGS) $(DIRECTIVES) -DDOUBLE -DACML $(LDFLAGS) \
|
|
63 |
$(CC) -I$(ACMLINC) $(CFLAGS) $(DIRECTIVES) -DFP64 -DACML $(LDFLAGS) \
|
|
61 | 64 |
$(SOURCE) -L$(ACMLLIB) -lacml_mp -lacml_mv \ |
62 | 65 |
-lgomp -lgfortran -lpthread -o $(SOURCE:.c=)_DP_$@ |
63 | 66 |
|
64 | 67 |
fblas: $(SOURCE) |
65 | 68 |
|
66 |
$(CC) $(CFLAGS) $(DIRECTIVES) -DFLOAT -DFBLAS $(LDFLAGS) \ |
|
67 |
$(SOURCE) $(ATLAS)/libf77blas.a $(ATLAS)/libatlas.a -lgfortran -o $(SOURCE:.c=)_SP_$@ |
|
69 |
# ATLAS version |
|
70 |
# $(CC) -L$(ATLAS) $(CFLAGS) $(DIRECTIVES) -DFP32 -DFBLAS $(LDFLAGS) \ |
|
71 |
# $(SOURCE) -latlas -lf77blas -o $(SOURCE:.c=)_SP_$@ |
|
68 | 72 |
|
69 |
$(CC) $(CFLAGS) $(DIRECTIVES) -DDOUBLE -DFBLAS $(LDFLAGS) \
|
|
70 |
$(SOURCE) $(ATLAS)/libf77blas.a $(ATLAS)/libatlas.a -lgfortran -o $(SOURCE:.c=)_DP_$@
|
|
73 |
# $(CC) -L$(ATLAS) $(CFLAGS) $(DIRECTIVES) -DFP64 -DFBLAS $(LDFLAGS) \
|
|
74 |
# $(SOURCE) -latlas -lf77blas -o $(SOURCE:.c=)_DP_$@
|
|
71 | 75 |
|
76 |
# OpenBLAS version |
|
77 |
$(CC) $(CFLAGS) $(DIRECTIVES) -DFP32 -DFBLAS $(LDFLAGS) \ |
|
78 |
$(SOURCE) -lopenblas -lpthread -o $(SOURCE:.c=)_SP_$@ |
|
72 | 79 |
|
80 |
$(CC) $(CFLAGS) $(DIRECTIVES) -DFP64 -DFBLAS $(LDFLAGS) \ |
|
81 |
$(SOURCE) -lopenblas -lpthread -o $(SOURCE:.c=)_DP_$@ |
|
82 |
|
|
83 |
|
|
73 | 84 |
gsl: $(SOURCE) |
74 | 85 |
|
75 |
$(CC) -I$(GSLINC) $(CFLAGS) $(DIRECTIVES) -DFLOAT -DGSL $(LDFLAGS) \
|
|
86 |
$(CC) -I$(GSLINC) $(CFLAGS) $(DIRECTIVES) -DFP32 -DGSL $(LDFLAGS) \
|
|
76 | 87 |
$(SOURCE) -lgslcblas -o $(SOURCE:.c=)_SP_$@ |
77 | 88 |
|
78 |
$(CC) -I$(GSLINC) $(CFLAGS) $(DIRECTIVES) -DDOUBLE -DGSL $(LDFLAGS) \
|
|
89 |
$(CC) -I$(GSLINC) $(CFLAGS) $(DIRECTIVES) -DFP64 -DGSL $(LDFLAGS) \
|
|
79 | 90 |
$(SOURCE) -lgslcblas -o $(SOURCE:.c=)_DP_$@ |
80 | 91 |
|
81 | 92 |
|
82 | 93 |
cublas: $(SOURCE) |
83 | 94 |
|
84 |
$(CC) -I$(CUDAINC) -L$(CUDALIB) $(CFLAGS) -DCUBLAS -DFLOAT $(LDFLAGS) \
|
|
95 |
$(CC) -I$(CUDAINC) -L$(CUDALIB) $(CFLAGS) -DCUBLAS -DFP32 $(LDFLAGS) \
|
|
85 | 96 |
$(DIRECTIVES) $(SOURCE) -lcublas -o $(SOURCE:.c=)_SP_$@ |
86 | 97 |
|
87 |
$(CC) -I$(CUDAINC) -L$(CUDALIB) $(CFLAGS) -DCUBLAS -DDOUBLE $(LDFLAGS) \
|
|
98 |
$(CC) -I$(CUDAINC) -L$(CUDALIB) $(CFLAGS) -DCUBLAS -DFP64 $(LDFLAGS) \
|
|
88 | 99 |
$(DIRECTIVES) $(SOURCE) -lcublas -o $(SOURCE:.c=)_DP_$@ |
89 | 100 |
|
90 | 101 |
thunking: $(SOURCE) |
... | ... | |
96 | 107 |
$(CC) -DCUBLAS_GFORTRAN -I$(CUDAINC) $(CFLAGS) -c $(THUNKING) |
97 | 108 |
|
98 | 109 |
$(CC) -DCUBLAS_GFORTRAN -I$(CUDAINC) -L$(CUDALIB) $(CFLAGS) -DTHUNKING \ |
99 |
$(LDFLAGS) $(DIRECTIVES) $(SOURCE) -DFLOAT \
|
|
110 |
$(LDFLAGS) $(DIRECTIVES) $(SOURCE) -DFP32 \
|
|
100 | 111 |
$(THUNKING:.c=.o) -lcublas -o $(SOURCE:.c=)_SP_$@ |
101 | 112 |
|
102 | 113 |
$(CC) -DCUBLAS_GFORTRAN -I$(CUDAINC) -L$(CUDALIB) $(CFLAGS) -DTHUNKING \ |
103 |
$(LDFLAGS) $(DIRECTIVES) $(SOURCE) -DDOUBLE \
|
|
114 |
$(LDFLAGS) $(DIRECTIVES) $(SOURCE) -DFP64 \
|
|
104 | 115 |
$(THUNKING:.c=.o) -lcublas -o $(SOURCE:.c=)_DP_$@ |
105 | 116 |
|
106 | 117 |
clean: $(SOURCE) |
BLAS/xTRSV/xTRSV.c (revision 251) | ||
---|---|---|
25 | 25 |
#include "fortran_common.h" |
26 | 26 |
#include "fortran_thunking.h" |
27 | 27 |
#elif FBLAS |
28 |
#include <cblas.h> |
|
29 |
#include <cblas_f77.h> |
|
28 |
#include <f77blas.h> |
|
30 | 29 |
#elif GSL |
31 | 30 |
#include <gsl_cblas.h> |
32 | 31 |
#elif ACML |
33 | 32 |
#include <acml.h> |
34 | 33 |
#else |
35 | 34 |
#include <cblas.h> |
36 |
#include <blaswrap.h> |
|
35 |
// #include <blaswrap.h>
|
|
37 | 36 |
#endif |
38 | 37 |
|
39 |
#ifdef DOUBLE
|
|
38 |
#ifdef FP64
|
|
40 | 39 |
#define LENGTH double |
41 | 40 |
#else |
42 | 41 |
#define LENGTH float |
... | ... | |
264 | 263 |
|
265 | 264 |
for (i=0;i<RUNS;i++) |
266 | 265 |
{ |
267 |
#ifdef DOUBLE
|
|
266 |
#ifdef FP64
|
|
268 | 267 |
|
269 | 268 |
printVectorGPU(dim,devPtrX,"X","Roots"); |
270 | 269 |
|
... | ... | |
346 | 345 |
|
347 | 346 |
for (i=0;i<RUNS;i++) |
348 | 347 |
{ |
349 |
#ifdef DOUBLE
|
|
348 |
#ifdef FP64
|
|
350 | 349 |
|
351 | 350 |
printVector(dim,X,"X","Roots"); |
352 | 351 |
|
... | ... | |
412 | 411 |
|
413 | 412 |
for (i=0;i<RUNS;i++) |
414 | 413 |
{ |
415 |
#ifdef DOUBLE
|
|
414 |
#ifdef FP64
|
|
416 | 415 |
|
417 | 416 |
printVector(dim,X,"X","Initial roots"); |
418 | 417 |
|
418 |
/* /\* Multiply A by X as Y <- A.X *\/ */ |
|
419 |
/* dgemv_(&trans,&dim,&dim,&alpha,A,&dim,X,&incx,&beta,Y,&incx); */ |
|
420 |
|
|
421 |
/* printVector(dim,Y,"Y<-A.X","Estimated results"); */ |
|
422 |
|
|
423 |
/* /\* Solve linear system *\/ */ |
|
424 |
/* dtrsv_(&uplo,&trans,&diag,&dim,A,&dim,Y,&incx); */ |
|
425 |
|
|
426 |
/* printVector(dim,Y,"X","Solutions from A.X=Y"); */ |
|
427 |
|
|
428 |
/* /\* Compare the roots X and Y *\/ */ |
|
429 |
/* daxpy_(&dim,&beta2,Y,&incx,X,&incx); */ |
|
430 |
|
|
431 |
/* printVector(dim,X,"X","Differences initial and estimated"); */ |
|
432 |
|
|
433 |
/* /\* Store the checker of errors *\/ */ |
|
434 |
/* dnrm2_(&dim,X,&incx,&checksA[i]); */ |
|
435 |
|
|
436 |
/* /\* Swap vector X and Y *\/ */ |
|
437 |
/* dswap_(&dim,X,&incx,Y,&incx); */ |
|
438 |
|
|
419 | 439 |
/* Multiply A by X as Y <- A.X */ |
420 | 440 |
dgemv_(&trans,&dim,&dim,&alpha,A,&dim,X,&incx,&beta,Y,&incx); |
421 | 441 |
|
... | ... | |
432 | 452 |
printVector(dim,X,"X","Differences initial and estimated"); |
433 | 453 |
|
434 | 454 |
/* Store the checker of errors */ |
435 |
dnrm2_(&dim,X,&incx,&checksA[i]);
|
|
455 |
checksA[i]=(double)dnrm2_(&dim,X,&incx);
|
|
436 | 456 |
|
437 | 457 |
/* Swap vector X and Y */ |
438 | 458 |
dswap_(&dim,X,&incx,Y,&incx); |
... | ... | |
457 | 477 |
printVector(dim,X,"X","Errors"); |
458 | 478 |
|
459 | 479 |
/* Store the checker of errors */ |
460 |
snrm2_(&dim,X,&incx,&checksA[i]);
|
|
480 |
checksA[i]=(LENGTH)snrm2_(&dim,X,&incx);
|
|
461 | 481 |
|
462 | 482 |
/* Swap vector X and Y */ |
463 | 483 |
sswap_(&dim,X,&incx,Y,&incx); |
... | ... | |
475 | 495 |
|
476 | 496 |
for (i=0;i<RUNS;i++) |
477 | 497 |
{ |
478 |
#ifdef DOUBLE
|
|
498 |
#ifdef FP64
|
|
479 | 499 |
|
480 | 500 |
printVector(dim,X,"X","Roots"); |
481 | 501 |
|
... | ... | |
542 | 562 |
for (i=0;i<RUNS;i++) |
543 | 563 |
{ |
544 | 564 |
|
545 |
#ifdef DOUBLE
|
|
565 |
#ifdef FP64
|
|
546 | 566 |
|
547 | 567 |
printVector(dim,X,"X","Roots"); |
548 | 568 |
|
... | ... | |
609 | 629 |
for (i=0;i<RUNS;i++) |
610 | 630 |
{ |
611 | 631 |
|
612 |
#ifdef DOUBLE
|
|
632 |
#ifdef FP64
|
|
613 | 633 |
|
614 | 634 |
printVector(dim,X,"X","Roots"); |
615 | 635 |
|
Formats disponibles : Unified diff