Révision 251

BLAS/xTRSV/Makefile (revision 251)
1
# Makefile designed for Debian Buster
2

  
1 3
SOURCE=xTRSV.c
2 4

  
3 5
CC=gcc
4 6
FC=gfortran
5 7
CFLAGS=-Wall -O3
6 8
LDFLAGS=-lm
7
CUDASRC=/usr/share/doc/nvidia-cuda-doc/examples/
9

  
10
CUDAINC?=/usr/include
11
CUDALIB?=/usr/lib/x86_64-linux-gnu/
12
CUDASRC?=/usr/share/doc/nvidia-cuda-doc/examples/
8 13
THUNKING=fortran_thunking.c
9 14
CUDASRCINC=fortran_common.h
10
CUDAINC=/usr/include
11
CUDALIB=/usr/lib/x86_64-linux-gnu/
12 15

  
13 16
GSLINC=/usr/include/gsl
14 17

  
15
GOTO2=/opt/GotoBLAS2
18
# OPENBLAS=/usr/lib/x86_64-linux-gnu
19
# ATLAS=/usr/lib/x86_64-linux-gnu
16 20

  
17
OPENBLAS=/usr/lib
18

  
19
ATLAS=/usr/lib/atlas-base
20

  
21 21
ACML=/opt/acml
22 22
ACMLINC=$(ACML)/gfortran64_mp/include
23 23
ACMLLIB=$(ACML)/gfortran64_mp/lib
24 24

  
25
#EXECUTABLE=cblas fblas gsl cublas thunking gotoblas acml openblas
25
#EXECUTABLE=cblas fblas gsl cublas thunking acml openblas
26 26
EXECUTABLE=cblas fblas gsl cublas thunking openblas
27 27

  
28
#FORMAT=DOUBLE
29
FORMAT=FLOAT
28
# FORMAT defines the precision of floating-point numbers
29
# FP32 for single precision (32 bits)
30
# FP64 for double precision (64 bits)
31
#FORMAT=FP64
32
FORMAT=FP32
30 33

  
31 34
#DIRECTIVES=-D$(FORMAT) -DPRINT -DUNIT
32 35
#DIRECTIVES=-D$(FORMAT) -DUNIT -DRESULTS -DQUIET
......
37 40

  
38 41
cblas: $(SOURCE)
39 42

  
40
	$(CC) $(CFLAGS) $(DIRECTIVES) -DFLOAT -DCBLAS $(LDFLAGS) \
41
		$(SOURCE) $(ATLAS)/libcblas.a $(ATLAS)/libatlas.a -o $(SOURCE:.c=)_SP_$@
43
	$(CC) $(CFLAGS) $(DIRECTIVES) -DFP32 -DCBLAS $(LDFLAGS) \
44
		$(SOURCE) -latlas -lcblas -o $(SOURCE:.c=)_SP_$@
42 45

  
43
	$(CC) $(CFLAGS) $(DIRECTIVES) -DDOUBLE -DCBLAS $(LDFLAGS) \
44
		$(SOURCE) $(ATLAS)/libcblas.a $(ATLAS)/libatlas.a -o $(SOURCE:.c=)_DP_$@
46
	$(CC) $(CFLAGS) $(DIRECTIVES) -DFP64 -DCBLAS $(LDFLAGS) \
47
		$(SOURCE) -latlas -lcblas -o $(SOURCE:.c=)_DP_$@
45 48

  
46 49
openblas: $(SOURCE)
47 50

  
48
	$(CC) $(CFLAGS) $(DIRECTIVES) -DFLOAT -DCBLAS $(LDFLAGS) \
49
		$(SOURCE) $(OPENBLAS)/libopenblas.a -lpthread -o $(SOURCE:.c=)_SP_$@
51
	$(CC) $(CFLAGS) $(DIRECTIVES) -DFP32 -DCBLAS $(LDFLAGS) \
52
		$(SOURCE) -lopenblas -lpthread -o $(SOURCE:.c=)_SP_$@
50 53

  
51
	$(CC) $(CFLAGS) $(DIRECTIVES) -DDOUBLE -DCBLAS $(LDFLAGS) \
52
		$(SOURCE) $(OPENBLAS)/libopenblas.a -lpthread -o $(SOURCE:.c=)_DP_$@
54
	$(CC) $(CFLAGS) $(DIRECTIVES) -DFP64 -DCBLAS $(LDFLAGS) \
55
		$(SOURCE) -lopenblas -lpthread -o $(SOURCE:.c=)_DP_$@
53 56

  
54 57
acml: $(SOURCE)
55 58

  
56
	$(CC) -I$(ACMLINC) $(CFLAGS) $(DIRECTIVES) -DFLOAT -DACML $(LDFLAGS) \
59
	$(CC) -I$(ACMLINC) $(CFLAGS) $(DIRECTIVES) -DFP32 -DACML $(LDFLAGS) \
57 60
		$(SOURCE) -L$(ACMLLIB) -lacml_mp -lacml_mv \
58 61
		-lgomp -lgfortran -lpthread -o $(SOURCE:.c=)_SP_$@
59 62

  
60
	$(CC) -I$(ACMLINC) $(CFLAGS) $(DIRECTIVES) -DDOUBLE -DACML $(LDFLAGS) \
63
	$(CC) -I$(ACMLINC) $(CFLAGS) $(DIRECTIVES) -DFP64 -DACML $(LDFLAGS) \
61 64
		$(SOURCE) -L$(ACMLLIB) -lacml_mp -lacml_mv \
62 65
		-lgomp -lgfortran -lpthread -o $(SOURCE:.c=)_DP_$@
63 66

  
64 67
fblas: $(SOURCE)
65 68

  
66
	$(CC) $(CFLAGS) $(DIRECTIVES) -DFLOAT -DFBLAS $(LDFLAGS) \
67
		$(SOURCE) $(ATLAS)/libf77blas.a $(ATLAS)/libatlas.a -lgfortran -o $(SOURCE:.c=)_SP_$@
69
	# ATLAS version
70
	# $(CC) -L$(ATLAS) $(CFLAGS) $(DIRECTIVES) -DFP32 -DFBLAS $(LDFLAGS) \
71
	#  	$(SOURCE) -latlas -lf77blas -o $(SOURCE:.c=)_SP_$@
68 72

  
69
	$(CC) $(CFLAGS) $(DIRECTIVES) -DDOUBLE -DFBLAS $(LDFLAGS) \
70
		$(SOURCE) $(ATLAS)/libf77blas.a $(ATLAS)/libatlas.a -lgfortran -o $(SOURCE:.c=)_DP_$@
73
	# $(CC) -L$(ATLAS) $(CFLAGS) $(DIRECTIVES) -DFP64 -DFBLAS $(LDFLAGS) \
74
	#  	$(SOURCE) -latlas -lf77blas -o $(SOURCE:.c=)_DP_$@
71 75

  
76
	# OpenBLAS version
77
	$(CC) $(CFLAGS) $(DIRECTIVES) -DFP32 -DFBLAS $(LDFLAGS) \
78
	  	$(SOURCE) -lopenblas -lpthread -o $(SOURCE:.c=)_SP_$@
72 79

  
80
	$(CC) $(CFLAGS) $(DIRECTIVES) -DFP64 -DFBLAS $(LDFLAGS) \
81
	  	$(SOURCE) -lopenblas -lpthread -o $(SOURCE:.c=)_DP_$@
82

  
83

  
73 84
gsl: $(SOURCE)
74 85

  
75
	$(CC) -I$(GSLINC) $(CFLAGS) $(DIRECTIVES) -DFLOAT -DGSL $(LDFLAGS) \
86
	$(CC) -I$(GSLINC) $(CFLAGS) $(DIRECTIVES) -DFP32 -DGSL $(LDFLAGS) \
76 87
		$(SOURCE) -lgslcblas -o $(SOURCE:.c=)_SP_$@
77 88

  
78
	$(CC) -I$(GSLINC) $(CFLAGS) $(DIRECTIVES) -DDOUBLE -DGSL $(LDFLAGS) \
89
	$(CC) -I$(GSLINC) $(CFLAGS) $(DIRECTIVES) -DFP64 -DGSL $(LDFLAGS) \
79 90
		$(SOURCE) -lgslcblas -o $(SOURCE:.c=)_DP_$@
80 91

  
81 92

  
82 93
cublas: $(SOURCE)
83 94

  
84
	$(CC) -I$(CUDAINC) -L$(CUDALIB) $(CFLAGS) -DCUBLAS -DFLOAT $(LDFLAGS) \
95
	$(CC) -I$(CUDAINC) -L$(CUDALIB) $(CFLAGS) -DCUBLAS -DFP32 $(LDFLAGS) \
85 96
		$(DIRECTIVES) $(SOURCE) -lcublas -o $(SOURCE:.c=)_SP_$@
86 97

  
87
	$(CC) -I$(CUDAINC) -L$(CUDALIB) $(CFLAGS) -DCUBLAS -DDOUBLE $(LDFLAGS) \
98
	$(CC) -I$(CUDAINC) -L$(CUDALIB) $(CFLAGS) -DCUBLAS -DFP64 $(LDFLAGS) \
88 99
		$(DIRECTIVES) $(SOURCE) -lcublas -o $(SOURCE:.c=)_DP_$@
89 100

  
90 101
thunking: $(SOURCE)
......
96 107
	$(CC) -DCUBLAS_GFORTRAN -I$(CUDAINC) $(CFLAGS) -c $(THUNKING)
97 108

  
98 109
	$(CC) -DCUBLAS_GFORTRAN -I$(CUDAINC) -L$(CUDALIB) $(CFLAGS) -DTHUNKING \
99
		$(LDFLAGS) $(DIRECTIVES) $(SOURCE) -DFLOAT \
110
		$(LDFLAGS) $(DIRECTIVES) $(SOURCE) -DFP32 \
100 111
		$(THUNKING:.c=.o) -lcublas -o $(SOURCE:.c=)_SP_$@
101 112

  
102 113
	$(CC) -DCUBLAS_GFORTRAN -I$(CUDAINC) -L$(CUDALIB) $(CFLAGS) -DTHUNKING \
103
		$(LDFLAGS) $(DIRECTIVES) $(SOURCE) -DDOUBLE \
114
		$(LDFLAGS) $(DIRECTIVES) $(SOURCE) -DFP64 \
104 115
		$(THUNKING:.c=.o) -lcublas -o $(SOURCE:.c=)_DP_$@
105 116

  
106 117
clean: $(SOURCE)
BLAS/xTRSV/xTRSV.c (revision 251)
25 25
#include "fortran_common.h"
26 26
#include "fortran_thunking.h"
27 27
#elif FBLAS
28
#include <cblas.h>
29
#include <cblas_f77.h>
28
#include <f77blas.h>
30 29
#elif GSL
31 30
#include <gsl_cblas.h>
32 31
#elif ACML
33 32
#include <acml.h>
34 33
#else
35 34
#include <cblas.h>
36
#include <blaswrap.h>
35
// #include <blaswrap.h>
37 36
#endif
38 37

  
39
#ifdef DOUBLE
38
#ifdef FP64
40 39
#define LENGTH double
41 40
#else
42 41
#define LENGTH float
......
264 263

  
265 264
  for (i=0;i<RUNS;i++)
266 265
    {
267
#ifdef DOUBLE
266
#ifdef FP64
268 267

  
269 268
      printVectorGPU(dim,devPtrX,"X","Roots");
270 269

  
......
346 345

  
347 346
  for (i=0;i<RUNS;i++)
348 347
    {
349
#ifdef DOUBLE
348
#ifdef FP64
350 349
      
351 350
      printVector(dim,X,"X","Roots");
352 351
      
......
412 411
  
413 412
  for (i=0;i<RUNS;i++)
414 413
    {
415
#ifdef DOUBLE
414
#ifdef FP64
416 415
      
417 416
      printVector(dim,X,"X","Initial roots");
418 417
      
418
      /* /\* Multiply A by X as Y <- A.X *\/ */
419
      /* dgemv_(&trans,&dim,&dim,&alpha,A,&dim,X,&incx,&beta,Y,&incx); */
420
      
421
      /* printVector(dim,Y,"Y<-A.X","Estimated results"); */
422
      
423
      /* /\* Solve linear system *\/ */
424
      /* dtrsv_(&uplo,&trans,&diag,&dim,A,&dim,Y,&incx); */
425
      
426
      /* printVector(dim,Y,"X","Solutions from A.X=Y"); */
427
      
428
      /* /\* Compare the roots X and Y *\/ */
429
      /* daxpy_(&dim,&beta2,Y,&incx,X,&incx); */
430
      
431
      /* printVector(dim,X,"X","Differences initial and estimated"); */
432
      
433
      /* /\* Store the checker of errors *\/ */
434
      /* dnrm2_(&dim,X,&incx,&checksA[i]); */
435
            
436
      /* /\* Swap vector X and Y *\/ */
437
      /* dswap_(&dim,X,&incx,Y,&incx); */
438

  
419 439
      /* Multiply A by X as Y <- A.X */
420 440
      dgemv_(&trans,&dim,&dim,&alpha,A,&dim,X,&incx,&beta,Y,&incx);
421 441
      
......
432 452
      printVector(dim,X,"X","Differences initial and estimated");
433 453
      
434 454
      /* Store the norm of X as the error check for this run */
435
      dnrm2_(&dim,X,&incx,&checksA[i]);
455
      checksA[i]=(double)dnrm2_(&dim,X,&incx);
436 456
            
437 457
      /* Swap vector X and Y */
438 458
      dswap_(&dim,X,&incx,Y,&incx);
......
457 477
      printVector(dim,X,"X","Errors");
458 478

  
459 479
      /* Store the norm of X as the error check for this run */
460
      snrm2_(&dim,X,&incx,&checksA[i]);
480
      checksA[i]=(LENGTH)snrm2_(&dim,X,&incx);
461 481

  
462 482
      /* Swap vector X and Y */
463 483
      sswap_(&dim,X,&incx,Y,&incx);
......
475 495
  
476 496
  for (i=0;i<RUNS;i++)
477 497
    {
478
#ifdef DOUBLE
498
#ifdef FP64
479 499
      
480 500
      printVector(dim,X,"X","Roots");
481 501
      
......
542 562
  for (i=0;i<RUNS;i++)
543 563
    {  
544 564

  
545
#ifdef DOUBLE
565
#ifdef FP64
546 566

  
547 567
      printVector(dim,X,"X","Roots");
548 568

  
......
609 629
  for (i=0;i<RUNS;i++)
610 630
    {  
611 631

  
612
#ifdef DOUBLE
632
#ifdef FP64
613 633

  
614 634
      printVector(dim,X,"X","Roots");
615 635

  

Formats disponibles : Unified diff