Révision 250

BLAS/xGEMM/Makefile (revision 250)
1
# Makefile designed for Debian Buster
2

  
1 3
SOURCE=xGEMM.c
2 4

  
3 5
CC=gcc
4 6
FC=gfortran
5 7
CFLAGS=-Wall -O3 -ffast-math
6 8
LDFLAGS=-lm
7
#CUDASRC=/usr/share/doc/nvidia-cuda-doc/examples/
8
CUDASRC=/usr/share/doc/nvidia-cuda-toolkit/examples/
9
THUNKING=fortran_thunking.c
10
CUDASRCINC=fortran_common.h
11
CUDAINC=/usr/include
12
CUDALIB=/usr/lib/x86_64-linux-gnu/
9

  
13 10
CLBLASINC=/usr/include
14 11
CLBLASLIB=/usr/lib/x86_64-linux-gnu
15
# CLBLASINC=/opt/clBLAS/src
16
# CLBLASLIB=/opt/clBLAS/src/library/
17 12

  
18
PATCHTHUNKING=patch_thunking.h
13
CUDALIB?=/usr/lib/x86_64-linux-gnu/
14
CUDAINC?=/usr/include
15
CUDASRC?=/usr/share/doc/nvidia-cuda-toolkit/examples/
16
THUNKING=fortran_thunking.c
17
CUDASRCINC=fortran_common.h
19 18

  
20 19
GSLINC=/usr/include/gsl
21 20

  
22
GOTO2=/opt/GotoBLAS2
21
# OPENBLAS=/usr/lib/x86_64-linux-gnu
22
# ATLAS=/usr/lib/x86_64-linux-gnu
23 23

  
24
#OPENBLAS=/usr/lib/openblas-base
25
OPENBLAS=/usr/lib/x86_64-linux-gnu
26

  
27 24
ACML=/opt/acml
28 25
ACMLINC=$(ACML)/gfortran64_mp/include
29 26
ACMLLIB=$(ACML)/gfortran64_mp/lib
30 27

  
31
ATLAS=/usr/lib/atlas-base
28
#EXECUTABLE=gsl cblas fblas openblas clblas cublas acml thunking
29
EXECUTABLE=gsl cblas fblas openblas clblas cublas thunking
32 30

  
33
#EXECUTABLE=cblas fblas gsl cublas thunking openblas acml clblas
34
#EXECUTABLE=cblas fblas gsl cublas thunking openblas clblas
35
#EXECUTABLE=gsl openblas clblas cublas thunking
36
#EXECUTABLE=openblas clblas cublas thunking
37
#EXECUTABLE=acml gsl cblas fblas openblas clblas cublas thunking
38
#EXECUTABLE=gsl cblas fblas openblas clblas cublas thunking
39
EXECUTABLE=gsl cblas openblas clblas cublas thunking
31
# FORMAT defines the precision of floating-point numbers
32
# FP32 for single precision (32 bits)
33
# FP64 for double precision (64 bits)
34
FORMAT=FP64
35
#FORMAT=FP32
40 36

  
41
FORMAT=DOUBLE
42
#FORMAT=FLOAT
43

  
44 37
#DIRECTIVES=-D$(FORMAT) -DPRINT -DUNIT
45 38
#DIRECTIVES=-D$(FORMAT) -DUNIT -DRESULTS -DQUIET
46 39
DIRECTIVES=-DUNIT -DQUIET
......
49 42

  
50 43
cblas: $(SOURCE)
51 44

  
52
	$(CC) -L$(ATLAS) $(CFLAGS) $(DIRECTIVES) -DFLOAT -DCBLAS $(LDFLAGS) \
45
	$(CC) $(CFLAGS) $(DIRECTIVES) -DFP32 -DCBLAS $(LDFLAGS) \
53 46
		$(SOURCE) -latlas -lcblas -o $(SOURCE:.c=)_SP_$@
54 47

  
55
	$(CC) -L$(ATLAS) $(CFLAGS) $(DIRECTIVES) -DDOUBLE -DCBLAS $(LDFLAGS) \
48
	$(CC) $(CFLAGS) $(DIRECTIVES) -DFP64 -DCBLAS $(LDFLAGS) \
56 49
		$(SOURCE) -latlas -lcblas -o $(SOURCE:.c=)_DP_$@
57 50

  
58
gotoblas: $(SOURCE)
59

  
60
	$(CC) $(CFLAGS) $(DIRECTIVES) -DFLOAT -DCBLAS $(LDFLAGS) \
61
		$(SOURCE) $(GOTO2)/libgoto2.a -lpthread -o $(SOURCE:.c=)_SP_$@
62

  
63
	$(CC) $(CFLAGS) $(DIRECTIVES) -DDOUBLE -DCBLAS $(LDFLAGS) \
64
		$(SOURCE) $(GOTO2)/libgoto2.a -lpthread -o $(SOURCE:.c=)_DP_$@
65

  
66 51
openblas: $(SOURCE)
67 52

  
68
	$(CC) $(CFLAGS) $(DIRECTIVES) -L/usr/include/openblas -fopenmp -DFLOAT -DCBLAS $(LDFLAGS) \
69
		$(SOURCE) $(OPENBLAS)/libopenblas.a -lpthread -o $(SOURCE:.c=)_SP_$@
53
	$(CC) $(CFLAGS) $(DIRECTIVES) -DFP32 -DCBLAS $(LDFLAGS) \
54
		$(SOURCE) -lopenblas -lpthread -o $(SOURCE:.c=)_SP_$@
70 55

  
71
	$(CC) $(CFLAGS) $(DIRECTIVES) -L/usr/include/openblas -fopenmp -DDOUBLE -DCBLAS $(LDFLAGS) \
72
		$(SOURCE) $(OPENBLAS)/libopenblas.a -lpthread -o $(SOURCE:.c=)_DP_$@
56
	$(CC) $(CFLAGS) $(DIRECTIVES) -DFP64 -DCBLAS $(LDFLAGS) \
57
		$(SOURCE) -lopenblas -lpthread -o $(SOURCE:.c=)_DP_$@
73 58

  
74 59
acml: $(SOURCE)
75 60

  
76
	$(CC) -I$(ACMLINC) $(CFLAGS) $(DIRECTIVES) -DFLOAT -DACML $(LDFLAGS) \
61
	$(CC) -I$(ACMLINC) $(CFLAGS) $(DIRECTIVES) -DFP32 -DACML $(LDFLAGS) \
77 62
		$(SOURCE) -L$(ACMLLIB) -lacml_mp \
78 63
		-lgomp -lgfortran -lpthread -o $(SOURCE:.c=)_SP_$@
79 64

  
80
	$(CC) -I$(ACMLINC) $(CFLAGS) $(DIRECTIVES) -DDOUBLE -DACML $(LDFLAGS) \
65
	$(CC) -I$(ACMLINC) $(CFLAGS) $(DIRECTIVES) -DFP64 -DACML $(LDFLAGS) \
81 66
		$(SOURCE) -L$(ACMLLIB) -lacml_mp \
82 67
		-lgomp -lgfortran -lpthread -o $(SOURCE:.c=)_DP_$@
83 68

  
84 69
fblas: $(SOURCE)
85 70

  
86
	$(CC) -L$(ATLAS) $(CFLAGS) $(DIRECTIVES) -DFLOAT -DFBLAS $(LDFLAGS) \
87
		$(SOURCE) -latlas -lf77blas -o $(SOURCE:.c=)_SP_$@
71
	# ATLAS version
72
	# $(CC) -L$(ATLAS) $(CFLAGS) $(DIRECTIVES) -DFP32 -DFBLAS $(LDFLAGS) \
73
	# 	$(SOURCE) -latlas -lf77blas -o $(SOURCE:.c=)_SP_$@
88 74

  
89
	$(CC) -L$(ATLAS) $(CFLAGS) $(DIRECTIVES) -DDOUBLE -DFBLAS $(LDFLAGS) \
90
		$(SOURCE) -latlas -lf77blas -o $(SOURCE:.c=)_DP_$@
75
	# $(CC) -L$(ATLAS) $(CFLAGS) $(DIRECTIVES) -DFP64 -DFBLAS $(LDFLAGS) \
76
	# 	$(SOURCE) -latlas -lf77blas -o $(SOURCE:.c=)_DP_$@
91 77

  
78
	# OpenBLAS version
79
	$(CC) $(CFLAGS) $(DIRECTIVES) -DFP32 -DFBLAS $(LDFLAGS) \
80
		$(SOURCE) -lopenblas -lpthread -o $(SOURCE:.c=)_SP_$@
92 81

  
82
	$(CC) $(CFLAGS) $(DIRECTIVES) -DFP64 -DFBLAS $(LDFLAGS) \
83
		$(SOURCE) -lopenblas -lpthread -o $(SOURCE:.c=)_DP_$@
84

  
85

  
93 86
gsl: $(SOURCE)
94 87

  
95
	$(CC) -I$(GSLINC) $(CFLAGS) $(DIRECTIVES) -DFLOAT \
88
	$(CC) -I$(GSLINC) $(CFLAGS) $(DIRECTIVES) -DFP32 \
96 89
		-DGSL $(LDFLAGS) \
97 90
		$(SOURCE) -lgslcblas -o $(SOURCE:.c=)_SP_$@
98 91

  
99
	$(CC) -I$(GSLINC) $(CFLAGS) $(DIRECTIVES) -DDOUBLE \
92
	$(CC) -I$(GSLINC) $(CFLAGS) $(DIRECTIVES) -DFP64 \
100 93
		-DGSL $(LDFLAGS) \
101 94
		$(SOURCE) -lgslcblas -o $(SOURCE:.c=)_DP_$@
102 95

  
103 96
clblas: $(SOURCE)
104 97

  
105
	$(CC) -I$(CLBLASINC) -L$(CLBLASLIB) $(CFLAGS) -DFLOAT \
98
	$(CC) -I$(CLBLASINC) -L$(CLBLASLIB) $(CFLAGS) -DFP32 \
106 99
		-DCLBLAS $(LDFLAGS) \
107 100
		$(DIRECTIVES) $(SOURCE) -lclBLAS -lOpenCL -o $(SOURCE:.c=)_SP_$@
108 101

  
109
	$(CC) -I$(CLBLASINC) -L$(CLBLASLIB) $(CFLAGS) -DDOUBLE \
102
	$(CC) -I$(CLBLASINC) -L$(CLBLASLIB) $(CFLAGS) -DFP64 \
110 103
		-DCLBLAS $(LDFLAGS) \
111 104
		$(DIRECTIVES) $(SOURCE) -lclBLAS -lOpenCL -o $(SOURCE:.c=)_DP_$@
112 105

  
113 106
cublas: $(SOURCE)
114 107

  
115
	$(CC) -I$(CUDAINC) -L$(CUDALIB) $(CFLAGS) -DFLOAT \
108
	$(CC) -I$(CUDAINC) -L$(CUDALIB) $(CFLAGS) -DFP32 \
116 109
		-DCUBLAS $(LDFLAGS) \
117 110
		$(DIRECTIVES) $(SOURCE) -lcublas -o $(SOURCE:.c=)_SP_$@
118 111

  
119
	$(CC) -I$(CUDAINC) -L$(CUDALIB) $(CFLAGS) -DDOUBLE \
112
	$(CC) -I$(CUDAINC) -L$(CUDALIB) $(CFLAGS) -DFP64 \
120 113
		-DCUBLAS $(LDFLAGS) \
121 114
		$(DIRECTIVES) $(SOURCE) -lcublas -o $(SOURCE:.c=)_DP_$@
122 115

  
......
129 122
	$(CC) -DCUBLAS_GFORTRAN -I$(CUDAINC) $(CFLAGS) -c $(THUNKING)
130 123

  
131 124
	$(CC) -DCUBLAS_GFORTRAN \
132
		-I$(CUDAINC) -L$(CUDALIB) $(CFLAGS) -DFLOAT -DTHUNKING \
125
		-I$(CUDAINC) -L$(CUDALIB) $(CFLAGS) -DFP32 -DTHUNKING \
133 126
		$(LDFLAGS) $(DIRECTIVES) $(SOURCE) \
134 127
		$(THUNKING:.c=.o) -lcublas -o $(SOURCE:.c=)_SP_$@
135 128

  
136 129
	$(CC) -DCUBLAS_GFORTRAN \
137
		-I$(CUDAINC) -L$(CUDALIB) $(CFLAGS) -DDOUBLE -DTHUNKING \
130
		-I$(CUDAINC) -L$(CUDALIB) $(CFLAGS) -DFP64 -DTHUNKING \
138 131
		$(LDFLAGS) $(DIRECTIVES) $(SOURCE) \
139 132
		$(THUNKING:.c=.o) -lcublas -o $(SOURCE:.c=)_DP_$@
140 133

  
BLAS/xGEMM/xGEMM.c (revision 250)
30 30
#include "fortran_common.h"
31 31
#include "fortran_thunking.h"
32 32
#elif FBLAS
33
#include <cblas.h>
34
#include <cblas_f77.h>
33
#include <f77blas.h>
35 34
#elif GSL
36 35
#include <gsl_cblas.h>
37 36
#elif ACML
......
43 42

  
44 43
#ifdef CLBLAS
45 44

  
46
#ifdef DOUBLE
45
#ifdef FP64
47 46
#define LENGTH cl_double
48 47
#else
49 48
#define LENGTH cl_float
......
51 50

  
52 51
#else
53 52

  
54
#ifdef DOUBLE
53
#ifdef FP64
55 54
#define LENGTH double
56 55
#else
57 56
#define LENGTH float
......
59 58

  
60 59
#endif
61 60

  
62
#ifdef FBLAS
61
/* #ifdef FBLAS */
63 62

  
64
#ifdef DOUBLE
63
/* #ifdef FP64 */
65 64

  
66
void F77_dgemm(FCHAR, FCHAR, FINT, FINT, FINT, const double *, const double *, FINT, 
67
	       const double *, FINT, const double *, double *, FINT);
65
/* void F77_dgemm(FCHAR, FCHAR, FINT, FINT, FINT, const double *, const double *, FINT,  */
66
/* 	       const double *, FINT, const double *, double *, FINT); */
68 67

  
69
#else
68
/* #else */
70 69

  
71
void F77_sgemm(FCHAR, FCHAR, FINT, FINT, FINT, const float *, const float *, FINT, 
72
	       const float *, FINT, const float *, float *, FINT);
70
/* void F77_sgemm(FCHAR, FCHAR, FINT, FINT, FINT, const float *, const float *, FINT,  */
71
/* 	       const float *, FINT, const float *, float *, FINT); */
73 72

  
74
#endif
75
#endif
73
/* #endif */
74
/* #endif */
76 75

  
77 76
/* Matrix with only defined triangular terms */
78 77
/* Even where the matrix holds zeros, every element must still be defined! */
......
312 311
  /* Get third timer after memory operation */
313 312
  gettimeofday(&tv3, &tz);
314 313

  
315
#ifdef DOUBLE
314
#ifdef FP64
316 315

  
317 316
  for (i=0;i<RUNS;i++)
318 317
    {
......
428 427
  /* Get third timer after memory operation */
429 428
  gettimeofday(&tv3, &tz);
430 429

  
431
#ifdef DOUBLE
430
#ifdef FP64
432 431

  
433 432
  for (i=0;i<RUNS;i++)
434 433
    {
......
473 472
  printf("Using CuBLAS/Thunking: %i iterations for %ix%i matrix\n",
474 473
	 RUNS,dim,dim);
475 474

  
476
#ifdef DOUBLE
475
#ifdef FP64
477 476

  
478 477
  for (i=0;i<RUNS;i++)
479 478
    {      
......
503 502
  printf("Using FBLAS: %i iterations for %ix%i matrix\n",
504 503
	 RUNS,dim,dim);
505 504
  
506
#ifdef DOUBLE
505
#ifdef FP64
507 506

  
508 507
  for (i=0;i<RUNS;i++)
509 508
    {    
510
      F77_dgemm(&transa,&transa,&dim,&dim,&dim,&alpha,B,&dim,A,&dim,&beta,C,&dim);
511
      F77_dgemm(&transb,&transb,&dim,&dim,&dim,&alpha,A,&dim,B,&dim,&beta,D,&dim);
509
      dgemm_(&transa,&transa,&dim,&dim,&dim,&alpha,B,&dim,A,&dim,&beta,C,&dim);
510
      dgemm_(&transb,&transb,&dim,&dim,&dim,&alpha,A,&dim,B,&dim,&beta,D,&dim);
512 511
    }
513 512

  
514 513
#else
515 514

  
516 515
  for (i=0;i<RUNS;i++)
517 516
    {    
518
      F77_sgemm(&transa,&transa,&dim,&dim,&dim,&alpha,B,&dim,A,&dim,&beta,C,&dim);
519
      F77_sgemm(&transb,&transb,&dim,&dim,&dim,&alpha,A,&dim,B,&dim,&beta,D,&dim);
517
      sgemm_(&transa,&transa,&dim,&dim,&dim,&alpha,B,&dim,A,&dim,&beta,C,&dim);
518
      sgemm_(&transb,&transb,&dim,&dim,&dim,&alpha,A,&dim,B,&dim,&beta,D,&dim);
520 519
    }
521 520

  
522 521
#endif
......
529 528
  printf("Using ACML: %i iterations for %ix%i matrix\n",
530 529
	 RUNS,dim,dim);
531 530
  
532
#ifdef DOUBLE
531
#ifdef FP64
533 532

  
534 533
  for (i=0;i<RUNS;i++)
535 534
    {    
......
558 557
     NonUnit : Matrix is not unit
559 558
   */
560 559

  
561
#ifdef DOUBLE
560
#ifdef FP64
562 561

  
563 562
  for (i=0;i<RUNS;i++)
564 563
    {  
......
591 590
     NonUnit : Matrix is not unit
592 591
   */
593 592

  
594
#ifdef DOUBLE
593
#ifdef FP64
595 594

  
596 595
  for (i=0;i<RUNS;i++)
597 596
    {  

Formats disponibles : Unified diff