root / BLAS / xTRSV / xTRSV.c @ 1
Historique | Voir | Annoter | Télécharger (19,31 ko)
1 |
/*
|
---|---|
2 |
Performs a linear system solving of random generated system
|
3 |
Estimates a test
|
4 |
|
5 |
Matrix is triangular
|
6 |
|
7 |
Thanks for help from aurel32@debian.org
|
8 |
*/
|
9 |
|
10 |
#include <stdio.h> |
11 |
#include <math.h> |
12 |
#include <stdlib.h> |
13 |
#include <sys/time.h> |
14 |
#include <string.h> |
15 |
|
16 |
#ifdef CUBLAS
|
17 |
#include <cublas.h> |
18 |
#define CUBLAS_WRAPPER_ERROR_NOERR 0 |
19 |
#define CUBLAS_WRAPPER_ERROR_ALLOC 1 |
20 |
#define CUBLAS_WRAPPER_ERROR_SET 2 |
21 |
#define CUBLAS_WRAPPER_ERROR_GET 3 |
22 |
#define CUBLAS_WRAPPER_ERROR_STUB 4 |
23 |
#elif THUNKING
|
24 |
#include <cublas.h> |
25 |
#elif FBLAS
|
26 |
#include <cblas_f77.h> |
27 |
#elif GSL
|
28 |
#include <gsl_cblas.h> |
29 |
#elif ACML
|
30 |
#include <acml.h> |
31 |
#include <acml_mv.h> |
32 |
#else
|
33 |
#include <cblas.h> |
34 |
#endif
|
35 |
|
36 |
#ifdef DOUBLE
|
37 |
#define LENGTH double |
38 |
#else
|
39 |
#define LENGTH float |
40 |
#endif
|
41 |
|
42 |
#ifdef THUNKING
|
43 |
/* WARNING !
|
44 |
Prototypes from fortran.c functions used MUST be defined here !
|
45 |
*/
|
46 |
#include "fortran_thunking.h" |
47 |
|
48 |
/*
|
49 |
#ifdef DOUBLE
|
50 |
|
51 |
void CUBLAS_DCOPY (const int *n, const double *x, const int *incx, double *y,
|
52 |
const int *incy);
|
53 |
|
54 |
double CUBLAS_DNRM2 (const int *dim, const double *X, const int *incx);
|
55 |
|
56 |
void CUBLAS_DTRSV (const char *uplo, const char *trans, const char *diag,
|
57 |
const int *n, const double *A, const int *lda, double *x,
|
58 |
const int *incx);
|
59 |
|
60 |
void CUBLAS_DGEMV (const char *trans, const int *m, const int *n,
|
61 |
const double *alpha, const double *A, const int *lda,
|
62 |
const double *x, const int *incx, const double *beta,
|
63 |
double *y, const int *incy);
|
64 |
|
65 |
void CUBLAS_DSWAP (const int *n, double *x, const int *incx, double *y,
|
66 |
const int *incy);
|
67 |
|
68 |
void CUBLAS_DAXPY (const int *n, const double *alpha, const double *x,
|
69 |
const int *incx, double *y, const int *incy);
|
70 |
|
71 |
#else
|
72 |
void CUBLAS_SCOPY (const int *n, const float *x, const int *incx, float *y,
|
73 |
const int *incy);
|
74 |
|
75 |
float CUBLAS_SNRM2 (const int *dim, const float *X, const int *incx);
|
76 |
|
77 |
void CUBLAS_STRSV (const char *uplo, const char *trans, const char *diag,
|
78 |
const int *n, const float *A, const int *lda, float *x,
|
79 |
const int *incx);
|
80 |
|
81 |
void CUBLAS_SGEMV (const char *trans, const int *m, const int *n,
|
82 |
const float *alpha, const float *A, const int *lda,
|
83 |
const float *x, const int *incx, const float *beta,
|
84 |
float *y, const int *incy);
|
85 |
|
86 |
void CUBLAS_SSWAP (const int *n, float *x, const int *incx, float *y,
|
87 |
const int *incy);
|
88 |
|
89 |
void CUBLAS_SAXPY (const int *n, const float *alpha, const float *x,
|
90 |
const int *incx, float *y, const int *incy);
|
91 |
|
92 |
#endif
|
93 |
*/
|
94 |
|
95 |
#elif FBLAS
|
96 |
|
97 |
#ifdef DOUBLE
|
98 |
|
99 |
void dtrsv_( FCHAR, FCHAR, FCHAR, FINT, const double *, FINT, double *, FINT); |
100 |
|
101 |
void dgemv_(FCHAR, FINT, FINT, const double *, const double *, FINT, |
102 |
const double *, FINT, const double *, double *, FINT); |
103 |
|
104 |
void dswap_( FINT, double *, FINT, double *, FINT); |
105 |
|
106 |
void daxpy_( FINT, const double *, const double *, FINT, double *, FINT); |
107 |
|
108 |
void dnrm2_( FINT, const double *, FINT, double *); |
109 |
|
110 |
#else
|
111 |
|
112 |
void strsv_( FCHAR, FCHAR, FCHAR, FINT, const float *, FINT, float *, FINT); |
113 |
|
114 |
void sgemv_(FCHAR, FINT, FINT, const float *, const float *, FINT, |
115 |
const float *, FINT, const float *, float *, FINT); |
116 |
|
117 |
void sswap_( FINT, float *, FINT, float *, FINT); |
118 |
|
119 |
void saxpy_( FINT, const float *, const float *, FINT, float *, FINT); |
120 |
|
121 |
void snrm2_( FINT, const float *, FINT, float *); |
122 |
|
123 |
#endif
|
124 |
|
125 |
#endif
|
126 |
|
127 |
/* Matrix with only defined triangular terms */
|
128 |
/* Even if there are 0 in matrix, must be defined at all ! */
|
129 |
|
130 |
/* Get from fortran.c */
|
131 |
|
132 |
#ifdef CUBLAS
|
133 |
static char *errMsg[5] = |
134 |
{ |
135 |
"no error",
|
136 |
"allocation error",
|
137 |
"setVector/setMatrix error",
|
138 |
"getVector/getMatrix error",
|
139 |
"not implemented"
|
140 |
}; |
141 |
|
142 |
static void wrapperError (const char *funcName, int error) |
143 |
{ |
144 |
printf ("cublas%s wrapper: %s\n", funcName, errMsg[error]);
|
145 |
fflush (stdout); |
146 |
} |
147 |
#endif
|
148 |
|
149 |
int printVector(const int dimVector,const LENGTH *dataVector, |
150 |
char *nameVector,char *mesgVector) |
151 |
{ |
152 |
#ifndef QUIET
|
153 |
|
154 |
int i;
|
155 |
printf("\n%s of %s, size %i:\n",mesgVector,nameVector,dimVector);
|
156 |
for (i=0;i<dimVector;i++) |
157 |
{ |
158 |
printf("%s[%i]=%2.10e\n",nameVector,i,dataVector[i]);
|
159 |
} |
160 |
#endif
|
161 |
|
162 |
return 0; |
163 |
} |
164 |
|
165 |
int printResults(const int dimVector,const LENGTH *dataVector, |
166 |
char *nameVector,char *mesgVector) |
167 |
{ |
168 |
#ifdef RESULTS
|
169 |
int i;
|
170 |
|
171 |
printf("\n%s of %s, size %i:\n",mesgVector,nameVector,dimVector);
|
172 |
for (i=0;i<dimVector;i++) |
173 |
{ |
174 |
printf("%s[%i]=%2.10e\n",nameVector,i,dataVector[i]);
|
175 |
} |
176 |
#endif
|
177 |
return 0; |
178 |
} |
179 |
|
180 |
#ifdef CUBLAS
|
181 |
int printVectorGPU(const int dimVector,const LENGTH *dataVector, |
182 |
char *nameVector,char *mesgVector) |
183 |
{ |
184 |
#ifndef QUIET
|
185 |
int i;
|
186 |
cublasStatus stat; |
187 |
LENGTH *P=0;
|
188 |
int incx=1; |
189 |
|
190 |
P=malloc(dimVector*sizeof(LENGTH));
|
191 |
|
192 |
stat=cublasGetVector(dimVector,sizeof(P[0]),dataVector,incx,P,incx); |
193 |
|
194 |
if (stat != CUBLAS_STATUS_SUCCESS) {
|
195 |
wrapperError ("ToGet", CUBLAS_WRAPPER_ERROR_GET);
|
196 |
} |
197 |
|
198 |
printf("\n%s of %s, size %i:\n",mesgVector,nameVector,dimVector);
|
199 |
for (i=0;i<dimVector;i++) |
200 |
{ |
201 |
printf("%s[%i]=%2.10e\n",nameVector,i,P[i]);
|
202 |
} |
203 |
|
204 |
free(P); |
205 |
#endif
|
206 |
|
207 |
return 0; |
208 |
} |
209 |
#endif
|
210 |
|
211 |
int bench(int dim,int RUNS) |
212 |
{ |
213 |
/*
|
214 |
int dim=1000;
|
215 |
int RUNS=100;
|
216 |
*/
|
217 |
int incx=1; |
218 |
#ifdef PRINT
|
219 |
LENGTH factor=1.;
|
220 |
#endif
|
221 |
|
222 |
LENGTH alpha=1.,beta=0.,beta2=-1.; |
223 |
LENGTH *A,*X,*Y; |
224 |
|
225 |
/* checkBefore checkAfter checks */
|
226 |
LENGTH *checksA,*checksB; |
227 |
|
228 |
int i=0, j=0; |
229 |
|
230 |
double duration;
|
231 |
|
232 |
struct timeval tv1,tv2;
|
233 |
struct timezone tz;
|
234 |
|
235 |
/* Create 1 Matrix and 2 Vectors of dimension dim */
|
236 |
|
237 |
A=malloc(dim*dim*sizeof(LENGTH));
|
238 |
X=malloc(dim*sizeof(LENGTH));
|
239 |
Y=malloc(dim*sizeof(LENGTH));
|
240 |
|
241 |
/* Create 2 vectors for checker Before and After */
|
242 |
|
243 |
checksA=malloc(RUNS*sizeof(double)); |
244 |
checksB=malloc(RUNS*sizeof(double)); |
245 |
|
246 |
/* Initialize elements with random numbers */
|
247 |
/* Initialize the seed for rand() */
|
248 |
/* srand(time()); */
|
249 |
|
250 |
#ifdef UNIT
|
251 |
/* Fill the matrix and vector with random numbers */
|
252 |
for (i=0; i<dim; i++) { |
253 |
for (j=0; j<dim; j++) |
254 |
if (j>=i)
|
255 |
{ |
256 |
/* Normalization is necessary to avoid problems */
|
257 |
A[i*dim+j]=1.;
|
258 |
} |
259 |
else
|
260 |
{ |
261 |
A[i*dim+j]=0.;
|
262 |
} |
263 |
X[i]=1;
|
264 |
} |
265 |
#else
|
266 |
for (i=0; i<dim; i++) { |
267 |
for (j=0; j<dim; j++) |
268 |
if (j>i)
|
269 |
{ |
270 |
/* Normalization is necessary to avoid problems */
|
271 |
A[i*dim+j]=(LENGTH)rand()/(RAND_MAX+1.)
|
272 |
*(LENGTH)(i+1.)/(LENGTH)(j+1.); |
273 |
} |
274 |
else if (j==i) |
275 |
{ |
276 |
A[i*dim+j]=1.;
|
277 |
} |
278 |
else
|
279 |
{ |
280 |
A[i*dim+j]=0.;
|
281 |
} |
282 |
X[i]=(LENGTH)rand()/(RAND_MAX+1.);
|
283 |
} |
284 |
#endif
|
285 |
|
286 |
/* Print the matrix */
|
287 |
|
288 |
#ifdef QUIET
|
289 |
#else
|
290 |
for (i=0; i<dim; i++) { |
291 |
for (j=0; j<dim; j++) printf("A[%i,%i]=%1.5f ", i,j,A[i*dim+j]); |
292 |
printf("\tX[%i]=%1.5f ", i,X[i]);
|
293 |
putchar('\n');
|
294 |
} |
295 |
putchar('\n');
|
296 |
#endif
|
297 |
|
298 |
/* Get first timer before launching */
|
299 |
gettimeofday(&tv1, &tz); |
300 |
|
301 |
/* Compute with CuBLAS library */
|
302 |
#ifdef CUBLAS
|
303 |
LENGTH *devPtrA=0, *devPtrX=0, *devPtrY=0; |
304 |
cublasStatus stat1, stat2, stat3; |
305 |
struct timeval tv3,tv4;
|
306 |
|
307 |
/* Order is Row */
|
308 |
/* Have to swap uplo and trans */
|
309 |
char uplo='L',trans='T',diag='N'; |
310 |
|
311 |
printf("Using CuBLAS: %i iterations for %ix%i matrix\n",
|
312 |
RUNS,dim,dim); |
313 |
|
314 |
stat1=cublasAlloc(dim*dim,sizeof(devPtrA[0]),(void**)&devPtrA); |
315 |
stat2=cublasAlloc(dim,sizeof(devPtrX[0]),(void**)&devPtrX); |
316 |
stat3=cublasAlloc(dim,sizeof(devPtrY[0]),(void**)&devPtrY); |
317 |
|
318 |
if ((stat1 != CUBLAS_STATUS_SUCCESS) ||
|
319 |
(stat2 != CUBLAS_STATUS_SUCCESS) || |
320 |
(stat3 != CUBLAS_STATUS_SUCCESS)) { |
321 |
wrapperError ("Dtrsv", CUBLAS_WRAPPER_ERROR_ALLOC);
|
322 |
cublasFree (devPtrA); |
323 |
cublasFree (devPtrX); |
324 |
cublasFree (devPtrY); |
325 |
return 1; |
326 |
} |
327 |
|
328 |
stat1=cublasSetMatrix(dim,dim,sizeof(A[0]),A,dim,devPtrA,dim); |
329 |
stat2=cublasSetVector(dim,sizeof(X[0]),X,incx,devPtrX,incx); |
330 |
stat3=cublasSetVector(dim,sizeof(Y[0]),Y,incx,devPtrY,incx); |
331 |
|
332 |
if ((stat1 != CUBLAS_STATUS_SUCCESS) ||
|
333 |
(stat2 != CUBLAS_STATUS_SUCCESS) || |
334 |
(stat3 != CUBLAS_STATUS_SUCCESS)) { |
335 |
wrapperError ("Dtrsv", CUBLAS_WRAPPER_ERROR_SET);
|
336 |
cublasFree (devPtrA); |
337 |
cublasFree (devPtrX); |
338 |
cublasFree (devPtrY); |
339 |
return 1; |
340 |
} |
341 |
|
342 |
/* Get third timer after memory operation */
|
343 |
gettimeofday(&tv3, &tz); |
344 |
|
345 |
for (i=0;i<RUNS;i++) |
346 |
{ |
347 |
#ifdef DOUBLE
|
348 |
|
349 |
printVectorGPU(dim,devPtrX,"X","Roots"); |
350 |
|
351 |
/* Multiply Y <- A.X */
|
352 |
cublasDgemv(trans,dim,dim,alpha,devPtrA,dim, |
353 |
devPtrX,incx,beta,devPtrY,incx); |
354 |
|
355 |
printVectorGPU(dim,devPtrY,"Y","Results"); |
356 |
|
357 |
/* Solve linear system A.X=Y : Y <- A-1.Y */
|
358 |
cublasDtrsv(uplo,trans,diag,dim,devPtrA,dim,devPtrY,incx); |
359 |
|
360 |
printVectorGPU(dim,devPtrY,"Y","Solutions"); |
361 |
|
362 |
/* Estimate the difference between X and Y : Y <- -Y+X */
|
363 |
cublasDaxpy(dim,beta2,devPtrY,incx,devPtrX,incx); |
364 |
|
365 |
printVectorGPU(dim,devPtrX,"X","Errors"); |
366 |
|
367 |
/* Estimate the second checker */
|
368 |
/* checksA[i]=(double)cublasDnrm2(dim,devPtrX,incx); */
|
369 |
|
370 |
/* Swap vector X and Y */
|
371 |
cublasDswap(dim,devPtrX,incx,devPtrY,incx); |
372 |
|
373 |
#else
|
374 |
|
375 |
printVectorGPU(dim,devPtrX,"X","Roots"); |
376 |
|
377 |
/* Multiply Y <- A.X */
|
378 |
cublasSgemv(trans,dim,dim,alpha,devPtrA,dim, |
379 |
devPtrX,incx,beta,devPtrY,incx); |
380 |
|
381 |
printVectorGPU(dim,devPtrY,"Y","Results"); |
382 |
|
383 |
/* Solve linear system Y <- A-1.Y */
|
384 |
cublasStrsv(uplo,trans,diag,dim,devPtrA,dim,devPtrY,incx); |
385 |
|
386 |
printVectorGPU(dim,devPtrY,"Y","Solutions"); |
387 |
|
388 |
/* Add vectors X and -Y */
|
389 |
cublasSaxpy(dim,beta2,devPtrY,incx,devPtrX,incx); |
390 |
|
391 |
printVectorGPU(dim,devPtrX,"X","Errors"); |
392 |
|
393 |
/* Estimate the second checker */
|
394 |
/* checksA[i]=(double)cublasSnrm2(dim,devPtrX,incx); */
|
395 |
|
396 |
/* Swap vector X and Y */
|
397 |
cublasSswap(dim,devPtrX,incx,devPtrY,incx); |
398 |
|
399 |
#endif
|
400 |
|
401 |
} |
402 |
|
403 |
stat1=cublasGetMatrix(dim,dim,sizeof(A[0]),devPtrA,dim,A,dim); |
404 |
stat2=cublasGetVector(dim,sizeof(X[0]),devPtrX,incx,X,incx); |
405 |
stat3=cublasGetVector(dim,sizeof(Y[0]),devPtrY,incx,Y,incx); |
406 |
|
407 |
cublasFree (devPtrA); |
408 |
cublasFree (devPtrX); |
409 |
cublasFree (devPtrY); |
410 |
|
411 |
if ((stat1 != CUBLAS_STATUS_SUCCESS) ||
|
412 |
(stat2 != CUBLAS_STATUS_SUCCESS) || |
413 |
(stat3 != CUBLAS_STATUS_SUCCESS)) { |
414 |
wrapperError ("LinearSystem", CUBLAS_WRAPPER_ERROR_GET);
|
415 |
} |
416 |
|
417 |
/* Get fourth timer after memory free */
|
418 |
gettimeofday(&tv4, &tz); |
419 |
|
420 |
#elif THUNKING
|
421 |
|
422 |
/* Order is Row : Have to swap uplo='U' and trans='N' */
|
423 |
char uplo='L',trans='T',diag='N'; |
424 |
printf("Using CuBLAS/Thunking: %i iterations for %ix%i matrix\n",
|
425 |
RUNS,dim,dim); |
426 |
|
427 |
for (i=0;i<RUNS;i++) |
428 |
{ |
429 |
#ifdef DOUBLE
|
430 |
|
431 |
printVector(dim,X,"X","Roots"); |
432 |
|
433 |
/* Multiply A by X as Y <- A.X */
|
434 |
CUBLAS_DGEMV(&trans,&dim,&dim,&alpha,A,&dim,X,&incx,&beta,Y,&incx); |
435 |
|
436 |
printVector(dim,Y,"Y","Results"); |
437 |
|
438 |
/* Solve linear system */
|
439 |
CUBLAS_DTRSV(&uplo,&trans,&diag,&dim,A,&dim,Y,&incx); |
440 |
|
441 |
printVector(dim,Y,"Y","Solutions"); |
442 |
|
443 |
/* Compare the roots X and Y */
|
444 |
CUBLAS_DAXPY(&dim,&beta2,Y,&incx,X,&incx); |
445 |
|
446 |
printVector(dim,X,"X","Errors"); |
447 |
|
448 |
/* Store the checker of errors */
|
449 |
/* checksA[i]=(double)CUBLAS_DNRM2(&dim,X,&incx); */
|
450 |
|
451 |
/* Swap vector X and Y */
|
452 |
CUBLAS_DSWAP(&dim,X,&incx,Y,&incx); |
453 |
#else
|
454 |
|
455 |
printVector(dim,X,"X","Roots"); |
456 |
|
457 |
/* Multiply A by X as Y <- A.X */
|
458 |
CUBLAS_SGEMV(&trans,&dim,&dim,&alpha,A,&dim,X,&incx,&beta,Y,&incx); |
459 |
|
460 |
printVector(dim,Y,"Y","Results"); |
461 |
|
462 |
/* Solve linear system */
|
463 |
CUBLAS_STRSV(&uplo,&trans,&diag,&dim,A,&dim,Y,&incx); |
464 |
|
465 |
printVector(dim,Y,"Y","Solutions"); |
466 |
|
467 |
/* Compare the roots X and Y */
|
468 |
CUBLAS_SAXPY(&dim,&beta2,Y,&incx,X,&incx); |
469 |
|
470 |
printVector(dim,X,"X","Errors"); |
471 |
|
472 |
/* Store the checker of errors */
|
473 |
/* checksA[i]=(double)CUBLAS_SNRM2(&dim,X,&incx); */
|
474 |
|
475 |
/* Swap vector X and Y */
|
476 |
CUBLAS_SSWAP(&dim,X,&incx,Y,&incx); |
477 |
#endif
|
478 |
|
479 |
#ifdef PRINT
|
480 |
printf("Iteration %i, checker is %2.5f and error is %2.10f\n",
|
481 |
i,checksA[i],fabs(checksB[i]-checksA[i])/factor); |
482 |
#endif
|
483 |
} |
484 |
|
485 |
#elif FBLAS
|
486 |
|
487 |
/* Order is Row : Have to swap uplo='U' and trans='N' */
|
488 |
char uplo='L',trans='T',diag='N'; |
489 |
|
490 |
printf("Using FBLAS: %i iterations for %ix%i matrix\n",
|
491 |
RUNS,dim,dim); |
492 |
|
493 |
for (i=0;i<RUNS;i++) |
494 |
{ |
495 |
#ifdef DOUBLE
|
496 |
|
497 |
printVector(dim,X,"X","Roots"); |
498 |
|
499 |
/* Multiply A by X as Y <- A.X */
|
500 |
dgemv_(&trans,&dim,&dim,&alpha,A,&dim,X,&incx,&beta,Y,&incx); |
501 |
|
502 |
printVector(dim,Y,"Y","Results"); |
503 |
|
504 |
/* Solve linear system */
|
505 |
dtrsv_(&uplo,&trans,&diag,&dim,A,&dim,Y,&incx); |
506 |
|
507 |
printVector(dim,Y,"Y","Solutions"); |
508 |
|
509 |
/* Compare the roots X and Y */
|
510 |
daxpy_(&dim,&beta2,Y,&incx,X,&incx); |
511 |
|
512 |
printVector(dim,X,"X","Errors"); |
513 |
|
514 |
/* Store the checker of errors */
|
515 |
/* dnrm2_(&dim,X,&incx,&checksA[i]); */
|
516 |
|
517 |
/* Swap vector X and Y */
|
518 |
dswap_(&dim,X,&incx,Y,&incx); |
519 |
|
520 |
#else
|
521 |
|
522 |
printVector(dim,X,"X","Roots"); |
523 |
|
524 |
/* Multiply A by X as Y <- A.X */
|
525 |
sgemv_(&trans,&dim,&dim,&alpha,A,&dim,X,&incx,&beta,Y,&incx); |
526 |
|
527 |
printVector(dim,Y,"Y","Results"); |
528 |
|
529 |
/* Solve linear system */
|
530 |
strsv_(&uplo,&trans,&diag,&dim,A,&dim,Y,&incx); |
531 |
|
532 |
printVector(dim,Y,"Y","Solutions"); |
533 |
|
534 |
/* Compare the roots X and Y */
|
535 |
saxpy_(&dim,&beta2,Y,&incx,X,&incx); |
536 |
|
537 |
printVector(dim,X,"X","Errors"); |
538 |
|
539 |
/* Store the checker of errors */
|
540 |
/* snrm2_(&dim,X,&incx,&checksA[i]); */
|
541 |
|
542 |
/* Swap vector X and Y */
|
543 |
sswap_(&dim,X,&incx,Y,&incx); |
544 |
#endif
|
545 |
|
546 |
} |
547 |
|
548 |
#elif ACML
|
549 |
|
550 |
/* Order is Row : Have to swap uplo='U' and trans='N' */
|
551 |
char uplo='L',trans='T',diag='N'; |
552 |
|
553 |
printf("Using ACML: %i iterations for %ix%i matrix\n",
|
554 |
RUNS,dim,dim); |
555 |
|
556 |
for (i=0;i<RUNS;i++) |
557 |
{ |
558 |
#ifdef DOUBLE
|
559 |
|
560 |
printVector(dim,X,"X","Roots"); |
561 |
|
562 |
/* Multiply A by X as Y <- A.X */
|
563 |
dgemv(trans,dim,dim,alpha,A,dim,X,incx,beta,Y,incx); |
564 |
|
565 |
printVector(dim,Y,"Y","Results"); |
566 |
|
567 |
/* Solve linear system */
|
568 |
dtrsv(uplo,trans,diag,dim,A,dim,Y,incx); |
569 |
|
570 |
printVector(dim,Y,"Y","Solutions"); |
571 |
|
572 |
/* Compare the roots X and Y */
|
573 |
daxpy(dim,beta2,Y,incx,X,incx); |
574 |
|
575 |
printVector(dim,X,"X","Errors"); |
576 |
|
577 |
/* Store the checker of errors */
|
578 |
/* dnrm2_(&dim,X,&incx,&checksA[i]); */
|
579 |
|
580 |
/* Swap vector X and Y */
|
581 |
dswap(dim,X,incx,Y,incx); |
582 |
|
583 |
#else
|
584 |
|
585 |
printVector(dim,X,"X","Roots"); |
586 |
|
587 |
/* Multiply A by X as Y <- A.X */
|
588 |
sgemv(trans,dim,dim,alpha,A,dim,X,incx,beta,Y,incx); |
589 |
|
590 |
printVector(dim,Y,"Y","Results"); |
591 |
|
592 |
/* Solve linear system */
|
593 |
strsv(uplo,trans,diag,dim,A,dim,Y,incx); |
594 |
|
595 |
printVector(dim,Y,"Y","Solutions"); |
596 |
|
597 |
/* Compare the roots X and Y */
|
598 |
saxpy(dim,beta2,Y,incx,X,incx); |
599 |
|
600 |
printVector(dim,X,"X","Errors"); |
601 |
|
602 |
/* Store the checker of errors */
|
603 |
/* snrm2_(&dim,X,&incx,&checksA[i]); */
|
604 |
|
605 |
/* Swap vector X and Y */
|
606 |
sswap(dim,X,incx,Y,incx); |
607 |
#endif
|
608 |
|
609 |
} |
610 |
|
611 |
#elif GSL
|
612 |
|
613 |
printf("Using GSL: %i iterations for %ix%i matrix\n",RUNS,dim,dim);
|
614 |
|
615 |
/*
|
616 |
RowMajor : Matrix is read row by row
|
617 |
Upper : the no null elements are on top
|
618 |
NoTrans : no transposition before estimation
|
619 |
NonUnit : Matrix is not unit
|
620 |
*/
|
621 |
|
622 |
for (i=0;i<RUNS;i++) |
623 |
{ |
624 |
|
625 |
#ifdef DOUBLE
|
626 |
|
627 |
printVector(dim,X,"X","Roots"); |
628 |
|
629 |
/* Multiply A by X as Y <- A.X */
|
630 |
cblas_dgemv(CblasRowMajor,CblasNoTrans, |
631 |
dim,dim,alpha,A,dim,X,incx,beta,Y,incx); |
632 |
|
633 |
printVector(dim,Y,"Y","Results"); |
634 |
|
635 |
/* Solve linear system : Y <- A-1.Y */
|
636 |
cblas_dtrsv(CblasRowMajor,CblasUpper,CblasNoTrans,CblasNonUnit, |
637 |
dim,A,dim,Y,incx); |
638 |
|
639 |
printVector(dim,Y,"Y","Solutions"); |
640 |
|
641 |
cblas_daxpy(dim,beta2,Y,incx,X,incx); |
642 |
|
643 |
printVector(dim,X,"X","Errors"); |
644 |
|
645 |
/* Store the checker of errors */
|
646 |
/* checksA[i]=(double)cblas_dnrm2(dim,X,incx); */
|
647 |
|
648 |
cblas_dswap(dim,X,incx,Y,incx); |
649 |
|
650 |
#else
|
651 |
|
652 |
printVector(dim,X,"X","Roots"); |
653 |
|
654 |
/* Multiply A by X as Y <- A.X */
|
655 |
cblas_sgemv(CblasRowMajor,CblasNoTrans, |
656 |
dim,dim,alpha,A,dim,X,incx,beta,Y,incx); |
657 |
|
658 |
printVector(dim,Y,"Y","Results"); |
659 |
|
660 |
/* Solve linear system : Y <- A-1.Y */
|
661 |
cblas_strsv(CblasRowMajor,CblasUpper,CblasNoTrans,CblasNonUnit, |
662 |
dim,A,dim,Y,incx); |
663 |
|
664 |
printVector(dim,Y,"Y","Solutions"); |
665 |
|
666 |
cblas_saxpy(dim,beta2,Y,incx,X,incx); |
667 |
|
668 |
printVector(dim,X,"X","Errors"); |
669 |
|
670 |
/* Store the checker of errors */
|
671 |
/* checksA[i]=(double)cblas_snrm2(dim,X,incx); */
|
672 |
|
673 |
cblas_sswap(dim,X,incx,Y,incx); |
674 |
|
675 |
#endif
|
676 |
|
677 |
} |
678 |
#else
|
679 |
|
680 |
printf("Using CBLAS: %i iterations for %ix%i matrix\n",RUNS,dim,dim);
|
681 |
|
682 |
/*
|
683 |
RowMajor : Matrix is read row bu row
|
684 |
Upper : the no null elements are on top
|
685 |
NoTrans : no transposition before estimation
|
686 |
NonUnit : Matrix is not unit
|
687 |
*/
|
688 |
|
689 |
for (i=0;i<RUNS;i++) |
690 |
{ |
691 |
|
692 |
#ifdef DOUBLE
|
693 |
|
694 |
printVector(dim,X,"X","Roots"); |
695 |
|
696 |
/* Multiply A by X as Y <- A.X */
|
697 |
cblas_dgemv(CblasRowMajor,CblasNoTrans, |
698 |
dim,dim,alpha,A,dim,X,incx,beta,Y,incx); |
699 |
|
700 |
printVector(dim,Y,"Y","Results"); |
701 |
|
702 |
/* Solve linear system : Y <- A-1.Y */
|
703 |
cblas_dtrsv(CblasRowMajor,CblasUpper,CblasNoTrans,CblasNonUnit, |
704 |
dim,A,dim,Y,incx); |
705 |
|
706 |
printVector(dim,Y,"Y","Solutions"); |
707 |
|
708 |
cblas_daxpy(dim,beta2,Y,incx,X,incx); |
709 |
|
710 |
printVector(dim,X,"X","Errors"); |
711 |
|
712 |
/* Store the checker of errors */
|
713 |
/* checksA[i]=(double)cblas_dnrm2(dim,X,incx); */
|
714 |
|
715 |
cblas_dswap(dim,X,incx,Y,incx); |
716 |
|
717 |
#else
|
718 |
|
719 |
printVector(dim,X,"X","Roots"); |
720 |
|
721 |
/* Multiply A by X as Y <- A.X */
|
722 |
cblas_sgemv(CblasRowMajor,CblasNoTrans, |
723 |
dim,dim,alpha,A,dim,X,incx,beta,Y,incx); |
724 |
|
725 |
printVector(dim,Y,"Y","Results"); |
726 |
|
727 |
/* Solve linear system : Y <- A-1.Y */
|
728 |
cblas_strsv(CblasRowMajor,CblasUpper,CblasNoTrans,CblasNonUnit, |
729 |
dim,A,dim,Y,incx); |
730 |
|
731 |
printVector(dim,Y,"Y","Solutions"); |
732 |
|
733 |
cblas_saxpy(dim,beta2,Y,incx,X,incx); |
734 |
|
735 |
printVector(dim,X,"X","Errors"); |
736 |
|
737 |
/* Store the checker of errors */
|
738 |
/* checksA[i]=(double)cblas_snrm2(dim,X,incx); */
|
739 |
|
740 |
cblas_sswap(dim,X,incx,Y,incx); |
741 |
|
742 |
#endif
|
743 |
|
744 |
} |
745 |
#endif
|
746 |
putchar('\n');
|
747 |
|
748 |
/* Get second timer after launching */
|
749 |
gettimeofday(&tv2, &tz); |
750 |
|
751 |
#ifdef CUBLAS
|
752 |
double memoryIn,memoryOut;
|
753 |
|
754 |
memoryIn=(double)((tv3.tv_sec-tv1.tv_sec) * 1000000L + \ |
755 |
(tv3.tv_usec-tv1.tv_usec))/1000000.; |
756 |
|
757 |
memoryOut=(double)((tv2.tv_sec-tv4.tv_sec) * 1000000L + \ |
758 |
(tv2.tv_usec-tv4.tv_usec))/1000000.; |
759 |
|
760 |
duration=(double)((tv4.tv_sec-tv3.tv_sec) * 1000000L + \ |
761 |
(tv4.tv_usec-tv3.tv_usec))/1000000./RUNS; |
762 |
|
763 |
printf("Duration of memory allocation : %2.10f s\n",memoryIn);
|
764 |
printf("Duration of memory free : %2.10f s\n",memoryOut);
|
765 |
#else
|
766 |
duration=(double)((tv2.tv_sec-tv1.tv_sec) * 1000000L + \ |
767 |
(tv2.tv_usec-tv1.tv_usec))/1000000./RUNS; |
768 |
|
769 |
#endif
|
770 |
|
771 |
printf("Duration of each cycle : %2.10f s\n",duration);
|
772 |
|
773 |
printResults(RUNS,checksA,"C","Errors cumulated"); |
774 |
|
775 |
putchar('\n');
|
776 |
|
777 |
/*
|
778 |
#ifdef PRINT
|
779 |
for (i=0; i<dim; i++) {
|
780 |
for (j=0; j<dim; j++) printf("A[%i,%i]=%1.5f ", i,j,A[i*dim+j]);
|
781 |
putchar('\n');
|
782 |
}
|
783 |
|
784 |
for (i=0; i<dim; i++) {
|
785 |
printf("X[%i]=%2.5f",i,X[i]);
|
786 |
putchar('\n');
|
787 |
}
|
788 |
putchar('\n');
|
789 |
for (i=0; i<dim; i++) {
|
790 |
printf("Y[%i]=%2.5f",i,Y[i]);
|
791 |
putchar('\n');
|
792 |
}
|
793 |
#endif
|
794 |
*/
|
795 |
|
796 |
return 0; |
797 |
} |
798 |
|
799 |
int main(int argc,char **argv) |
800 |
{ |
801 |
if ((argc==1)|| |
802 |
(strcmp(argv[1],"-h")==0)|| |
803 |
(strcmp(argv[1],"--help")==0)) |
804 |
{ |
805 |
printf("\nPerforms a bench using BLAS library implementation:\n\n"
|
806 |
"\t#1 Size on triangular system\n"
|
807 |
"\t#2 Number of iterations\n\n");
|
808 |
} |
809 |
else if ((atoi(argv[1])>=2)&& |
810 |
(atoi(argv[2])>=1)) |
811 |
{ |
812 |
bench(atoi(argv[1]),atoi(argv[2])); |
813 |
} |
814 |
|
815 |
return 0; |
816 |
} |