root / BLAS / xTRSV / xTRSV.c @ 7
Historique | Voir | Annoter | Télécharger (17,49 ko)
1 |
/*
|
---|---|
2 |
Performs a linear system solving of random generated system
|
3 |
Estimates a test
|
4 |
|
5 |
Matrix is triangular
|
6 |
|
7 |
Thanks for help from aurel32@debian.org
|
8 |
*/
|
9 |
|
10 |
#include <stdio.h> |
11 |
#include <math.h> |
12 |
#include <stdlib.h> |
13 |
#include <sys/time.h> |
14 |
#include <string.h> |
15 |
|
16 |
#ifdef CUBLAS
|
17 |
#include <cublas.h> |
18 |
#define CUBLAS_WRAPPER_ERROR_NOERR 0 |
19 |
#define CUBLAS_WRAPPER_ERROR_ALLOC 1 |
20 |
#define CUBLAS_WRAPPER_ERROR_SET 2 |
21 |
#define CUBLAS_WRAPPER_ERROR_GET 3 |
22 |
#define CUBLAS_WRAPPER_ERROR_STUB 4 |
23 |
#elif THUNKING
|
24 |
#include <cublas.h> |
25 |
#include "fortran_common.h" |
26 |
#include "fortran_thunking.h" |
27 |
#elif FBLAS
|
28 |
#include <cblas_f77.h> |
29 |
#elif GSL
|
30 |
#include <gsl_cblas.h> |
31 |
#elif ACML
|
32 |
#include <acml.h> |
33 |
#include <acml_mv.h> |
34 |
#else
|
35 |
#include <cblas.h> |
36 |
#endif
|
37 |
|
38 |
#ifdef DOUBLE
|
39 |
#define LENGTH double |
40 |
#else
|
41 |
#define LENGTH float |
42 |
#endif
|
43 |
|
44 |
#ifdef FBLAS
|
45 |
|
46 |
#ifdef DOUBLE
|
47 |
|
48 |
void dtrsv_( FCHAR, FCHAR, FCHAR, FINT, const double *, FINT, double *, FINT); |
49 |
|
50 |
void dgemv_(FCHAR, FINT, FINT, const double *, const double *, FINT, |
51 |
const double *, FINT, const double *, double *, FINT); |
52 |
|
53 |
void dswap_( FINT, double *, FINT, double *, FINT); |
54 |
|
55 |
void daxpy_( FINT, const double *, const double *, FINT, double *, FINT); |
56 |
|
57 |
void dnrm2_( FINT, const double *, FINT, double *); |
58 |
|
59 |
#else
|
60 |
|
61 |
void strsv_( FCHAR, FCHAR, FCHAR, FINT, const float *, FINT, float *, FINT); |
62 |
|
63 |
void sgemv_(FCHAR, FINT, FINT, const float *, const float *, FINT, |
64 |
const float *, FINT, const float *, float *, FINT); |
65 |
|
66 |
void sswap_( FINT, float *, FINT, float *, FINT); |
67 |
|
68 |
void saxpy_( FINT, const float *, const float *, FINT, float *, FINT); |
69 |
|
70 |
void snrm2_( FINT, const float *, FINT, float *); |
71 |
|
72 |
#endif
|
73 |
|
74 |
#endif
|
75 |
|
76 |
/* Matrix with only defined triangular terms */
|
77 |
/* Even if there are 0 in matrix, must be defined at all ! */
|
78 |
|
79 |
/* Get from fortran.c */
|
80 |
|
81 |
#ifdef CUBLAS
|
82 |
static char *errMsg[5] = |
83 |
{ |
84 |
"no error",
|
85 |
"allocation error",
|
86 |
"setVector/setMatrix error",
|
87 |
"getVector/getMatrix error",
|
88 |
"not implemented"
|
89 |
}; |
90 |
|
91 |
static void wrapperError (const char *funcName, int error) |
92 |
{ |
93 |
printf ("cublas%s wrapper: %s\n", funcName, errMsg[error]);
|
94 |
fflush (stdout); |
95 |
} |
96 |
#endif
|
97 |
|
98 |
int printVector(const int dimVector,const LENGTH *dataVector, |
99 |
char *nameVector,char *mesgVector) |
100 |
{ |
101 |
#ifndef QUIET
|
102 |
|
103 |
int i;
|
104 |
printf("\n%s of %s, size %i:\n",mesgVector,nameVector,dimVector);
|
105 |
for (i=0;i<dimVector;i++) |
106 |
{ |
107 |
printf("%s[%i]=%2.10e\n",nameVector,i,dataVector[i]);
|
108 |
} |
109 |
#endif
|
110 |
|
111 |
return 0; |
112 |
} |
113 |
|
114 |
int printResults(const int dimVector,const LENGTH *dataVector, |
115 |
char *nameVector,char *mesgVector) |
116 |
{ |
117 |
#ifdef RESULTS
|
118 |
int i;
|
119 |
|
120 |
printf("\n%s of %s, size %i:\n",mesgVector,nameVector,dimVector);
|
121 |
for (i=0;i<dimVector;i++) |
122 |
{ |
123 |
printf("%s[%i]=%2.10e\n",nameVector,i,dataVector[i]);
|
124 |
} |
125 |
#endif
|
126 |
return 0; |
127 |
} |
128 |
|
129 |
#ifdef CUBLAS
|
130 |
int printVectorGPU(const int dimVector,const LENGTH *dataVector, |
131 |
char *nameVector,char *mesgVector) |
132 |
{ |
133 |
#ifndef QUIET
|
134 |
int i;
|
135 |
cublasStatus stat; |
136 |
LENGTH *P=0;
|
137 |
int incx=1; |
138 |
|
139 |
P=malloc(dimVector*sizeof(LENGTH));
|
140 |
|
141 |
stat=cublasGetVector(dimVector,sizeof(P[0]),dataVector,incx,P,incx); |
142 |
|
143 |
if (stat != CUBLAS_STATUS_SUCCESS) {
|
144 |
wrapperError ("ToGet", CUBLAS_WRAPPER_ERROR_GET);
|
145 |
} |
146 |
|
147 |
printf("\n%s of %s, size %i:\n",mesgVector,nameVector,dimVector);
|
148 |
for (i=0;i<dimVector;i++) |
149 |
{ |
150 |
printf("%s[%i]=%2.10e\n",nameVector,i,P[i]);
|
151 |
} |
152 |
|
153 |
free(P); |
154 |
#endif
|
155 |
|
156 |
return 0; |
157 |
} |
158 |
#endif
|
159 |
|
160 |
int bench(int dim,int RUNS) |
161 |
{ |
162 |
/*
|
163 |
int dim=1000;
|
164 |
int RUNS=100;
|
165 |
*/
|
166 |
int incx=1; |
167 |
#ifdef PRINT
|
168 |
LENGTH factor=1.;
|
169 |
#endif
|
170 |
|
171 |
LENGTH alpha=1.,beta=0.,beta2=-1.; |
172 |
LENGTH *A,*X,*Y; |
173 |
|
174 |
/* checkBefore checkAfter checks */
|
175 |
LENGTH *checksA,*checksB; |
176 |
|
177 |
int i=0, j=0; |
178 |
|
179 |
double duration;
|
180 |
|
181 |
struct timeval tv1,tv2;
|
182 |
struct timezone tz;
|
183 |
|
184 |
/* Create 1 Matrix and 2 Vectors of dimension dim */
|
185 |
|
186 |
A=malloc(dim*dim*sizeof(LENGTH));
|
187 |
X=malloc(dim*sizeof(LENGTH));
|
188 |
Y=malloc(dim*sizeof(LENGTH));
|
189 |
|
190 |
/* Create 2 vectors for checker Before and After */
|
191 |
|
192 |
checksA=malloc(RUNS*sizeof(double)); |
193 |
checksB=malloc(RUNS*sizeof(double)); |
194 |
|
195 |
/* Initialize elements with random numbers */
|
196 |
/* Initialize the seed for rand() */
|
197 |
/* srand(time()); */
|
198 |
|
199 |
#ifdef UNIT
|
200 |
/* Fill the matrix and vector with random numbers */
|
201 |
for (i=0; i<dim; i++) { |
202 |
for (j=0; j<dim; j++) |
203 |
if (j>=i)
|
204 |
{ |
205 |
/* Normalization is necessary to avoid problems */
|
206 |
A[i*dim+j]=1.;
|
207 |
} |
208 |
else
|
209 |
{ |
210 |
A[i*dim+j]=0.;
|
211 |
} |
212 |
X[i]=1;
|
213 |
} |
214 |
#else
|
215 |
for (i=0; i<dim; i++) { |
216 |
for (j=0; j<dim; j++) |
217 |
if (j>i)
|
218 |
{ |
219 |
/* Normalization is necessary to avoid problems */
|
220 |
A[i*dim+j]=(LENGTH)rand()/(RAND_MAX+1.)
|
221 |
*(LENGTH)(i+1.)/(LENGTH)(j+1.); |
222 |
} |
223 |
else if (j==i) |
224 |
{ |
225 |
A[i*dim+j]=1.;
|
226 |
} |
227 |
else
|
228 |
{ |
229 |
A[i*dim+j]=0.;
|
230 |
} |
231 |
X[i]=(LENGTH)rand()/(RAND_MAX+1.);
|
232 |
} |
233 |
#endif
|
234 |
|
235 |
/* Print the matrix */
|
236 |
|
237 |
#ifdef QUIET
|
238 |
#else
|
239 |
for (i=0; i<dim; i++) { |
240 |
for (j=0; j<dim; j++) printf("A[%i,%i]=%1.5f ", i,j,A[i*dim+j]); |
241 |
printf("\tX[%i]=%1.5f ", i,X[i]);
|
242 |
putchar('\n');
|
243 |
} |
244 |
putchar('\n');
|
245 |
#endif
|
246 |
|
247 |
/* Get first timer before launching */
|
248 |
gettimeofday(&tv1, &tz); |
249 |
|
250 |
/* Compute with CuBLAS library */
|
251 |
#ifdef CUBLAS
|
252 |
LENGTH *devPtrA=0, *devPtrX=0, *devPtrY=0; |
253 |
cublasStatus stat1, stat2, stat3; |
254 |
struct timeval tv3,tv4;
|
255 |
|
256 |
/* Order is Row */
|
257 |
/* Have to swap uplo and trans */
|
258 |
char uplo='L',trans='T',diag='N'; |
259 |
|
260 |
printf("Using CuBLAS: %i iterations for %ix%i matrix\n",
|
261 |
RUNS,dim,dim); |
262 |
|
263 |
stat1=cublasAlloc(dim*dim,sizeof(devPtrA[0]),(void**)&devPtrA); |
264 |
stat2=cublasAlloc(dim,sizeof(devPtrX[0]),(void**)&devPtrX); |
265 |
stat3=cublasAlloc(dim,sizeof(devPtrY[0]),(void**)&devPtrY); |
266 |
|
267 |
if ((stat1 != CUBLAS_STATUS_SUCCESS) ||
|
268 |
(stat2 != CUBLAS_STATUS_SUCCESS) || |
269 |
(stat3 != CUBLAS_STATUS_SUCCESS)) { |
270 |
wrapperError ("Dtrsv", CUBLAS_WRAPPER_ERROR_ALLOC);
|
271 |
cublasFree (devPtrA); |
272 |
cublasFree (devPtrX); |
273 |
cublasFree (devPtrY); |
274 |
return 1; |
275 |
} |
276 |
|
277 |
stat1=cublasSetMatrix(dim,dim,sizeof(A[0]),A,dim,devPtrA,dim); |
278 |
stat2=cublasSetVector(dim,sizeof(X[0]),X,incx,devPtrX,incx); |
279 |
stat3=cublasSetVector(dim,sizeof(Y[0]),Y,incx,devPtrY,incx); |
280 |
|
281 |
if ((stat1 != CUBLAS_STATUS_SUCCESS) ||
|
282 |
(stat2 != CUBLAS_STATUS_SUCCESS) || |
283 |
(stat3 != CUBLAS_STATUS_SUCCESS)) { |
284 |
wrapperError ("Dtrsv", CUBLAS_WRAPPER_ERROR_SET);
|
285 |
cublasFree (devPtrA); |
286 |
cublasFree (devPtrX); |
287 |
cublasFree (devPtrY); |
288 |
return 1; |
289 |
} |
290 |
|
291 |
/* Get third timer after memory operation */
|
292 |
gettimeofday(&tv3, &tz); |
293 |
|
294 |
for (i=0;i<RUNS;i++) |
295 |
{ |
296 |
#ifdef DOUBLE
|
297 |
|
298 |
printVectorGPU(dim,devPtrX,"X","Roots"); |
299 |
|
300 |
/* Multiply Y <- A.X */
|
301 |
cublasDgemv(trans,dim,dim,alpha,devPtrA,dim, |
302 |
devPtrX,incx,beta,devPtrY,incx); |
303 |
|
304 |
printVectorGPU(dim,devPtrY,"Y","Results"); |
305 |
|
306 |
/* Solve linear system A.X=Y : Y <- A-1.Y */
|
307 |
cublasDtrsv(uplo,trans,diag,dim,devPtrA,dim,devPtrY,incx); |
308 |
|
309 |
printVectorGPU(dim,devPtrY,"Y","Solutions"); |
310 |
|
311 |
/* Estimate the difference between X and Y : Y <- -Y+X */
|
312 |
cublasDaxpy(dim,beta2,devPtrY,incx,devPtrX,incx); |
313 |
|
314 |
printVectorGPU(dim,devPtrX,"X","Errors"); |
315 |
|
316 |
/* Estimate the second checker */
|
317 |
/* checksA[i]=(double)cublasDnrm2(dim,devPtrX,incx); */
|
318 |
|
319 |
/* Swap vector X and Y */
|
320 |
cublasDswap(dim,devPtrX,incx,devPtrY,incx); |
321 |
|
322 |
#else
|
323 |
|
324 |
printVectorGPU(dim,devPtrX,"X","Roots"); |
325 |
|
326 |
/* Multiply Y <- A.X */
|
327 |
cublasSgemv(trans,dim,dim,alpha,devPtrA,dim, |
328 |
devPtrX,incx,beta,devPtrY,incx); |
329 |
|
330 |
printVectorGPU(dim,devPtrY,"Y","Results"); |
331 |
|
332 |
/* Solve linear system Y <- A-1.Y */
|
333 |
cublasStrsv(uplo,trans,diag,dim,devPtrA,dim,devPtrY,incx); |
334 |
|
335 |
printVectorGPU(dim,devPtrY,"Y","Solutions"); |
336 |
|
337 |
/* Add vectors X and -Y */
|
338 |
cublasSaxpy(dim,beta2,devPtrY,incx,devPtrX,incx); |
339 |
|
340 |
printVectorGPU(dim,devPtrX,"X","Errors"); |
341 |
|
342 |
/* Estimate the second checker */
|
343 |
/* checksA[i]=(double)cublasSnrm2(dim,devPtrX,incx); */
|
344 |
|
345 |
/* Swap vector X and Y */
|
346 |
cublasSswap(dim,devPtrX,incx,devPtrY,incx); |
347 |
|
348 |
#endif
|
349 |
|
350 |
} |
351 |
|
352 |
stat1=cublasGetMatrix(dim,dim,sizeof(A[0]),devPtrA,dim,A,dim); |
353 |
stat2=cublasGetVector(dim,sizeof(X[0]),devPtrX,incx,X,incx); |
354 |
stat3=cublasGetVector(dim,sizeof(Y[0]),devPtrY,incx,Y,incx); |
355 |
|
356 |
cublasFree (devPtrA); |
357 |
cublasFree (devPtrX); |
358 |
cublasFree (devPtrY); |
359 |
|
360 |
if ((stat1 != CUBLAS_STATUS_SUCCESS) ||
|
361 |
(stat2 != CUBLAS_STATUS_SUCCESS) || |
362 |
(stat3 != CUBLAS_STATUS_SUCCESS)) { |
363 |
wrapperError ("LinearSystem", CUBLAS_WRAPPER_ERROR_GET);
|
364 |
} |
365 |
|
366 |
/* Get fourth timer after memory free */
|
367 |
gettimeofday(&tv4, &tz); |
368 |
|
369 |
#elif THUNKING
|
370 |
|
371 |
/* Order is Row : Have to swap uplo='U' and trans='N' */
|
372 |
char uplo='L',trans='T',diag='N'; |
373 |
printf("Using CuBLAS/Thunking: %i iterations for %ix%i matrix\n",
|
374 |
RUNS,dim,dim); |
375 |
|
376 |
for (i=0;i<RUNS;i++) |
377 |
{ |
378 |
#ifdef DOUBLE
|
379 |
|
380 |
printVector(dim,X,"X","Roots"); |
381 |
|
382 |
/* Multiply A by X as Y <- A.X */
|
383 |
CUBLAS_DGEMV(&trans,&dim,&dim,&alpha,A,&dim,X,&incx,&beta,Y,&incx); |
384 |
|
385 |
printVector(dim,Y,"Y","Results"); |
386 |
|
387 |
/* Solve linear system */
|
388 |
CUBLAS_DTRSV(&uplo,&trans,&diag,&dim,A,&dim,Y,&incx); |
389 |
|
390 |
printVector(dim,Y,"Y","Solutions"); |
391 |
|
392 |
/* Compare the roots X and Y */
|
393 |
CUBLAS_DAXPY(&dim,&beta2,Y,&incx,X,&incx); |
394 |
|
395 |
printVector(dim,X,"X","Errors"); |
396 |
|
397 |
/* Store the checker of errors */
|
398 |
/* checksA[i]=(double)CUBLAS_DNRM2(&dim,X,&incx); */
|
399 |
|
400 |
/* Swap vector X and Y */
|
401 |
CUBLAS_DSWAP(&dim,X,&incx,Y,&incx); |
402 |
#else
|
403 |
|
404 |
printVector(dim,X,"X","Roots"); |
405 |
|
406 |
/* Multiply A by X as Y <- A.X */
|
407 |
CUBLAS_SGEMV(&trans,&dim,&dim,&alpha,A,&dim,X,&incx,&beta,Y,&incx); |
408 |
|
409 |
printVector(dim,Y,"Y","Results"); |
410 |
|
411 |
/* Solve linear system */
|
412 |
CUBLAS_STRSV(&uplo,&trans,&diag,&dim,A,&dim,Y,&incx); |
413 |
|
414 |
printVector(dim,Y,"Y","Solutions"); |
415 |
|
416 |
/* Compare the roots X and Y */
|
417 |
CUBLAS_SAXPY(&dim,&beta2,Y,&incx,X,&incx); |
418 |
|
419 |
printVector(dim,X,"X","Errors"); |
420 |
|
421 |
/* Store the checker of errors */
|
422 |
/* checksA[i]=(double)CUBLAS_SNRM2(&dim,X,&incx); */
|
423 |
|
424 |
/* Swap vector X and Y */
|
425 |
CUBLAS_SSWAP(&dim,X,&incx,Y,&incx); |
426 |
#endif
|
427 |
|
428 |
#ifdef PRINT
|
429 |
printf("Iteration %i, checker is %2.5f and error is %2.10f\n",
|
430 |
i,checksA[i],fabs(checksB[i]-checksA[i])/factor); |
431 |
#endif
|
432 |
} |
433 |
|
434 |
#elif FBLAS
|
435 |
|
436 |
/* Order is Row : Have to swap uplo='U' and trans='N' */
|
437 |
char uplo='L',trans='T',diag='N'; |
438 |
|
439 |
printf("Using FBLAS: %i iterations for %ix%i matrix\n",
|
440 |
RUNS,dim,dim); |
441 |
|
442 |
for (i=0;i<RUNS;i++) |
443 |
{ |
444 |
#ifdef DOUBLE
|
445 |
|
446 |
printVector(dim,X,"X","Roots"); |
447 |
|
448 |
/* Multiply A by X as Y <- A.X */
|
449 |
dgemv_(&trans,&dim,&dim,&alpha,A,&dim,X,&incx,&beta,Y,&incx); |
450 |
|
451 |
printVector(dim,Y,"Y","Results"); |
452 |
|
453 |
/* Solve linear system */
|
454 |
dtrsv_(&uplo,&trans,&diag,&dim,A,&dim,Y,&incx); |
455 |
|
456 |
printVector(dim,Y,"Y","Solutions"); |
457 |
|
458 |
/* Compare the roots X and Y */
|
459 |
daxpy_(&dim,&beta2,Y,&incx,X,&incx); |
460 |
|
461 |
printVector(dim,X,"X","Errors"); |
462 |
|
463 |
/* Store the checker of errors */
|
464 |
/* dnrm2_(&dim,X,&incx,&checksA[i]); */
|
465 |
|
466 |
/* Swap vector X and Y */
|
467 |
dswap_(&dim,X,&incx,Y,&incx); |
468 |
|
469 |
#else
|
470 |
|
471 |
printVector(dim,X,"X","Roots"); |
472 |
|
473 |
/* Multiply A by X as Y <- A.X */
|
474 |
sgemv_(&trans,&dim,&dim,&alpha,A,&dim,X,&incx,&beta,Y,&incx); |
475 |
|
476 |
printVector(dim,Y,"Y","Results"); |
477 |
|
478 |
/* Solve linear system */
|
479 |
strsv_(&uplo,&trans,&diag,&dim,A,&dim,Y,&incx); |
480 |
|
481 |
printVector(dim,Y,"Y","Solutions"); |
482 |
|
483 |
/* Compare the roots X and Y */
|
484 |
saxpy_(&dim,&beta2,Y,&incx,X,&incx); |
485 |
|
486 |
printVector(dim,X,"X","Errors"); |
487 |
|
488 |
/* Store the checker of errors */
|
489 |
/* snrm2_(&dim,X,&incx,&checksA[i]); */
|
490 |
|
491 |
/* Swap vector X and Y */
|
492 |
sswap_(&dim,X,&incx,Y,&incx); |
493 |
#endif
|
494 |
|
495 |
} |
496 |
|
497 |
#elif ACML
|
498 |
|
499 |
/* Order is Row : Have to swap uplo='U' and trans='N' */
|
500 |
char uplo='L',trans='T',diag='N'; |
501 |
|
502 |
printf("Using ACML: %i iterations for %ix%i matrix\n",
|
503 |
RUNS,dim,dim); |
504 |
|
505 |
for (i=0;i<RUNS;i++) |
506 |
{ |
507 |
#ifdef DOUBLE
|
508 |
|
509 |
printVector(dim,X,"X","Roots"); |
510 |
|
511 |
/* Multiply A by X as Y <- A.X */
|
512 |
dgemv(trans,dim,dim,alpha,A,dim,X,incx,beta,Y,incx); |
513 |
|
514 |
printVector(dim,Y,"Y","Results"); |
515 |
|
516 |
/* Solve linear system */
|
517 |
dtrsv(uplo,trans,diag,dim,A,dim,Y,incx); |
518 |
|
519 |
printVector(dim,Y,"Y","Solutions"); |
520 |
|
521 |
/* Compare the roots X and Y */
|
522 |
daxpy(dim,beta2,Y,incx,X,incx); |
523 |
|
524 |
printVector(dim,X,"X","Errors"); |
525 |
|
526 |
/* Store the checker of errors */
|
527 |
/* dnrm2_(&dim,X,&incx,&checksA[i]); */
|
528 |
|
529 |
/* Swap vector X and Y */
|
530 |
dswap(dim,X,incx,Y,incx); |
531 |
|
532 |
#else
|
533 |
|
534 |
printVector(dim,X,"X","Roots"); |
535 |
|
536 |
/* Multiply A by X as Y <- A.X */
|
537 |
sgemv(trans,dim,dim,alpha,A,dim,X,incx,beta,Y,incx); |
538 |
|
539 |
printVector(dim,Y,"Y","Results"); |
540 |
|
541 |
/* Solve linear system */
|
542 |
strsv(uplo,trans,diag,dim,A,dim,Y,incx); |
543 |
|
544 |
printVector(dim,Y,"Y","Solutions"); |
545 |
|
546 |
/* Compare the roots X and Y */
|
547 |
saxpy(dim,beta2,Y,incx,X,incx); |
548 |
|
549 |
printVector(dim,X,"X","Errors"); |
550 |
|
551 |
/* Store the checker of errors */
|
552 |
/* snrm2_(&dim,X,&incx,&checksA[i]); */
|
553 |
|
554 |
/* Swap vector X and Y */
|
555 |
sswap(dim,X,incx,Y,incx); |
556 |
#endif
|
557 |
|
558 |
} |
559 |
|
560 |
#elif GSL
|
561 |
|
562 |
printf("Using GSL: %i iterations for %ix%i matrix\n",RUNS,dim,dim);
|
563 |
|
564 |
/*
|
565 |
RowMajor : Matrix is read row by row
|
566 |
Upper : the no null elements are on top
|
567 |
NoTrans : no transposition before estimation
|
568 |
NonUnit : Matrix is not unit
|
569 |
*/
|
570 |
|
571 |
for (i=0;i<RUNS;i++) |
572 |
{ |
573 |
|
574 |
#ifdef DOUBLE
|
575 |
|
576 |
printVector(dim,X,"X","Roots"); |
577 |
|
578 |
/* Multiply A by X as Y <- A.X */
|
579 |
cblas_dgemv(CblasRowMajor,CblasNoTrans, |
580 |
dim,dim,alpha,A,dim,X,incx,beta,Y,incx); |
581 |
|
582 |
printVector(dim,Y,"Y","Results"); |
583 |
|
584 |
/* Solve linear system : Y <- A-1.Y */
|
585 |
cblas_dtrsv(CblasRowMajor,CblasUpper,CblasNoTrans,CblasNonUnit, |
586 |
dim,A,dim,Y,incx); |
587 |
|
588 |
printVector(dim,Y,"Y","Solutions"); |
589 |
|
590 |
cblas_daxpy(dim,beta2,Y,incx,X,incx); |
591 |
|
592 |
printVector(dim,X,"X","Errors"); |
593 |
|
594 |
/* Store the checker of errors */
|
595 |
/* checksA[i]=(double)cblas_dnrm2(dim,X,incx); */
|
596 |
|
597 |
cblas_dswap(dim,X,incx,Y,incx); |
598 |
|
599 |
#else
|
600 |
|
601 |
printVector(dim,X,"X","Roots"); |
602 |
|
603 |
/* Multiply A by X as Y <- A.X */
|
604 |
cblas_sgemv(CblasRowMajor,CblasNoTrans, |
605 |
dim,dim,alpha,A,dim,X,incx,beta,Y,incx); |
606 |
|
607 |
printVector(dim,Y,"Y","Results"); |
608 |
|
609 |
/* Solve linear system : Y <- A-1.Y */
|
610 |
cblas_strsv(CblasRowMajor,CblasUpper,CblasNoTrans,CblasNonUnit, |
611 |
dim,A,dim,Y,incx); |
612 |
|
613 |
printVector(dim,Y,"Y","Solutions"); |
614 |
|
615 |
cblas_saxpy(dim,beta2,Y,incx,X,incx); |
616 |
|
617 |
printVector(dim,X,"X","Errors"); |
618 |
|
619 |
/* Store the checker of errors */
|
620 |
/* checksA[i]=(double)cblas_snrm2(dim,X,incx); */
|
621 |
|
622 |
cblas_sswap(dim,X,incx,Y,incx); |
623 |
|
624 |
#endif
|
625 |
|
626 |
} |
627 |
#else
|
628 |
|
629 |
printf("Using CBLAS: %i iterations for %ix%i matrix\n",RUNS,dim,dim);
|
630 |
|
631 |
/*
|
632 |
RowMajor : Matrix is read row bu row
|
633 |
Upper : the no null elements are on top
|
634 |
NoTrans : no transposition before estimation
|
635 |
NonUnit : Matrix is not unit
|
636 |
*/
|
637 |
|
638 |
for (i=0;i<RUNS;i++) |
639 |
{ |
640 |
|
641 |
#ifdef DOUBLE
|
642 |
|
643 |
printVector(dim,X,"X","Roots"); |
644 |
|
645 |
/* Multiply A by X as Y <- A.X */
|
646 |
cblas_dgemv(CblasRowMajor,CblasNoTrans, |
647 |
dim,dim,alpha,A,dim,X,incx,beta,Y,incx); |
648 |
|
649 |
printVector(dim,Y,"Y","Results"); |
650 |
|
651 |
/* Solve linear system : Y <- A-1.Y */
|
652 |
cblas_dtrsv(CblasRowMajor,CblasUpper,CblasNoTrans,CblasNonUnit, |
653 |
dim,A,dim,Y,incx); |
654 |
|
655 |
printVector(dim,Y,"Y","Solutions"); |
656 |
|
657 |
cblas_daxpy(dim,beta2,Y,incx,X,incx); |
658 |
|
659 |
printVector(dim,X,"X","Errors"); |
660 |
|
661 |
/* Store the checker of errors */
|
662 |
/* checksA[i]=(double)cblas_dnrm2(dim,X,incx); */
|
663 |
|
664 |
cblas_dswap(dim,X,incx,Y,incx); |
665 |
|
666 |
#else
|
667 |
|
668 |
printVector(dim,X,"X","Roots"); |
669 |
|
670 |
/* Multiply A by X as Y <- A.X */
|
671 |
cblas_sgemv(CblasRowMajor,CblasNoTrans, |
672 |
dim,dim,alpha,A,dim,X,incx,beta,Y,incx); |
673 |
|
674 |
printVector(dim,Y,"Y","Results"); |
675 |
|
676 |
/* Solve linear system : Y <- A-1.Y */
|
677 |
cblas_strsv(CblasRowMajor,CblasUpper,CblasNoTrans,CblasNonUnit, |
678 |
dim,A,dim,Y,incx); |
679 |
|
680 |
printVector(dim,Y,"Y","Solutions"); |
681 |
|
682 |
cblas_saxpy(dim,beta2,Y,incx,X,incx); |
683 |
|
684 |
printVector(dim,X,"X","Errors"); |
685 |
|
686 |
/* Store the checker of errors */
|
687 |
/* checksA[i]=(double)cblas_snrm2(dim,X,incx); */
|
688 |
|
689 |
cblas_sswap(dim,X,incx,Y,incx); |
690 |
|
691 |
#endif
|
692 |
|
693 |
} |
694 |
#endif
|
695 |
putchar('\n');
|
696 |
|
697 |
/* Get second timer after launching */
|
698 |
gettimeofday(&tv2, &tz); |
699 |
|
700 |
#ifdef CUBLAS
|
701 |
double memoryIn,memoryOut;
|
702 |
|
703 |
memoryIn=(double)((tv3.tv_sec-tv1.tv_sec) * 1000000L + \ |
704 |
(tv3.tv_usec-tv1.tv_usec))/1000000.; |
705 |
|
706 |
memoryOut=(double)((tv2.tv_sec-tv4.tv_sec) * 1000000L + \ |
707 |
(tv2.tv_usec-tv4.tv_usec))/1000000.; |
708 |
|
709 |
duration=(double)((tv4.tv_sec-tv3.tv_sec) * 1000000L + \ |
710 |
(tv4.tv_usec-tv3.tv_usec))/1000000./RUNS; |
711 |
|
712 |
printf("Duration of memory allocation : %2.10f s\n",memoryIn);
|
713 |
printf("Duration of memory free : %2.10f s\n",memoryOut);
|
714 |
#else
|
715 |
duration=(double)((tv2.tv_sec-tv1.tv_sec) * 1000000L + \ |
716 |
(tv2.tv_usec-tv1.tv_usec))/1000000./RUNS; |
717 |
|
718 |
#endif
|
719 |
|
720 |
printf("Duration of each cycle : %2.10f s\n",duration);
|
721 |
|
722 |
printResults(RUNS,checksA,"C","Errors cumulated"); |
723 |
|
724 |
putchar('\n');
|
725 |
|
726 |
/*
|
727 |
#ifdef PRINT
|
728 |
for (i=0; i<dim; i++) {
|
729 |
for (j=0; j<dim; j++) printf("A[%i,%i]=%1.5f ", i,j,A[i*dim+j]);
|
730 |
putchar('\n');
|
731 |
}
|
732 |
|
733 |
for (i=0; i<dim; i++) {
|
734 |
printf("X[%i]=%2.5f",i,X[i]);
|
735 |
putchar('\n');
|
736 |
}
|
737 |
putchar('\n');
|
738 |
for (i=0; i<dim; i++) {
|
739 |
printf("Y[%i]=%2.5f",i,Y[i]);
|
740 |
putchar('\n');
|
741 |
}
|
742 |
#endif
|
743 |
*/
|
744 |
|
745 |
return 0; |
746 |
} |
747 |
|
748 |
int main(int argc,char **argv) |
749 |
{ |
750 |
if ((argc==1)|| |
751 |
(strcmp(argv[1],"-h")==0)|| |
752 |
(strcmp(argv[1],"--help")==0)) |
753 |
{ |
754 |
printf("\nPerforms a bench using BLAS library implementation:\n\n"
|
755 |
"\t#1 Size on triangular system\n"
|
756 |
"\t#2 Number of iterations\n\n");
|
757 |
} |
758 |
else if ((atoi(argv[1])>=2)&& |
759 |
(atoi(argv[2])>=1)) |
760 |
{ |
761 |
bench(atoi(argv[1]),atoi(argv[2])); |
762 |
} |
763 |
|
764 |
return 0; |
765 |
} |