root / BLAS / xTRSV / xTRSV.c @ 251
Historique | Voir | Annoter | Télécharger (17,48 ko)
1 |
/*
|
---|---|
2 |
Performs a linear system solving of random generated system
|
3 |
Estimates a test
|
4 |
|
5 |
Matrix is triangular
|
6 |
|
7 |
Thanks for help from aurel32@debian.org
|
8 |
*/
|
9 |
|
10 |
#include <stdio.h> |
11 |
#include <math.h> |
12 |
#include <stdlib.h> |
13 |
#include <sys/time.h> |
14 |
#include <string.h> |
15 |
|
16 |
#ifdef CUBLAS
|
17 |
#include <cublas.h> |
18 |
#define CUBLAS_WRAPPER_ERROR_NOERR 0 |
19 |
#define CUBLAS_WRAPPER_ERROR_ALLOC 1 |
20 |
#define CUBLAS_WRAPPER_ERROR_SET 2 |
21 |
#define CUBLAS_WRAPPER_ERROR_GET 3 |
22 |
#define CUBLAS_WRAPPER_ERROR_STUB 4 |
23 |
#elif THUNKING
|
24 |
#include <cublas.h> |
25 |
#include "fortran_common.h" |
26 |
#include "fortran_thunking.h" |
27 |
#elif FBLAS
|
28 |
#include <f77blas.h> |
29 |
#elif GSL
|
30 |
#include <gsl_cblas.h> |
31 |
#elif ACML
|
32 |
#include <acml.h> |
33 |
#else
|
34 |
#include <cblas.h> |
35 |
// #include <blaswrap.h>
|
36 |
#endif
|
37 |
|
38 |
#ifdef FP64
|
39 |
#define LENGTH double |
40 |
#else
|
41 |
#define LENGTH float |
42 |
#endif
|
43 |
|
44 |
/* Matrix with only defined triangular terms */
|
45 |
/* Even if there are 0 in matrix, must be defined at all ! */
|
46 |
|
47 |
/* Get from fortran.c */
|
48 |
|
49 |
#ifdef CUBLAS
|
50 |
static char *errMsg[5] = |
51 |
{ |
52 |
"no error",
|
53 |
"allocation error",
|
54 |
"setVector/setMatrix error",
|
55 |
"getVector/getMatrix error",
|
56 |
"not implemented"
|
57 |
}; |
58 |
|
59 |
static void wrapperError (const char *funcName, int error) |
60 |
{ |
61 |
printf ("cublas%s wrapper: %s\n", funcName, errMsg[error]);
|
62 |
fflush (stdout); |
63 |
} |
64 |
#endif
|
65 |
|
66 |
int printVector(const int dimVector,const LENGTH *dataVector, |
67 |
char *nameVector,char *mesgVector) |
68 |
{ |
69 |
#ifndef QUIET
|
70 |
|
71 |
int i;
|
72 |
printf("\n%s of %s, size %i:\n",mesgVector,nameVector,dimVector);
|
73 |
for (i=0;i<dimVector;i++) |
74 |
{ |
75 |
printf("%s[%i]=%2.10e\n",nameVector,i,dataVector[i]);
|
76 |
} |
77 |
#endif
|
78 |
|
79 |
return 0; |
80 |
} |
81 |
|
82 |
int printResults(const int dimVector,const LENGTH *dataVector, |
83 |
char *nameVector,char *mesgVector) |
84 |
{ |
85 |
#ifdef RESULTS
|
86 |
int i;
|
87 |
|
88 |
printf("\n%s of %s, size %i:\n",mesgVector,nameVector,dimVector);
|
89 |
for (i=0;i<dimVector;i++) |
90 |
{ |
91 |
printf("%s[%i]=%2.10e\n",nameVector,i,dataVector[i]);
|
92 |
} |
93 |
#endif
|
94 |
return 0; |
95 |
} |
96 |
|
97 |
#ifdef CUBLAS
|
98 |
int printVectorGPU(const int dimVector,const LENGTH *dataVector, |
99 |
char *nameVector,char *mesgVector) |
100 |
{ |
101 |
#ifndef QUIET
|
102 |
int i;
|
103 |
cublasStatus stat; |
104 |
LENGTH *P=0;
|
105 |
int incx=1; |
106 |
|
107 |
P=malloc(dimVector*sizeof(LENGTH));
|
108 |
|
109 |
stat=cublasGetVector(dimVector,sizeof(P[0]),dataVector,incx,P,incx); |
110 |
|
111 |
if (stat != CUBLAS_STATUS_SUCCESS) {
|
112 |
wrapperError ("ToGet", CUBLAS_WRAPPER_ERROR_GET);
|
113 |
} |
114 |
|
115 |
printf("\n%s of %s, size %i:\n",mesgVector,nameVector,dimVector);
|
116 |
for (i=0;i<dimVector;i++) |
117 |
{ |
118 |
printf("%s[%i]=%2.10e\n",nameVector,i,P[i]);
|
119 |
} |
120 |
|
121 |
free(P); |
122 |
#endif
|
123 |
|
124 |
return 0; |
125 |
} |
126 |
#endif
|
127 |
|
128 |
int bench(int dim,int RUNS) |
129 |
{ |
130 |
/*
|
131 |
int dim=1000;
|
132 |
int RUNS=100;
|
133 |
*/
|
134 |
int incx=1; |
135 |
#ifdef PRINT
|
136 |
LENGTH factor=1.;
|
137 |
#endif
|
138 |
|
139 |
LENGTH alpha=1.,beta=0.,beta2=-1.; |
140 |
LENGTH *A,*X,*Y; |
141 |
|
142 |
/* checkBefore checkAfter checks */
|
143 |
LENGTH *checksA,*checksB; |
144 |
|
145 |
int i=0, j=0; |
146 |
|
147 |
double duration;
|
148 |
|
149 |
struct timeval tv1,tv2;
|
150 |
struct timezone tz;
|
151 |
|
152 |
/* Create 1 Matrix and 2 Vectors of dimension dim */
|
153 |
|
154 |
A=malloc(dim*dim*sizeof(LENGTH));
|
155 |
X=malloc(dim*sizeof(LENGTH));
|
156 |
Y=malloc(dim*sizeof(LENGTH));
|
157 |
|
158 |
/* Create 2 vectors for checker Before and After */
|
159 |
|
160 |
checksA=malloc(RUNS*sizeof(double)); |
161 |
checksB=malloc(RUNS*sizeof(double)); |
162 |
|
163 |
/* Initialize elements with random numbers */
|
164 |
/* Initialize the seed for rand() */
|
165 |
/* srand(time()); */
|
166 |
|
167 |
#ifdef UNIT
|
168 |
/* Fill the matrix and vector with random numbers */
|
169 |
for (i=0; i<dim; i++) { |
170 |
for (j=0; j<dim; j++) |
171 |
if (j>=i)
|
172 |
{ |
173 |
/* Normalization is necessary to avoid problems */
|
174 |
A[i*dim+j]=1.;
|
175 |
} |
176 |
else
|
177 |
{ |
178 |
A[i*dim+j]=0.;
|
179 |
} |
180 |
X[i]=1;
|
181 |
} |
182 |
#else
|
183 |
for (i=0; i<dim; i++) { |
184 |
for (j=0; j<dim; j++) |
185 |
if (j>i)
|
186 |
{ |
187 |
/* Normalization is necessary to avoid problems */
|
188 |
/* A[i*dim+j]=(LENGTH)rand()/(RAND_MAX+1.) */
|
189 |
/* *(LENGTH)(i+1.)/(LENGTH)(j+1.); */
|
190 |
A[i*dim+j]=(LENGTH)rand()/(RAND_MAX+1.)
|
191 |
/(LENGTH)(dim-j); |
192 |
} |
193 |
else if (j==i) |
194 |
{ |
195 |
A[i*dim+j]=1.;
|
196 |
} |
197 |
else
|
198 |
{ |
199 |
A[i*dim+j]=0.;
|
200 |
} |
201 |
X[i]=(LENGTH)rand()/(RAND_MAX+1.);
|
202 |
} |
203 |
#endif
|
204 |
|
205 |
/* Print the matrix */
|
206 |
|
207 |
#ifdef QUIET
|
208 |
#else
|
209 |
for (i=0; i<dim; i++) { |
210 |
for (j=0; j<dim; j++) printf("A[%i,%i]=%1.5f ", i,j,A[i*dim+j]); |
211 |
printf("\tX[%i]=%1.5f ", i,X[i]);
|
212 |
putchar('\n');
|
213 |
} |
214 |
putchar('\n');
|
215 |
#endif
|
216 |
|
217 |
/* Get first timer before launching */
|
218 |
gettimeofday(&tv1, &tz); |
219 |
|
220 |
/* Compute with CuBLAS library */
|
221 |
#ifdef CUBLAS
|
222 |
LENGTH *devPtrA=0, *devPtrX=0, *devPtrY=0; |
223 |
cublasStatus stat1, stat2, stat3; |
224 |
struct timeval tv3,tv4;
|
225 |
|
226 |
/* Order is Row */
|
227 |
/* Have to swap uplo and trans */
|
228 |
char uplo='L',trans='T',diag='N'; |
229 |
|
230 |
printf("Using CuBLAS: %i iterations for %ix%i matrix\n",
|
231 |
RUNS,dim,dim); |
232 |
|
233 |
stat1=cublasAlloc(dim*dim,sizeof(devPtrA[0]),(void**)&devPtrA); |
234 |
stat2=cublasAlloc(dim,sizeof(devPtrX[0]),(void**)&devPtrX); |
235 |
stat3=cublasAlloc(dim,sizeof(devPtrY[0]),(void**)&devPtrY); |
236 |
|
237 |
if ((stat1 != CUBLAS_STATUS_SUCCESS) ||
|
238 |
(stat2 != CUBLAS_STATUS_SUCCESS) || |
239 |
(stat3 != CUBLAS_STATUS_SUCCESS)) { |
240 |
wrapperError ("Dtrsv", CUBLAS_WRAPPER_ERROR_ALLOC);
|
241 |
cublasFree (devPtrA); |
242 |
cublasFree (devPtrX); |
243 |
cublasFree (devPtrY); |
244 |
return 1; |
245 |
} |
246 |
|
247 |
stat1=cublasSetMatrix(dim,dim,sizeof(A[0]),A,dim,devPtrA,dim); |
248 |
stat2=cublasSetVector(dim,sizeof(X[0]),X,incx,devPtrX,incx); |
249 |
stat3=cublasSetVector(dim,sizeof(Y[0]),Y,incx,devPtrY,incx); |
250 |
|
251 |
if ((stat1 != CUBLAS_STATUS_SUCCESS) ||
|
252 |
(stat2 != CUBLAS_STATUS_SUCCESS) || |
253 |
(stat3 != CUBLAS_STATUS_SUCCESS)) { |
254 |
wrapperError ("Dtrsv", CUBLAS_WRAPPER_ERROR_SET);
|
255 |
cublasFree (devPtrA); |
256 |
cublasFree (devPtrX); |
257 |
cublasFree (devPtrY); |
258 |
return 1; |
259 |
} |
260 |
|
261 |
/* Get third timer after memory operation */
|
262 |
gettimeofday(&tv3, &tz); |
263 |
|
264 |
for (i=0;i<RUNS;i++) |
265 |
{ |
266 |
#ifdef FP64
|
267 |
|
268 |
printVectorGPU(dim,devPtrX,"X","Roots"); |
269 |
|
270 |
/* Multiply Y <- A.X */
|
271 |
cublasDgemv(trans,dim,dim,alpha,devPtrA,dim, |
272 |
devPtrX,incx,beta,devPtrY,incx); |
273 |
|
274 |
printVectorGPU(dim,devPtrY,"Y","Results"); |
275 |
|
276 |
/* Solve linear system A.X=Y : Y <- A-1.Y */
|
277 |
cublasDtrsv(uplo,trans,diag,dim,devPtrA,dim,devPtrY,incx); |
278 |
|
279 |
printVectorGPU(dim,devPtrY,"Y","Solutions"); |
280 |
|
281 |
/* Estimate the difference between X and Y : Y <- -Y+X */
|
282 |
cublasDaxpy(dim,beta2,devPtrY,incx,devPtrX,incx); |
283 |
|
284 |
printVectorGPU(dim,devPtrX,"X","Errors"); |
285 |
|
286 |
/* Estimate the second checker */
|
287 |
checksA[i]=(double)cublasDnrm2(dim,devPtrX,incx);
|
288 |
|
289 |
/* Swap vector X and Y */
|
290 |
cublasDswap(dim,devPtrX,incx,devPtrY,incx); |
291 |
|
292 |
#else
|
293 |
|
294 |
printVectorGPU(dim,devPtrX,"X","Roots"); |
295 |
|
296 |
/* Multiply Y <- A.X */
|
297 |
cublasSgemv(trans,dim,dim,alpha,devPtrA,dim, |
298 |
devPtrX,incx,beta,devPtrY,incx); |
299 |
|
300 |
printVectorGPU(dim,devPtrY,"Y","Results"); |
301 |
|
302 |
/* Solve linear system Y <- A-1.Y */
|
303 |
cublasStrsv(uplo,trans,diag,dim,devPtrA,dim,devPtrY,incx); |
304 |
|
305 |
printVectorGPU(dim,devPtrY,"Y","Solutions"); |
306 |
|
307 |
/* Add vectors X and -Y */
|
308 |
cublasSaxpy(dim,beta2,devPtrY,incx,devPtrX,incx); |
309 |
|
310 |
printVectorGPU(dim,devPtrX,"X","Errors"); |
311 |
|
312 |
/* Estimate the second checker */
|
313 |
checksA[i]=(double)cublasSnrm2(dim,devPtrX,incx);
|
314 |
|
315 |
/* Swap vector X and Y */
|
316 |
cublasSswap(dim,devPtrX,incx,devPtrY,incx); |
317 |
|
318 |
#endif
|
319 |
|
320 |
} |
321 |
|
322 |
stat1=cublasGetMatrix(dim,dim,sizeof(A[0]),devPtrA,dim,A,dim); |
323 |
stat2=cublasGetVector(dim,sizeof(X[0]),devPtrX,incx,X,incx); |
324 |
stat3=cublasGetVector(dim,sizeof(Y[0]),devPtrY,incx,Y,incx); |
325 |
|
326 |
cublasFree (devPtrA); |
327 |
cublasFree (devPtrX); |
328 |
cublasFree (devPtrY); |
329 |
|
330 |
if ((stat1 != CUBLAS_STATUS_SUCCESS) ||
|
331 |
(stat2 != CUBLAS_STATUS_SUCCESS) || |
332 |
(stat3 != CUBLAS_STATUS_SUCCESS)) { |
333 |
wrapperError ("LinearSystem", CUBLAS_WRAPPER_ERROR_GET);
|
334 |
} |
335 |
|
336 |
/* Get fourth timer after memory free */
|
337 |
gettimeofday(&tv4, &tz); |
338 |
|
339 |
#elif THUNKING
|
340 |
|
341 |
/* Order is Row : Have to swap uplo='U' and trans='N' */
|
342 |
char uplo='L',trans='T',diag='N'; |
343 |
printf("Using CuBLAS/Thunking: %i iterations for %ix%i matrix\n",
|
344 |
RUNS,dim,dim); |
345 |
|
346 |
for (i=0;i<RUNS;i++) |
347 |
{ |
348 |
#ifdef FP64
|
349 |
|
350 |
printVector(dim,X,"X","Roots"); |
351 |
|
352 |
/* Multiply A by X as Y <- A.X */
|
353 |
CUBLAS_DGEMV(&trans,&dim,&dim,&alpha,A,&dim,X,&incx,&beta,Y,&incx); |
354 |
|
355 |
printVector(dim,Y,"Y","Results"); |
356 |
|
357 |
/* Solve linear system */
|
358 |
CUBLAS_DTRSV(&uplo,&trans,&diag,&dim,A,&dim,Y,&incx); |
359 |
|
360 |
printVector(dim,Y,"Y","Solutions"); |
361 |
|
362 |
/* Compare the roots X and Y */
|
363 |
CUBLAS_DAXPY(&dim,&beta2,Y,&incx,X,&incx); |
364 |
|
365 |
printVector(dim,X,"X","Errors"); |
366 |
|
367 |
/* Store the checker of errors */
|
368 |
checksA[i]=(double)CUBLAS_DNRM2(&dim,X,&incx);
|
369 |
|
370 |
/* Swap vector X and Y */
|
371 |
CUBLAS_DSWAP(&dim,X,&incx,Y,&incx); |
372 |
#else
|
373 |
|
374 |
printVector(dim,X,"X","Roots"); |
375 |
|
376 |
/* Multiply A by X as Y <- A.X */
|
377 |
CUBLAS_SGEMV(&trans,&dim,&dim,&alpha,A,&dim,X,&incx,&beta,Y,&incx); |
378 |
|
379 |
printVector(dim,Y,"Y","Results"); |
380 |
|
381 |
/* Solve linear system */
|
382 |
CUBLAS_STRSV(&uplo,&trans,&diag,&dim,A,&dim,Y,&incx); |
383 |
|
384 |
printVector(dim,Y,"Y","Solutions"); |
385 |
|
386 |
/* Compare the roots X and Y */
|
387 |
CUBLAS_SAXPY(&dim,&beta2,Y,&incx,X,&incx); |
388 |
|
389 |
printVector(dim,X,"X","Errors"); |
390 |
|
391 |
/* Store the checker of errors */
|
392 |
checksA[i]=(double)CUBLAS_SNRM2(&dim,X,&incx);
|
393 |
|
394 |
/* Swap vector X and Y */
|
395 |
CUBLAS_SSWAP(&dim,X,&incx,Y,&incx); |
396 |
#endif
|
397 |
|
398 |
#ifdef PRINT
|
399 |
printf("Iteration %i, checker is %2.5f and error is %2.10f\n",
|
400 |
i,checksA[i],fabs(checksB[i]-checksA[i])/factor); |
401 |
#endif
|
402 |
} |
403 |
|
404 |
#elif FBLAS
|
405 |
|
406 |
/* Order is Row : Have to swap uplo='U' and trans='N' */
|
407 |
char uplo='L',trans='T',diag='N'; |
408 |
|
409 |
printf("Using FBLAS: %i iterations for %ix%i matrix\n",
|
410 |
RUNS,dim,dim); |
411 |
|
412 |
for (i=0;i<RUNS;i++) |
413 |
{ |
414 |
#ifdef FP64
|
415 |
|
416 |
printVector(dim,X,"X","Initial roots"); |
417 |
|
418 |
/* /\* Multiply A by X as Y <- A.X *\/ */
|
419 |
/* dgemv_(&trans,&dim,&dim,&alpha,A,&dim,X,&incx,&beta,Y,&incx); */
|
420 |
|
421 |
/* printVector(dim,Y,"Y<-A.X","Estimated results"); */
|
422 |
|
423 |
/* /\* Solve linear system *\/ */
|
424 |
/* dtrsv_(&uplo,&trans,&diag,&dim,A,&dim,Y,&incx); */
|
425 |
|
426 |
/* printVector(dim,Y,"X","Solutions from A.X=Y"); */
|
427 |
|
428 |
/* /\* Compare the roots X and Y *\/ */
|
429 |
/* daxpy_(&dim,&beta2,Y,&incx,X,&incx); */
|
430 |
|
431 |
/* printVector(dim,X,"X","Differences initial and estimated"); */
|
432 |
|
433 |
/* /\* Store the checker of errors *\/ */
|
434 |
/* dnrm2_(&dim,X,&incx,&checksA[i]); */
|
435 |
|
436 |
/* /\* Swap vector X and Y *\/ */
|
437 |
/* dswap_(&dim,X,&incx,Y,&incx); */
|
438 |
|
439 |
/* Multiply A by X as Y <- A.X */
|
440 |
dgemv_(&trans,&dim,&dim,&alpha,A,&dim,X,&incx,&beta,Y,&incx); |
441 |
|
442 |
printVector(dim,Y,"Y<-A.X","Estimated results"); |
443 |
|
444 |
/* Solve linear system */
|
445 |
dtrsv_(&uplo,&trans,&diag,&dim,A,&dim,Y,&incx); |
446 |
|
447 |
printVector(dim,Y,"X","Solutions from A.X=Y"); |
448 |
|
449 |
/* Compare the roots X and Y */
|
450 |
daxpy_(&dim,&beta2,Y,&incx,X,&incx); |
451 |
|
452 |
printVector(dim,X,"X","Differences initial and estimated"); |
453 |
|
454 |
/* Store the checker of errors */
|
455 |
checksA[i]=(double)dnrm2_(&dim,X,&incx);
|
456 |
|
457 |
/* Swap vector X and Y */
|
458 |
dswap_(&dim,X,&incx,Y,&incx); |
459 |
|
460 |
#else
|
461 |
|
462 |
printVector(dim,X,"X","Roots"); |
463 |
|
464 |
/* Multiply A by X as Y <- A.X */
|
465 |
sgemv_(&trans,&dim,&dim,&alpha,A,&dim,X,&incx,&beta,Y,&incx); |
466 |
|
467 |
printVector(dim,Y,"Y","Results"); |
468 |
|
469 |
/* Solve linear system */
|
470 |
strsv_(&uplo,&trans,&diag,&dim,A,&dim,Y,&incx); |
471 |
|
472 |
printVector(dim,Y,"Y","Solutions"); |
473 |
|
474 |
/* Compare the roots X and Y */
|
475 |
saxpy_(&dim,&beta2,Y,&incx,X,&incx); |
476 |
|
477 |
printVector(dim,X,"X","Errors"); |
478 |
|
479 |
/* Store the checker of errors */
|
480 |
checksA[i]=(LENGTH)snrm2_(&dim,X,&incx); |
481 |
|
482 |
/* Swap vector X and Y */
|
483 |
sswap_(&dim,X,&incx,Y,&incx); |
484 |
#endif
|
485 |
|
486 |
} |
487 |
|
488 |
#elif ACML
|
489 |
|
490 |
/* Order is Row : Have to swap uplo='U' and trans='N' */
|
491 |
char uplo='L',trans='T',diag='N'; |
492 |
|
493 |
printf("Using ACML: %i iterations for %ix%i matrix\n",
|
494 |
RUNS,dim,dim); |
495 |
|
496 |
for (i=0;i<RUNS;i++) |
497 |
{ |
498 |
#ifdef FP64
|
499 |
|
500 |
printVector(dim,X,"X","Roots"); |
501 |
|
502 |
/* Multiply A by X as Y <- A.X */
|
503 |
dgemv(trans,dim,dim,alpha,A,dim,X,incx,beta,Y,incx); |
504 |
|
505 |
printVector(dim,Y,"Y","Results"); |
506 |
|
507 |
/* Solve linear system */
|
508 |
dtrsv(uplo,trans,diag,dim,A,dim,Y,incx); |
509 |
|
510 |
printVector(dim,Y,"Y","Solutions"); |
511 |
|
512 |
/* Compare the roots X and Y */
|
513 |
daxpy(dim,beta2,Y,incx,X,incx); |
514 |
|
515 |
printVector(dim,X,"X","Errors"); |
516 |
|
517 |
/* Store the checker of errors */
|
518 |
dnrm2_(&dim,X,&incx,&checksA[i]); |
519 |
|
520 |
/* Swap vector X and Y */
|
521 |
dswap(dim,X,incx,Y,incx); |
522 |
|
523 |
#else
|
524 |
|
525 |
printVector(dim,X,"X","Roots"); |
526 |
|
527 |
/* Multiply A by X as Y <- A.X */
|
528 |
sgemv(trans,dim,dim,alpha,A,dim,X,incx,beta,Y,incx); |
529 |
|
530 |
printVector(dim,Y,"Y","Results"); |
531 |
|
532 |
/* Solve linear system */
|
533 |
strsv(uplo,trans,diag,dim,A,dim,Y,incx); |
534 |
|
535 |
printVector(dim,Y,"Y","Solutions"); |
536 |
|
537 |
/* Compare the roots X and Y */
|
538 |
saxpy(dim,beta2,Y,incx,X,incx); |
539 |
|
540 |
printVector(dim,X,"X","Errors"); |
541 |
|
542 |
/* Store the checker of errors */
|
543 |
snrm2_(&dim,X,&incx,&checksA[i]); |
544 |
|
545 |
/* Swap vector X and Y */
|
546 |
sswap(dim,X,incx,Y,incx); |
547 |
#endif
|
548 |
|
549 |
} |
550 |
|
551 |
#elif GSL
|
552 |
|
553 |
printf("Using GSL: %i iterations for %ix%i matrix\n",RUNS,dim,dim);
|
554 |
|
555 |
/*
|
556 |
RowMajor : Matrix is read row by row
|
557 |
Upper : the no null elements are on top
|
558 |
NoTrans : no transposition before estimation
|
559 |
NonUnit : Matrix is not unit
|
560 |
*/
|
561 |
|
562 |
for (i=0;i<RUNS;i++) |
563 |
{ |
564 |
|
565 |
#ifdef FP64
|
566 |
|
567 |
printVector(dim,X,"X","Roots"); |
568 |
|
569 |
/* Multiply A by X as Y <- A.X */
|
570 |
cblas_dgemv(CblasRowMajor,CblasNoTrans, |
571 |
dim,dim,alpha,A,dim,X,incx,beta,Y,incx); |
572 |
|
573 |
printVector(dim,Y,"Y","Results"); |
574 |
|
575 |
/* Solve linear system : Y <- A-1.Y */
|
576 |
cblas_dtrsv(CblasRowMajor,CblasUpper,CblasNoTrans,CblasNonUnit, |
577 |
dim,A,dim,Y,incx); |
578 |
|
579 |
printVector(dim,Y,"Y","Solutions"); |
580 |
|
581 |
cblas_daxpy(dim,beta2,Y,incx,X,incx); |
582 |
|
583 |
printVector(dim,X,"X","Errors"); |
584 |
|
585 |
/* Store the checker of errors */
|
586 |
checksA[i]=(double)cblas_dnrm2(dim,X,incx);
|
587 |
|
588 |
cblas_dswap(dim,X,incx,Y,incx); |
589 |
|
590 |
#else
|
591 |
|
592 |
printVector(dim,X,"X","Roots"); |
593 |
|
594 |
/* Multiply A by X as Y <- A.X */
|
595 |
cblas_sgemv(CblasRowMajor,CblasNoTrans, |
596 |
dim,dim,alpha,A,dim,X,incx,beta,Y,incx); |
597 |
|
598 |
printVector(dim,Y,"Y","Results"); |
599 |
|
600 |
/* Solve linear system : Y <- A-1.Y */
|
601 |
cblas_strsv(CblasRowMajor,CblasUpper,CblasNoTrans,CblasNonUnit, |
602 |
dim,A,dim,Y,incx); |
603 |
|
604 |
printVector(dim,Y,"Y","Solutions"); |
605 |
|
606 |
cblas_saxpy(dim,beta2,Y,incx,X,incx); |
607 |
|
608 |
printVector(dim,X,"X","Errors"); |
609 |
|
610 |
/* Store the checker of errors */
|
611 |
checksA[i]=(double)cblas_snrm2(dim,X,incx);
|
612 |
|
613 |
cblas_sswap(dim,X,incx,Y,incx); |
614 |
|
615 |
#endif
|
616 |
|
617 |
} |
618 |
#else
|
619 |
|
620 |
printf("Using CBLAS: %i iterations for %ix%i matrix\n",RUNS,dim,dim);
|
621 |
|
622 |
/*
|
623 |
RowMajor : Matrix is read row bu row
|
624 |
Upper : the no null elements are on top
|
625 |
NoTrans : no transposition before estimation
|
626 |
NonUnit : Matrix is not unit
|
627 |
*/
|
628 |
|
629 |
for (i=0;i<RUNS;i++) |
630 |
{ |
631 |
|
632 |
#ifdef FP64
|
633 |
|
634 |
printVector(dim,X,"X","Roots"); |
635 |
|
636 |
/* Multiply A by X as Y <- A.X */
|
637 |
cblas_dgemv(CblasRowMajor,CblasNoTrans, |
638 |
dim,dim,alpha,A,dim,X,incx,beta,Y,incx); |
639 |
|
640 |
printVector(dim,Y,"Y","Results"); |
641 |
|
642 |
/* Solve linear system : Y <- A-1.Y */
|
643 |
cblas_dtrsv(CblasRowMajor,CblasUpper,CblasNoTrans,CblasNonUnit, |
644 |
dim,A,dim,Y,incx); |
645 |
|
646 |
printVector(dim,Y,"Y","Solutions"); |
647 |
|
648 |
cblas_daxpy(dim,beta2,Y,incx,X,incx); |
649 |
|
650 |
printVector(dim,X,"X","Errors"); |
651 |
|
652 |
/* Store the checker of errors */
|
653 |
checksA[i]=(double)cblas_dnrm2(dim,X,incx);
|
654 |
|
655 |
cblas_dswap(dim,X,incx,Y,incx); |
656 |
|
657 |
#else
|
658 |
|
659 |
printVector(dim,X,"X","Roots"); |
660 |
|
661 |
/* Multiply A by X as Y <- A.X */
|
662 |
cblas_sgemv(CblasRowMajor,CblasNoTrans, |
663 |
dim,dim,alpha,A,dim,X,incx,beta,Y,incx); |
664 |
|
665 |
printVector(dim,Y,"Y","Results"); |
666 |
|
667 |
/* Solve linear system : Y <- A-1.Y */
|
668 |
cblas_strsv(CblasRowMajor,CblasUpper,CblasNoTrans,CblasNonUnit, |
669 |
dim,A,dim,Y,incx); |
670 |
|
671 |
printVector(dim,Y,"Y","Solutions"); |
672 |
|
673 |
cblas_saxpy(dim,beta2,Y,incx,X,incx); |
674 |
|
675 |
printVector(dim,X,"X","Errors"); |
676 |
|
677 |
/* Store the checker of errors */
|
678 |
checksA[i]=(double)cblas_snrm2(dim,X,incx);
|
679 |
|
680 |
cblas_sswap(dim,X,incx,Y,incx); |
681 |
|
682 |
#endif
|
683 |
|
684 |
} |
685 |
#endif
|
686 |
putchar('\n');
|
687 |
|
688 |
/* Get second timer after launching */
|
689 |
gettimeofday(&tv2, &tz); |
690 |
|
691 |
#ifdef CUBLAS
|
692 |
double memoryIn,memoryOut;
|
693 |
|
694 |
memoryIn=(double)((tv3.tv_sec-tv1.tv_sec) * 1000000L + \ |
695 |
(tv3.tv_usec-tv1.tv_usec))/1000000.; |
696 |
|
697 |
memoryOut=(double)((tv2.tv_sec-tv4.tv_sec) * 1000000L + \ |
698 |
(tv2.tv_usec-tv4.tv_usec))/1000000.; |
699 |
|
700 |
duration=(double)((tv4.tv_sec-tv3.tv_sec) * 1000000L + \ |
701 |
(tv4.tv_usec-tv3.tv_usec))/1000000./RUNS; |
702 |
|
703 |
printf("Duration of memory allocation : %2.10f s\n",memoryIn);
|
704 |
printf("Duration of memory free : %2.10f s\n",memoryOut);
|
705 |
#else
|
706 |
duration=(double)((tv2.tv_sec-tv1.tv_sec) * 1000000L + \ |
707 |
(tv2.tv_usec-tv1.tv_usec))/1000000./RUNS; |
708 |
|
709 |
#endif
|
710 |
|
711 |
printf("Duration of each cycle : %2.10f s\n",duration);
|
712 |
|
713 |
printResults(RUNS,checksA,"C","Errors cumulated"); |
714 |
|
715 |
putchar('\n');
|
716 |
|
717 |
/*
|
718 |
#ifdef PRINT
|
719 |
for (i=0; i<dim; i++) {
|
720 |
for (j=0; j<dim; j++) printf("A[%i,%i]=%1.5f ", i,j,A[i*dim+j]);
|
721 |
putchar('\n');
|
722 |
}
|
723 |
|
724 |
for (i=0; i<dim; i++) {
|
725 |
printf("X[%i]=%2.5f",i,X[i]);
|
726 |
putchar('\n');
|
727 |
}
|
728 |
putchar('\n');
|
729 |
for (i=0; i<dim; i++) {
|
730 |
printf("Y[%i]=%2.5f",i,Y[i]);
|
731 |
putchar('\n');
|
732 |
}
|
733 |
#endif
|
734 |
*/
|
735 |
|
736 |
return 0; |
737 |
} |
738 |
|
739 |
int main(int argc,char **argv) |
740 |
{ |
741 |
if ((argc==1)|| |
742 |
(strcmp(argv[1],"-h")==0)|| |
743 |
(strcmp(argv[1],"--help")==0)) |
744 |
{ |
745 |
printf("\nPerforms a bench using BLAS library implementation:\n\n"
|
746 |
"\t#1 Size on triangular system\n"
|
747 |
"\t#2 Number of iterations\n\n");
|
748 |
} |
749 |
else if ((atoi(argv[1])>=2)&& |
750 |
(atoi(argv[2])>=1)) |
751 |
{ |
752 |
bench(atoi(argv[1]),atoi(argv[2])); |
753 |
} |
754 |
|
755 |
return 0; |
756 |
} |