root / BLAS / xTRSV / xTRSV.c @ 251
Historique | Voir | Annoter | Télécharger (17,48 ko)
1 | 1 | equemene | /*
|
---|---|---|---|
2 | 1 | equemene | Performs a linear system solving of random generated system
|
3 | 1 | equemene | Estimates a test
|
4 | 1 | equemene | |
5 | 1 | equemene | Matrix is triangular
|
6 | 1 | equemene | |
7 | 1 | equemene | Thanks for help from aurel32@debian.org
|
8 | 1 | equemene | */
|
9 | 1 | equemene | |
10 | 1 | equemene | #include <stdio.h> |
11 | 1 | equemene | #include <math.h> |
12 | 1 | equemene | #include <stdlib.h> |
13 | 1 | equemene | #include <sys/time.h> |
14 | 1 | equemene | #include <string.h> |
15 | 1 | equemene | |
16 | 1 | equemene | #ifdef CUBLAS
|
17 | 1 | equemene | #include <cublas.h> |
18 | 1 | equemene | #define CUBLAS_WRAPPER_ERROR_NOERR 0 |
19 | 1 | equemene | #define CUBLAS_WRAPPER_ERROR_ALLOC 1 |
20 | 1 | equemene | #define CUBLAS_WRAPPER_ERROR_SET 2 |
21 | 1 | equemene | #define CUBLAS_WRAPPER_ERROR_GET 3 |
22 | 1 | equemene | #define CUBLAS_WRAPPER_ERROR_STUB 4 |
23 | 1 | equemene | #elif THUNKING
|
24 | 1 | equemene | #include <cublas.h> |
25 | 7 | equemene | #include "fortran_common.h" |
26 | 7 | equemene | #include "fortran_thunking.h" |
27 | 1 | equemene | #elif FBLAS
|
28 | 251 | equemene | #include <f77blas.h> |
29 | 1 | equemene | #elif GSL
|
30 | 1 | equemene | #include <gsl_cblas.h> |
31 | 1 | equemene | #elif ACML
|
32 | 1 | equemene | #include <acml.h> |
33 | 1 | equemene | #else
|
34 | 1 | equemene | #include <cblas.h> |
35 | 251 | equemene | // #include <blaswrap.h>
|
36 | 1 | equemene | #endif
|
37 | 1 | equemene | |
38 | 251 | equemene | #ifdef FP64
|
39 | 1 | equemene | #define LENGTH double |
40 | 1 | equemene | #else
|
41 | 1 | equemene | #define LENGTH float |
42 | 1 | equemene | #endif
|
43 | 1 | equemene | |
44 | 1 | equemene | /* Matrix with only defined triangular terms */
|
45 | 1 | equemene | /* Even if there are 0 in matrix, must be defined at all ! */
|
46 | 1 | equemene | |
47 | 1 | equemene | /* Get from fortran.c */
|
48 | 1 | equemene | |
49 | 1 | equemene | #ifdef CUBLAS
|
50 | 1 | equemene | static char *errMsg[5] = |
51 | 1 | equemene | { |
52 | 1 | equemene | "no error",
|
53 | 1 | equemene | "allocation error",
|
54 | 1 | equemene | "setVector/setMatrix error",
|
55 | 1 | equemene | "getVector/getMatrix error",
|
56 | 1 | equemene | "not implemented"
|
57 | 1 | equemene | }; |
58 | 1 | equemene | |
59 | 1 | equemene | static void wrapperError (const char *funcName, int error) |
60 | 1 | equemene | { |
61 | 1 | equemene | printf ("cublas%s wrapper: %s\n", funcName, errMsg[error]);
|
62 | 1 | equemene | fflush (stdout); |
63 | 1 | equemene | } |
64 | 1 | equemene | #endif
|
65 | 1 | equemene | |
66 | 1 | equemene | int printVector(const int dimVector,const LENGTH *dataVector, |
67 | 1 | equemene | char *nameVector,char *mesgVector) |
68 | 1 | equemene | { |
69 | 1 | equemene | #ifndef QUIET
|
70 | 1 | equemene | |
71 | 1 | equemene | int i;
|
72 | 1 | equemene | printf("\n%s of %s, size %i:\n",mesgVector,nameVector,dimVector);
|
73 | 1 | equemene | for (i=0;i<dimVector;i++) |
74 | 1 | equemene | { |
75 | 1 | equemene | printf("%s[%i]=%2.10e\n",nameVector,i,dataVector[i]);
|
76 | 1 | equemene | } |
77 | 1 | equemene | #endif
|
78 | 1 | equemene | |
79 | 1 | equemene | return 0; |
80 | 1 | equemene | } |
81 | 1 | equemene | |
82 | 1 | equemene | int printResults(const int dimVector,const LENGTH *dataVector, |
83 | 1 | equemene | char *nameVector,char *mesgVector) |
84 | 1 | equemene | { |
85 | 1 | equemene | #ifdef RESULTS
|
86 | 1 | equemene | int i;
|
87 | 1 | equemene | |
88 | 1 | equemene | printf("\n%s of %s, size %i:\n",mesgVector,nameVector,dimVector);
|
89 | 1 | equemene | for (i=0;i<dimVector;i++) |
90 | 1 | equemene | { |
91 | 1 | equemene | printf("%s[%i]=%2.10e\n",nameVector,i,dataVector[i]);
|
92 | 1 | equemene | } |
93 | 1 | equemene | #endif
|
94 | 1 | equemene | return 0; |
95 | 1 | equemene | } |
96 | 1 | equemene | |
97 | 1 | equemene | #ifdef CUBLAS
|
98 | 1 | equemene | int printVectorGPU(const int dimVector,const LENGTH *dataVector, |
99 | 1 | equemene | char *nameVector,char *mesgVector) |
100 | 1 | equemene | { |
101 | 1 | equemene | #ifndef QUIET
|
102 | 1 | equemene | int i;
|
103 | 1 | equemene | cublasStatus stat; |
104 | 1 | equemene | LENGTH *P=0;
|
105 | 1 | equemene | int incx=1; |
106 | 1 | equemene | |
107 | 1 | equemene | P=malloc(dimVector*sizeof(LENGTH));
|
108 | 1 | equemene | |
109 | 1 | equemene | stat=cublasGetVector(dimVector,sizeof(P[0]),dataVector,incx,P,incx); |
110 | 1 | equemene | |
111 | 1 | equemene | if (stat != CUBLAS_STATUS_SUCCESS) {
|
112 | 1 | equemene | wrapperError ("ToGet", CUBLAS_WRAPPER_ERROR_GET);
|
113 | 1 | equemene | } |
114 | 1 | equemene | |
115 | 1 | equemene | printf("\n%s of %s, size %i:\n",mesgVector,nameVector,dimVector);
|
116 | 1 | equemene | for (i=0;i<dimVector;i++) |
117 | 1 | equemene | { |
118 | 1 | equemene | printf("%s[%i]=%2.10e\n",nameVector,i,P[i]);
|
119 | 1 | equemene | } |
120 | 1 | equemene | |
121 | 1 | equemene | free(P); |
122 | 1 | equemene | #endif
|
123 | 1 | equemene | |
124 | 1 | equemene | return 0; |
125 | 1 | equemene | } |
126 | 1 | equemene | #endif
|
127 | 1 | equemene | |
128 | 1 | equemene | int bench(int dim,int RUNS) |
129 | 1 | equemene | { |
130 | 1 | equemene | /*
|
131 | 1 | equemene | int dim=1000;
|
132 | 1 | equemene | int RUNS=100;
|
133 | 1 | equemene | */
|
134 | 1 | equemene | int incx=1; |
135 | 1 | equemene | #ifdef PRINT
|
136 | 1 | equemene | LENGTH factor=1.;
|
137 | 1 | equemene | #endif
|
138 | 1 | equemene | |
139 | 1 | equemene | LENGTH alpha=1.,beta=0.,beta2=-1.; |
140 | 1 | equemene | LENGTH *A,*X,*Y; |
141 | 1 | equemene | |
142 | 1 | equemene | /* checkBefore checkAfter checks */
|
143 | 1 | equemene | LENGTH *checksA,*checksB; |
144 | 1 | equemene | |
145 | 1 | equemene | int i=0, j=0; |
146 | 1 | equemene | |
147 | 1 | equemene | double duration;
|
148 | 1 | equemene | |
149 | 1 | equemene | struct timeval tv1,tv2;
|
150 | 1 | equemene | struct timezone tz;
|
151 | 1 | equemene | |
152 | 1 | equemene | /* Create 1 Matrix and 2 Vectors of dimension dim */
|
153 | 1 | equemene | |
154 | 1 | equemene | A=malloc(dim*dim*sizeof(LENGTH));
|
155 | 1 | equemene | X=malloc(dim*sizeof(LENGTH));
|
156 | 1 | equemene | Y=malloc(dim*sizeof(LENGTH));
|
157 | 1 | equemene | |
158 | 1 | equemene | /* Create 2 vectors for checker Before and After */
|
159 | 1 | equemene | |
160 | 1 | equemene | checksA=malloc(RUNS*sizeof(double)); |
161 | 1 | equemene | checksB=malloc(RUNS*sizeof(double)); |
162 | 1 | equemene | |
163 | 1 | equemene | /* Initialize elements with random numbers */
|
164 | 1 | equemene | /* Initialize the seed for rand() */
|
165 | 1 | equemene | /* srand(time()); */
|
166 | 1 | equemene | |
167 | 1 | equemene | #ifdef UNIT
|
168 | 1 | equemene | /* Fill the matrix and vector with random numbers */
|
169 | 1 | equemene | for (i=0; i<dim; i++) { |
170 | 1 | equemene | for (j=0; j<dim; j++) |
171 | 1 | equemene | if (j>=i)
|
172 | 1 | equemene | { |
173 | 1 | equemene | /* Normalization is necessary to avoid problems */
|
174 | 1 | equemene | A[i*dim+j]=1.;
|
175 | 1 | equemene | } |
176 | 1 | equemene | else
|
177 | 1 | equemene | { |
178 | 1 | equemene | A[i*dim+j]=0.;
|
179 | 1 | equemene | } |
180 | 1 | equemene | X[i]=1;
|
181 | 1 | equemene | } |
182 | 1 | equemene | #else
|
183 | 1 | equemene | for (i=0; i<dim; i++) { |
184 | 1 | equemene | for (j=0; j<dim; j++) |
185 | 1 | equemene | if (j>i)
|
186 | 1 | equemene | { |
187 | 1 | equemene | /* Normalization is necessary to avoid problems */
|
188 | 180 | equemene | /* A[i*dim+j]=(LENGTH)rand()/(RAND_MAX+1.) */
|
189 | 180 | equemene | /* *(LENGTH)(i+1.)/(LENGTH)(j+1.); */
|
190 | 1 | equemene | A[i*dim+j]=(LENGTH)rand()/(RAND_MAX+1.)
|
191 | 180 | equemene | /(LENGTH)(dim-j); |
192 | 1 | equemene | } |
193 | 1 | equemene | else if (j==i) |
194 | 1 | equemene | { |
195 | 1 | equemene | A[i*dim+j]=1.;
|
196 | 1 | equemene | } |
197 | 1 | equemene | else
|
198 | 1 | equemene | { |
199 | 1 | equemene | A[i*dim+j]=0.;
|
200 | 1 | equemene | } |
201 | 1 | equemene | X[i]=(LENGTH)rand()/(RAND_MAX+1.);
|
202 | 1 | equemene | } |
203 | 1 | equemene | #endif
|
204 | 1 | equemene | |
205 | 1 | equemene | /* Print the matrix */
|
206 | 1 | equemene | |
207 | 1 | equemene | #ifdef QUIET
|
208 | 1 | equemene | #else
|
209 | 1 | equemene | for (i=0; i<dim; i++) { |
210 | 1 | equemene | for (j=0; j<dim; j++) printf("A[%i,%i]=%1.5f ", i,j,A[i*dim+j]); |
211 | 1 | equemene | printf("\tX[%i]=%1.5f ", i,X[i]);
|
212 | 1 | equemene | putchar('\n');
|
213 | 1 | equemene | } |
214 | 1 | equemene | putchar('\n');
|
215 | 1 | equemene | #endif
|
216 | 1 | equemene | |
217 | 1 | equemene | /* Get first timer before launching */
|
218 | 1 | equemene | gettimeofday(&tv1, &tz); |
219 | 1 | equemene | |
220 | 1 | equemene | /* Compute with CuBLAS library */
|
221 | 1 | equemene | #ifdef CUBLAS
|
222 | 1 | equemene | LENGTH *devPtrA=0, *devPtrX=0, *devPtrY=0; |
223 | 1 | equemene | cublasStatus stat1, stat2, stat3; |
224 | 1 | equemene | struct timeval tv3,tv4;
|
225 | 1 | equemene | |
226 | 1 | equemene | /* Order is Row */
|
227 | 1 | equemene | /* Have to swap uplo and trans */
|
228 | 1 | equemene | char uplo='L',trans='T',diag='N'; |
229 | 1 | equemene | |
230 | 1 | equemene | printf("Using CuBLAS: %i iterations for %ix%i matrix\n",
|
231 | 1 | equemene | RUNS,dim,dim); |
232 | 1 | equemene | |
233 | 1 | equemene | stat1=cublasAlloc(dim*dim,sizeof(devPtrA[0]),(void**)&devPtrA); |
234 | 1 | equemene | stat2=cublasAlloc(dim,sizeof(devPtrX[0]),(void**)&devPtrX); |
235 | 1 | equemene | stat3=cublasAlloc(dim,sizeof(devPtrY[0]),(void**)&devPtrY); |
236 | 1 | equemene | |
237 | 1 | equemene | if ((stat1 != CUBLAS_STATUS_SUCCESS) ||
|
238 | 1 | equemene | (stat2 != CUBLAS_STATUS_SUCCESS) || |
239 | 1 | equemene | (stat3 != CUBLAS_STATUS_SUCCESS)) { |
240 | 1 | equemene | wrapperError ("Dtrsv", CUBLAS_WRAPPER_ERROR_ALLOC);
|
241 | 1 | equemene | cublasFree (devPtrA); |
242 | 1 | equemene | cublasFree (devPtrX); |
243 | 1 | equemene | cublasFree (devPtrY); |
244 | 1 | equemene | return 1; |
245 | 1 | equemene | } |
246 | 1 | equemene | |
247 | 1 | equemene | stat1=cublasSetMatrix(dim,dim,sizeof(A[0]),A,dim,devPtrA,dim); |
248 | 1 | equemene | stat2=cublasSetVector(dim,sizeof(X[0]),X,incx,devPtrX,incx); |
249 | 1 | equemene | stat3=cublasSetVector(dim,sizeof(Y[0]),Y,incx,devPtrY,incx); |
250 | 1 | equemene | |
251 | 1 | equemene | if ((stat1 != CUBLAS_STATUS_SUCCESS) ||
|
252 | 1 | equemene | (stat2 != CUBLAS_STATUS_SUCCESS) || |
253 | 1 | equemene | (stat3 != CUBLAS_STATUS_SUCCESS)) { |
254 | 1 | equemene | wrapperError ("Dtrsv", CUBLAS_WRAPPER_ERROR_SET);
|
255 | 1 | equemene | cublasFree (devPtrA); |
256 | 1 | equemene | cublasFree (devPtrX); |
257 | 1 | equemene | cublasFree (devPtrY); |
258 | 1 | equemene | return 1; |
259 | 1 | equemene | } |
260 | 1 | equemene | |
261 | 1 | equemene | /* Get third timer after memory operation */
|
262 | 1 | equemene | gettimeofday(&tv3, &tz); |
263 | 1 | equemene | |
264 | 1 | equemene | for (i=0;i<RUNS;i++) |
265 | 1 | equemene | { |
266 | 251 | equemene | #ifdef FP64
|
267 | 1 | equemene | |
268 | 1 | equemene | printVectorGPU(dim,devPtrX,"X","Roots"); |
269 | 1 | equemene | |
270 | 1 | equemene | /* Multiply Y <- A.X */
|
271 | 1 | equemene | cublasDgemv(trans,dim,dim,alpha,devPtrA,dim, |
272 | 1 | equemene | devPtrX,incx,beta,devPtrY,incx); |
273 | 1 | equemene | |
274 | 1 | equemene | printVectorGPU(dim,devPtrY,"Y","Results"); |
275 | 1 | equemene | |
276 | 1 | equemene | /* Solve linear system A.X=Y : Y <- A-1.Y */
|
277 | 1 | equemene | cublasDtrsv(uplo,trans,diag,dim,devPtrA,dim,devPtrY,incx); |
278 | 1 | equemene | |
279 | 1 | equemene | printVectorGPU(dim,devPtrY,"Y","Solutions"); |
280 | 1 | equemene | |
281 | 1 | equemene | /* Estimate the difference between X and Y : Y <- -Y+X */
|
282 | 1 | equemene | cublasDaxpy(dim,beta2,devPtrY,incx,devPtrX,incx); |
283 | 1 | equemene | |
284 | 1 | equemene | printVectorGPU(dim,devPtrX,"X","Errors"); |
285 | 1 | equemene | |
286 | 1 | equemene | /* Estimate the second checker */
|
287 | 180 | equemene | checksA[i]=(double)cublasDnrm2(dim,devPtrX,incx);
|
288 | 1 | equemene | |
289 | 1 | equemene | /* Swap vector X and Y */
|
290 | 1 | equemene | cublasDswap(dim,devPtrX,incx,devPtrY,incx); |
291 | 1 | equemene | |
292 | 1 | equemene | #else
|
293 | 1 | equemene | |
294 | 1 | equemene | printVectorGPU(dim,devPtrX,"X","Roots"); |
295 | 1 | equemene | |
296 | 1 | equemene | /* Multiply Y <- A.X */
|
297 | 1 | equemene | cublasSgemv(trans,dim,dim,alpha,devPtrA,dim, |
298 | 1 | equemene | devPtrX,incx,beta,devPtrY,incx); |
299 | 1 | equemene | |
300 | 1 | equemene | printVectorGPU(dim,devPtrY,"Y","Results"); |
301 | 1 | equemene | |
302 | 1 | equemene | /* Solve linear system Y <- A-1.Y */
|
303 | 1 | equemene | cublasStrsv(uplo,trans,diag,dim,devPtrA,dim,devPtrY,incx); |
304 | 1 | equemene | |
305 | 1 | equemene | printVectorGPU(dim,devPtrY,"Y","Solutions"); |
306 | 1 | equemene | |
307 | 1 | equemene | /* Add vectors X and -Y */
|
308 | 1 | equemene | cublasSaxpy(dim,beta2,devPtrY,incx,devPtrX,incx); |
309 | 1 | equemene | |
310 | 1 | equemene | printVectorGPU(dim,devPtrX,"X","Errors"); |
311 | 1 | equemene | |
312 | 1 | equemene | /* Estimate the second checker */
|
313 | 180 | equemene | checksA[i]=(double)cublasSnrm2(dim,devPtrX,incx);
|
314 | 1 | equemene | |
315 | 1 | equemene | /* Swap vector X and Y */
|
316 | 1 | equemene | cublasSswap(dim,devPtrX,incx,devPtrY,incx); |
317 | 1 | equemene | |
318 | 1 | equemene | #endif
|
319 | 1 | equemene | |
320 | 1 | equemene | } |
321 | 1 | equemene | |
322 | 1 | equemene | stat1=cublasGetMatrix(dim,dim,sizeof(A[0]),devPtrA,dim,A,dim); |
323 | 1 | equemene | stat2=cublasGetVector(dim,sizeof(X[0]),devPtrX,incx,X,incx); |
324 | 1 | equemene | stat3=cublasGetVector(dim,sizeof(Y[0]),devPtrY,incx,Y,incx); |
325 | 1 | equemene | |
326 | 1 | equemene | cublasFree (devPtrA); |
327 | 1 | equemene | cublasFree (devPtrX); |
328 | 1 | equemene | cublasFree (devPtrY); |
329 | 1 | equemene | |
330 | 1 | equemene | if ((stat1 != CUBLAS_STATUS_SUCCESS) ||
|
331 | 1 | equemene | (stat2 != CUBLAS_STATUS_SUCCESS) || |
332 | 1 | equemene | (stat3 != CUBLAS_STATUS_SUCCESS)) { |
333 | 1 | equemene | wrapperError ("LinearSystem", CUBLAS_WRAPPER_ERROR_GET);
|
334 | 1 | equemene | } |
335 | 1 | equemene | |
336 | 1 | equemene | /* Get fourth timer after memory free */
|
337 | 1 | equemene | gettimeofday(&tv4, &tz); |
338 | 1 | equemene | |
339 | 1 | equemene | #elif THUNKING
|
340 | 1 | equemene | |
341 | 1 | equemene | /* Order is Row : Have to swap uplo='U' and trans='N' */
|
342 | 1 | equemene | char uplo='L',trans='T',diag='N'; |
343 | 1 | equemene | printf("Using CuBLAS/Thunking: %i iterations for %ix%i matrix\n",
|
344 | 1 | equemene | RUNS,dim,dim); |
345 | 1 | equemene | |
346 | 1 | equemene | for (i=0;i<RUNS;i++) |
347 | 1 | equemene | { |
348 | 251 | equemene | #ifdef FP64
|
349 | 1 | equemene | |
350 | 1 | equemene | printVector(dim,X,"X","Roots"); |
351 | 1 | equemene | |
352 | 1 | equemene | /* Multiply A by X as Y <- A.X */
|
353 | 1 | equemene | CUBLAS_DGEMV(&trans,&dim,&dim,&alpha,A,&dim,X,&incx,&beta,Y,&incx); |
354 | 1 | equemene | |
355 | 1 | equemene | printVector(dim,Y,"Y","Results"); |
356 | 1 | equemene | |
357 | 1 | equemene | /* Solve linear system */
|
358 | 1 | equemene | CUBLAS_DTRSV(&uplo,&trans,&diag,&dim,A,&dim,Y,&incx); |
359 | 1 | equemene | |
360 | 1 | equemene | printVector(dim,Y,"Y","Solutions"); |
361 | 1 | equemene | |
362 | 1 | equemene | /* Compare the roots X and Y */
|
363 | 1 | equemene | CUBLAS_DAXPY(&dim,&beta2,Y,&incx,X,&incx); |
364 | 1 | equemene | |
365 | 1 | equemene | printVector(dim,X,"X","Errors"); |
366 | 1 | equemene | |
367 | 1 | equemene | /* Store the checker of errors */
|
368 | 180 | equemene | checksA[i]=(double)CUBLAS_DNRM2(&dim,X,&incx);
|
369 | 1 | equemene | |
370 | 1 | equemene | /* Swap vector X and Y */
|
371 | 1 | equemene | CUBLAS_DSWAP(&dim,X,&incx,Y,&incx); |
372 | 1 | equemene | #else
|
373 | 1 | equemene | |
374 | 1 | equemene | printVector(dim,X,"X","Roots"); |
375 | 1 | equemene | |
376 | 1 | equemene | /* Multiply A by X as Y <- A.X */
|
377 | 1 | equemene | CUBLAS_SGEMV(&trans,&dim,&dim,&alpha,A,&dim,X,&incx,&beta,Y,&incx); |
378 | 1 | equemene | |
379 | 1 | equemene | printVector(dim,Y,"Y","Results"); |
380 | 1 | equemene | |
381 | 1 | equemene | /* Solve linear system */
|
382 | 1 | equemene | CUBLAS_STRSV(&uplo,&trans,&diag,&dim,A,&dim,Y,&incx); |
383 | 1 | equemene | |
384 | 1 | equemene | printVector(dim,Y,"Y","Solutions"); |
385 | 1 | equemene | |
386 | 1 | equemene | /* Compare the roots X and Y */
|
387 | 1 | equemene | CUBLAS_SAXPY(&dim,&beta2,Y,&incx,X,&incx); |
388 | 1 | equemene | |
389 | 1 | equemene | printVector(dim,X,"X","Errors"); |
390 | 1 | equemene | |
391 | 1 | equemene | /* Store the checker of errors */
|
392 | 180 | equemene | checksA[i]=(double)CUBLAS_SNRM2(&dim,X,&incx);
|
393 | 1 | equemene | |
394 | 1 | equemene | /* Swap vector X and Y */
|
395 | 1 | equemene | CUBLAS_SSWAP(&dim,X,&incx,Y,&incx); |
396 | 1 | equemene | #endif
|
397 | 1 | equemene | |
398 | 1 | equemene | #ifdef PRINT
|
399 | 1 | equemene | printf("Iteration %i, checker is %2.5f and error is %2.10f\n",
|
400 | 1 | equemene | i,checksA[i],fabs(checksB[i]-checksA[i])/factor); |
401 | 1 | equemene | #endif
|
402 | 1 | equemene | } |
403 | 1 | equemene | |
404 | 1 | equemene | #elif FBLAS
|
405 | 1 | equemene | |
406 | 1 | equemene | /* Order is Row : Have to swap uplo='U' and trans='N' */
|
407 | 1 | equemene | char uplo='L',trans='T',diag='N'; |
408 | 1 | equemene | |
409 | 1 | equemene | printf("Using FBLAS: %i iterations for %ix%i matrix\n",
|
410 | 1 | equemene | RUNS,dim,dim); |
411 | 1 | equemene | |
412 | 1 | equemene | for (i=0;i<RUNS;i++) |
413 | 1 | equemene | { |
414 | 251 | equemene | #ifdef FP64
|
415 | 1 | equemene | |
416 | 180 | equemene | printVector(dim,X,"X","Initial roots"); |
417 | 1 | equemene | |
418 | 251 | equemene | /* /\* Multiply A by X as Y <- A.X *\/ */
|
419 | 251 | equemene | /* dgemv_(&trans,&dim,&dim,&alpha,A,&dim,X,&incx,&beta,Y,&incx); */
|
420 | 251 | equemene | |
421 | 251 | equemene | /* printVector(dim,Y,"Y<-A.X","Estimated results"); */
|
422 | 251 | equemene | |
423 | 251 | equemene | /* /\* Solve linear system *\/ */
|
424 | 251 | equemene | /* dtrsv_(&uplo,&trans,&diag,&dim,A,&dim,Y,&incx); */
|
425 | 251 | equemene | |
426 | 251 | equemene | /* printVector(dim,Y,"X","Solutions from A.X=Y"); */
|
427 | 251 | equemene | |
428 | 251 | equemene | /* /\* Compare the roots X and Y *\/ */
|
429 | 251 | equemene | /* daxpy_(&dim,&beta2,Y,&incx,X,&incx); */
|
430 | 251 | equemene | |
431 | 251 | equemene | /* printVector(dim,X,"X","Differences initial and estimated"); */
|
432 | 251 | equemene | |
433 | 251 | equemene | /* /\* Store the checker of errors *\/ */
|
434 | 251 | equemene | /* dnrm2_(&dim,X,&incx,&checksA[i]); */
|
435 | 251 | equemene | |
436 | 251 | equemene | /* /\* Swap vector X and Y *\/ */
|
437 | 251 | equemene | /* dswap_(&dim,X,&incx,Y,&incx); */
|
438 | 251 | equemene | |
439 | 1 | equemene | /* Multiply A by X as Y <- A.X */
|
440 | 1 | equemene | dgemv_(&trans,&dim,&dim,&alpha,A,&dim,X,&incx,&beta,Y,&incx); |
441 | 1 | equemene | |
442 | 180 | equemene | printVector(dim,Y,"Y<-A.X","Estimated results"); |
443 | 1 | equemene | |
444 | 1 | equemene | /* Solve linear system */
|
445 | 1 | equemene | dtrsv_(&uplo,&trans,&diag,&dim,A,&dim,Y,&incx); |
446 | 1 | equemene | |
447 | 180 | equemene | printVector(dim,Y,"X","Solutions from A.X=Y"); |
448 | 1 | equemene | |
449 | 1 | equemene | /* Compare the roots X and Y */
|
450 | 1 | equemene | daxpy_(&dim,&beta2,Y,&incx,X,&incx); |
451 | 1 | equemene | |
452 | 180 | equemene | printVector(dim,X,"X","Differences initial and estimated"); |
453 | 1 | equemene | |
454 | 1 | equemene | /* Store the checker of errors */
|
455 | 251 | equemene | checksA[i]=(double)dnrm2_(&dim,X,&incx);
|
456 | 1 | equemene | |
457 | 1 | equemene | /* Swap vector X and Y */
|
458 | 1 | equemene | dswap_(&dim,X,&incx,Y,&incx); |
459 | 1 | equemene | |
460 | 1 | equemene | #else
|
461 | 1 | equemene | |
462 | 1 | equemene | printVector(dim,X,"X","Roots"); |
463 | 1 | equemene | |
464 | 1 | equemene | /* Multiply A by X as Y <- A.X */
|
465 | 1 | equemene | sgemv_(&trans,&dim,&dim,&alpha,A,&dim,X,&incx,&beta,Y,&incx); |
466 | 1 | equemene | |
467 | 1 | equemene | printVector(dim,Y,"Y","Results"); |
468 | 1 | equemene | |
469 | 1 | equemene | /* Solve linear system */
|
470 | 1 | equemene | strsv_(&uplo,&trans,&diag,&dim,A,&dim,Y,&incx); |
471 | 1 | equemene | |
472 | 1 | equemene | printVector(dim,Y,"Y","Solutions"); |
473 | 1 | equemene | |
474 | 1 | equemene | /* Compare the roots X and Y */
|
475 | 1 | equemene | saxpy_(&dim,&beta2,Y,&incx,X,&incx); |
476 | 1 | equemene | |
477 | 1 | equemene | printVector(dim,X,"X","Errors"); |
478 | 1 | equemene | |
479 | 1 | equemene | /* Store the checker of errors */
|
480 | 251 | equemene | checksA[i]=(LENGTH)snrm2_(&dim,X,&incx); |
481 | 1 | equemene | |
482 | 1 | equemene | /* Swap vector X and Y */
|
483 | 1 | equemene | sswap_(&dim,X,&incx,Y,&incx); |
484 | 1 | equemene | #endif
|
485 | 1 | equemene | |
486 | 1 | equemene | } |
487 | 1 | equemene | |
488 | 1 | equemene | #elif ACML
|
489 | 1 | equemene | |
490 | 1 | equemene | /* Order is Row : Have to swap uplo='U' and trans='N' */
|
491 | 1 | equemene | char uplo='L',trans='T',diag='N'; |
492 | 1 | equemene | |
493 | 1 | equemene | printf("Using ACML: %i iterations for %ix%i matrix\n",
|
494 | 1 | equemene | RUNS,dim,dim); |
495 | 1 | equemene | |
496 | 1 | equemene | for (i=0;i<RUNS;i++) |
497 | 1 | equemene | { |
498 | 251 | equemene | #ifdef FP64
|
499 | 1 | equemene | |
500 | 1 | equemene | printVector(dim,X,"X","Roots"); |
501 | 1 | equemene | |
502 | 1 | equemene | /* Multiply A by X as Y <- A.X */
|
503 | 1 | equemene | dgemv(trans,dim,dim,alpha,A,dim,X,incx,beta,Y,incx); |
504 | 1 | equemene | |
505 | 1 | equemene | printVector(dim,Y,"Y","Results"); |
506 | 1 | equemene | |
507 | 1 | equemene | /* Solve linear system */
|
508 | 1 | equemene | dtrsv(uplo,trans,diag,dim,A,dim,Y,incx); |
509 | 1 | equemene | |
510 | 1 | equemene | printVector(dim,Y,"Y","Solutions"); |
511 | 1 | equemene | |
512 | 1 | equemene | /* Compare the roots X and Y */
|
513 | 1 | equemene | daxpy(dim,beta2,Y,incx,X,incx); |
514 | 1 | equemene | |
515 | 1 | equemene | printVector(dim,X,"X","Errors"); |
516 | 1 | equemene | |
517 | 1 | equemene | /* Store the checker of errors */
|
518 | 180 | equemene | dnrm2_(&dim,X,&incx,&checksA[i]); |
519 | 1 | equemene | |
520 | 1 | equemene | /* Swap vector X and Y */
|
521 | 1 | equemene | dswap(dim,X,incx,Y,incx); |
522 | 1 | equemene | |
523 | 1 | equemene | #else
|
524 | 1 | equemene | |
525 | 1 | equemene | printVector(dim,X,"X","Roots"); |
526 | 1 | equemene | |
527 | 1 | equemene | /* Multiply A by X as Y <- A.X */
|
528 | 1 | equemene | sgemv(trans,dim,dim,alpha,A,dim,X,incx,beta,Y,incx); |
529 | 1 | equemene | |
530 | 1 | equemene | printVector(dim,Y,"Y","Results"); |
531 | 1 | equemene | |
532 | 1 | equemene | /* Solve linear system */
|
533 | 1 | equemene | strsv(uplo,trans,diag,dim,A,dim,Y,incx); |
534 | 1 | equemene | |
535 | 1 | equemene | printVector(dim,Y,"Y","Solutions"); |
536 | 1 | equemene | |
537 | 1 | equemene | /* Compare the roots X and Y */
|
538 | 1 | equemene | saxpy(dim,beta2,Y,incx,X,incx); |
539 | 1 | equemene | |
540 | 1 | equemene | printVector(dim,X,"X","Errors"); |
541 | 1 | equemene | |
542 | 1 | equemene | /* Store the checker of errors */
|
543 | 180 | equemene | snrm2_(&dim,X,&incx,&checksA[i]); |
544 | 1 | equemene | |
545 | 1 | equemene | /* Swap vector X and Y */
|
546 | 1 | equemene | sswap(dim,X,incx,Y,incx); |
547 | 1 | equemene | #endif
|
548 | 1 | equemene | |
549 | 1 | equemene | } |
550 | 1 | equemene | |
551 | 1 | equemene | #elif GSL
|
552 | 1 | equemene | |
553 | 1 | equemene | printf("Using GSL: %i iterations for %ix%i matrix\n",RUNS,dim,dim);
|
554 | 1 | equemene | |
555 | 1 | equemene | /*
|
556 | 1 | equemene | RowMajor : Matrix is read row by row
|
557 | 1 | equemene | Upper : the no null elements are on top
|
558 | 1 | equemene | NoTrans : no transposition before estimation
|
559 | 1 | equemene | NonUnit : Matrix is not unit
|
560 | 1 | equemene | */
|
561 | 1 | equemene | |
562 | 1 | equemene | for (i=0;i<RUNS;i++) |
563 | 1 | equemene | { |
564 | 1 | equemene | |
565 | 251 | equemene | #ifdef FP64
|
566 | 1 | equemene | |
567 | 1 | equemene | printVector(dim,X,"X","Roots"); |
568 | 1 | equemene | |
569 | 1 | equemene | /* Multiply A by X as Y <- A.X */
|
570 | 1 | equemene | cblas_dgemv(CblasRowMajor,CblasNoTrans, |
571 | 1 | equemene | dim,dim,alpha,A,dim,X,incx,beta,Y,incx); |
572 | 1 | equemene | |
573 | 1 | equemene | printVector(dim,Y,"Y","Results"); |
574 | 1 | equemene | |
575 | 1 | equemene | /* Solve linear system : Y <- A-1.Y */
|
576 | 1 | equemene | cblas_dtrsv(CblasRowMajor,CblasUpper,CblasNoTrans,CblasNonUnit, |
577 | 1 | equemene | dim,A,dim,Y,incx); |
578 | 1 | equemene | |
579 | 1 | equemene | printVector(dim,Y,"Y","Solutions"); |
580 | 1 | equemene | |
581 | 1 | equemene | cblas_daxpy(dim,beta2,Y,incx,X,incx); |
582 | 1 | equemene | |
583 | 1 | equemene | printVector(dim,X,"X","Errors"); |
584 | 1 | equemene | |
585 | 1 | equemene | /* Store the checker of errors */
|
586 | 180 | equemene | checksA[i]=(double)cblas_dnrm2(dim,X,incx);
|
587 | 1 | equemene | |
588 | 1 | equemene | cblas_dswap(dim,X,incx,Y,incx); |
589 | 1 | equemene | |
590 | 1 | equemene | #else
|
591 | 1 | equemene | |
592 | 1 | equemene | printVector(dim,X,"X","Roots"); |
593 | 1 | equemene | |
594 | 1 | equemene | /* Multiply A by X as Y <- A.X */
|
595 | 1 | equemene | cblas_sgemv(CblasRowMajor,CblasNoTrans, |
596 | 1 | equemene | dim,dim,alpha,A,dim,X,incx,beta,Y,incx); |
597 | 1 | equemene | |
598 | 1 | equemene | printVector(dim,Y,"Y","Results"); |
599 | 1 | equemene | |
600 | 1 | equemene | /* Solve linear system : Y <- A-1.Y */
|
601 | 1 | equemene | cblas_strsv(CblasRowMajor,CblasUpper,CblasNoTrans,CblasNonUnit, |
602 | 1 | equemene | dim,A,dim,Y,incx); |
603 | 1 | equemene | |
604 | 1 | equemene | printVector(dim,Y,"Y","Solutions"); |
605 | 1 | equemene | |
606 | 1 | equemene | cblas_saxpy(dim,beta2,Y,incx,X,incx); |
607 | 1 | equemene | |
608 | 1 | equemene | printVector(dim,X,"X","Errors"); |
609 | 1 | equemene | |
610 | 1 | equemene | /* Store the checker of errors */
|
611 | 180 | equemene | checksA[i]=(double)cblas_snrm2(dim,X,incx);
|
612 | 1 | equemene | |
613 | 1 | equemene | cblas_sswap(dim,X,incx,Y,incx); |
614 | 1 | equemene | |
615 | 1 | equemene | #endif
|
616 | 1 | equemene | |
617 | 1 | equemene | } |
618 | 1 | equemene | #else
|
619 | 1 | equemene | |
620 | 1 | equemene | printf("Using CBLAS: %i iterations for %ix%i matrix\n",RUNS,dim,dim);
|
621 | 1 | equemene | |
622 | 1 | equemene | /*
|
623 | 1 | equemene | RowMajor : Matrix is read row bu row
|
624 | 1 | equemene | Upper : the no null elements are on top
|
625 | 1 | equemene | NoTrans : no transposition before estimation
|
626 | 1 | equemene | NonUnit : Matrix is not unit
|
627 | 1 | equemene | */
|
628 | 1 | equemene | |
629 | 1 | equemene | for (i=0;i<RUNS;i++) |
630 | 1 | equemene | { |
631 | 1 | equemene | |
632 | 251 | equemene | #ifdef FP64
|
633 | 1 | equemene | |
634 | 1 | equemene | printVector(dim,X,"X","Roots"); |
635 | 1 | equemene | |
636 | 1 | equemene | /* Multiply A by X as Y <- A.X */
|
637 | 1 | equemene | cblas_dgemv(CblasRowMajor,CblasNoTrans, |
638 | 1 | equemene | dim,dim,alpha,A,dim,X,incx,beta,Y,incx); |
639 | 1 | equemene | |
640 | 1 | equemene | printVector(dim,Y,"Y","Results"); |
641 | 1 | equemene | |
642 | 1 | equemene | /* Solve linear system : Y <- A-1.Y */
|
643 | 1 | equemene | cblas_dtrsv(CblasRowMajor,CblasUpper,CblasNoTrans,CblasNonUnit, |
644 | 1 | equemene | dim,A,dim,Y,incx); |
645 | 1 | equemene | |
646 | 1 | equemene | printVector(dim,Y,"Y","Solutions"); |
647 | 1 | equemene | |
648 | 1 | equemene | cblas_daxpy(dim,beta2,Y,incx,X,incx); |
649 | 1 | equemene | |
650 | 1 | equemene | printVector(dim,X,"X","Errors"); |
651 | 1 | equemene | |
652 | 1 | equemene | /* Store the checker of errors */
|
653 | 180 | equemene | checksA[i]=(double)cblas_dnrm2(dim,X,incx);
|
654 | 1 | equemene | |
655 | 1 | equemene | cblas_dswap(dim,X,incx,Y,incx); |
656 | 1 | equemene | |
657 | 1 | equemene | #else
|
658 | 1 | equemene | |
659 | 1 | equemene | printVector(dim,X,"X","Roots"); |
660 | 1 | equemene | |
661 | 1 | equemene | /* Multiply A by X as Y <- A.X */
|
662 | 1 | equemene | cblas_sgemv(CblasRowMajor,CblasNoTrans, |
663 | 1 | equemene | dim,dim,alpha,A,dim,X,incx,beta,Y,incx); |
664 | 1 | equemene | |
665 | 1 | equemene | printVector(dim,Y,"Y","Results"); |
666 | 1 | equemene | |
667 | 1 | equemene | /* Solve linear system : Y <- A-1.Y */
|
668 | 1 | equemene | cblas_strsv(CblasRowMajor,CblasUpper,CblasNoTrans,CblasNonUnit, |
669 | 1 | equemene | dim,A,dim,Y,incx); |
670 | 1 | equemene | |
671 | 1 | equemene | printVector(dim,Y,"Y","Solutions"); |
672 | 1 | equemene | |
673 | 1 | equemene | cblas_saxpy(dim,beta2,Y,incx,X,incx); |
674 | 1 | equemene | |
675 | 1 | equemene | printVector(dim,X,"X","Errors"); |
676 | 1 | equemene | |
677 | 1 | equemene | /* Store the checker of errors */
|
678 | 180 | equemene | checksA[i]=(double)cblas_snrm2(dim,X,incx);
|
679 | 180 | equemene | |
680 | 1 | equemene | cblas_sswap(dim,X,incx,Y,incx); |
681 | 1 | equemene | |
682 | 1 | equemene | #endif
|
683 | 1 | equemene | |
684 | 1 | equemene | } |
685 | 1 | equemene | #endif
|
686 | 1 | equemene | putchar('\n');
|
687 | 1 | equemene | |
688 | 1 | equemene | /* Get second timer after launching */
|
689 | 1 | equemene | gettimeofday(&tv2, &tz); |
690 | 1 | equemene | |
691 | 1 | equemene | #ifdef CUBLAS
|
692 | 1 | equemene | double memoryIn,memoryOut;
|
693 | 1 | equemene | |
694 | 1 | equemene | memoryIn=(double)((tv3.tv_sec-tv1.tv_sec) * 1000000L + \ |
695 | 1 | equemene | (tv3.tv_usec-tv1.tv_usec))/1000000.; |
696 | 1 | equemene | |
697 | 1 | equemene | memoryOut=(double)((tv2.tv_sec-tv4.tv_sec) * 1000000L + \ |
698 | 1 | equemene | (tv2.tv_usec-tv4.tv_usec))/1000000.; |
699 | 1 | equemene | |
700 | 1 | equemene | duration=(double)((tv4.tv_sec-tv3.tv_sec) * 1000000L + \ |
701 | 1 | equemene | (tv4.tv_usec-tv3.tv_usec))/1000000./RUNS; |
702 | 1 | equemene | |
703 | 1 | equemene | printf("Duration of memory allocation : %2.10f s\n",memoryIn);
|
704 | 1 | equemene | printf("Duration of memory free : %2.10f s\n",memoryOut);
|
705 | 1 | equemene | #else
|
706 | 1 | equemene | duration=(double)((tv2.tv_sec-tv1.tv_sec) * 1000000L + \ |
707 | 1 | equemene | (tv2.tv_usec-tv1.tv_usec))/1000000./RUNS; |
708 | 1 | equemene | |
709 | 1 | equemene | #endif
|
710 | 1 | equemene | |
711 | 1 | equemene | printf("Duration of each cycle : %2.10f s\n",duration);
|
712 | 1 | equemene | |
713 | 1 | equemene | printResults(RUNS,checksA,"C","Errors cumulated"); |
714 | 1 | equemene | |
715 | 1 | equemene | putchar('\n');
|
716 | 1 | equemene | |
717 | 1 | equemene | /*
|
718 | 1 | equemene | #ifdef PRINT
|
719 | 1 | equemene | for (i=0; i<dim; i++) {
|
720 | 1 | equemene | for (j=0; j<dim; j++) printf("A[%i,%i]=%1.5f ", i,j,A[i*dim+j]);
|
721 | 1 | equemene | putchar('\n');
|
722 | 1 | equemene | }
|
723 | 1 | equemene | |
724 | 1 | equemene | for (i=0; i<dim; i++) {
|
725 | 1 | equemene | printf("X[%i]=%2.5f",i,X[i]);
|
726 | 1 | equemene | putchar('\n');
|
727 | 1 | equemene | }
|
728 | 1 | equemene | putchar('\n');
|
729 | 1 | equemene | for (i=0; i<dim; i++) {
|
730 | 1 | equemene | printf("Y[%i]=%2.5f",i,Y[i]);
|
731 | 1 | equemene | putchar('\n');
|
732 | 1 | equemene | }
|
733 | 1 | equemene | #endif
|
734 | 1 | equemene | */
|
735 | 1 | equemene | |
736 | 1 | equemene | return 0; |
737 | 1 | equemene | } |
738 | 1 | equemene | |
739 | 1 | equemene | int main(int argc,char **argv) |
740 | 1 | equemene | { |
741 | 1 | equemene | if ((argc==1)|| |
742 | 1 | equemene | (strcmp(argv[1],"-h")==0)|| |
743 | 1 | equemene | (strcmp(argv[1],"--help")==0)) |
744 | 1 | equemene | { |
745 | 1 | equemene | printf("\nPerforms a bench using BLAS library implementation:\n\n"
|
746 | 1 | equemene | "\t#1 Size on triangular system\n"
|
747 | 1 | equemene | "\t#2 Number of iterations\n\n");
|
748 | 1 | equemene | } |
749 | 1 | equemene | else if ((atoi(argv[1])>=2)&& |
750 | 1 | equemene | (atoi(argv[2])>=1)) |
751 | 1 | equemene | { |
752 | 1 | equemene | bench(atoi(argv[1]),atoi(argv[2])); |
753 | 1 | equemene | } |
754 | 1 | equemene | |
755 | 1 | equemene | return 0; |
756 | 1 | equemene | } |