root / src / auxil / HPL_dlamch.c @ 1
Historique | Voir | Annoter | Télécharger (28,57 ko)
1 |
/*
|
---|---|
2 |
* -- High Performance Computing Linpack Benchmark (HPL)
|
3 |
* HPL - 2.0 - September 10, 2008
|
4 |
* Antoine P. Petitet
|
5 |
* University of Tennessee, Knoxville
|
6 |
* Innovative Computing Laboratory
|
7 |
* (C) Copyright 2000-2008 All Rights Reserved
|
8 |
*
|
9 |
* -- Copyright notice and Licensing terms:
|
10 |
*
|
11 |
* Redistribution and use in source and binary forms, with or without
|
12 |
* modification, are permitted provided that the following conditions
|
13 |
* are met:
|
14 |
*
|
15 |
* 1. Redistributions of source code must retain the above copyright
|
16 |
* notice, this list of conditions and the following disclaimer.
|
17 |
*
|
18 |
* 2. Redistributions in binary form must reproduce the above copyright
|
19 |
* notice, this list of conditions, and the following disclaimer in the
|
20 |
* documentation and/or other materials provided with the distribution.
|
21 |
*
|
22 |
* 3. All advertising materials mentioning features or use of this
|
23 |
* software must display the following acknowledgement:
|
24 |
* This product includes software developed at the University of
|
25 |
* Tennessee, Knoxville, Innovative Computing Laboratory.
|
26 |
*
|
27 |
* 4. The name of the University, the name of the Laboratory, or the
|
28 |
* names of its contributors may not be used to endorse or promote
|
29 |
* products derived from this software without specific written
|
30 |
* permission.
|
31 |
*
|
32 |
* -- Disclaimer:
|
33 |
*
|
34 |
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
35 |
* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
36 |
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
37 |
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
|
38 |
* OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
39 |
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
40 |
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
41 |
* DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
42 |
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
43 |
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
44 |
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
45 |
* ---------------------------------------------------------------------
|
46 |
*/
|
47 |
/*
|
48 |
* Include files
|
49 |
*/
|
50 |
#include "hpl.h" |
51 |
/*
|
52 |
* ---------------------------------------------------------------------
|
53 |
* Static function prototypes
|
54 |
* ---------------------------------------------------------------------
|
55 |
*/
|
56 |
static void HPL_dlamc1 |
57 |
STDC_ARGS( |
58 |
( int *, int *, int *, int * ) ); |
59 |
static void HPL_dlamc2 |
60 |
STDC_ARGS( |
61 |
( int *, int *, int *, double *, |
62 |
int *, double *, int *, double * ) ); |
63 |
static double HPL_dlamc3 |
64 |
STDC_ARGS( |
65 |
( const double, const double ) ); |
66 |
static void HPL_dlamc4 |
67 |
STDC_ARGS( |
68 |
( int *, const double, const int ) ); |
69 |
static void HPL_dlamc5 |
70 |
STDC_ARGS( |
71 |
( const int, const int, const int, const int, |
72 |
int *, double * ) ); |
73 |
static double HPL_dipow |
74 |
STDC_ARGS( |
75 |
( const double, const int ) ); |
76 |
|
77 |
#ifdef STDC_HEADERS
|
78 |
double HPL_dlamch
|
79 |
( |
80 |
const HPL_T_MACH CMACH
|
81 |
) |
82 |
#else
|
83 |
double HPL_dlamch
|
84 |
( CMACH ) |
85 |
const HPL_T_MACH CMACH;
|
86 |
#endif
|
87 |
{ |
88 |
/*
|
89 |
* Purpose
|
90 |
* =======
|
91 |
*
|
92 |
* HPL_dlamch determines machine-specific arithmetic constants such as
|
93 |
* the relative machine precision (eps), the safe minimum (sfmin) such
|
94 |
* that 1 / sfmin does not overflow, the base of the machine (base), the
|
95 |
* precision (prec), the number of (base) digits in the mantissa (t),
|
96 |
* whether rounding occurs in addition (rnd=1.0 and 0.0 otherwise), the
|
97 |
* minimum exponent before (gradual) underflow (emin), the underflow
|
98 |
* threshold (rmin) base**(emin-1), the largest exponent before overflow
|
99 |
* (emax), the overflow threshold (rmax) (base**emax)*(1-eps).
|
100 |
*
|
101 |
* Notes
|
102 |
* =====
|
103 |
*
|
104 |
* This function has been manually translated from the Fortran 77 LAPACK
|
105 |
* auxiliary function dlamch.f (version 2.0 -- 1992), that was itself
|
106 |
* based on the function ENVRON by Malcolm and incorporated suggestions
|
107 |
* by Gentleman and Marovich. See
|
108 |
*
|
109 |
* Malcolm M. A., Algorithms to reveal properties of floating-point
|
110 |
* arithmetic., Comms. of the ACM, 15, 949-951 (1972).
|
111 |
*
|
112 |
* Gentleman W. M. and Marovich S. B., More on algorithms that reveal
|
113 |
* properties of floating point arithmetic units., Comms. of the ACM,
|
114 |
* 17, 276-277 (1974).
|
115 |
*
|
116 |
* Arguments
|
117 |
* =========
|
118 |
*
|
119 |
* CMACH (local input) const HPL_T_MACH
|
120 |
* Specifies the value to be returned by HPL_dlamch
|
121 |
* = HPL_MACH_EPS, HPL_dlamch := eps (default)
|
122 |
* = HPL_MACH_SFMIN, HPL_dlamch := sfmin
|
123 |
* = HPL_MACH_BASE, HPL_dlamch := base
|
124 |
* = HPL_MACH_PREC, HPL_dlamch := eps*base
|
125 |
* = HPL_MACH_MLEN, HPL_dlamch := t
|
126 |
* = HPL_MACH_RND, HPL_dlamch := rnd
|
127 |
* = HPL_MACH_EMIN, HPL_dlamch := emin
|
128 |
* = HPL_MACH_RMIN, HPL_dlamch := rmin
|
129 |
* = HPL_MACH_EMAX, HPL_dlamch := emax
|
130 |
* = HPL_MACH_RMAX, HPL_dlamch := rmax
|
131 |
*
|
132 |
* where
|
133 |
*
|
134 |
* eps = relative machine precision,
|
135 |
* sfmin = safe minimum,
|
136 |
* base = base of the machine,
|
137 |
* prec = eps*base,
|
138 |
* t = number of digits in the mantissa,
|
139 |
* rnd = 1.0 if rounding occurs in addition,
|
140 |
* emin = minimum exponent before underflow,
|
141 |
* rmin = underflow threshold,
|
142 |
* emax = largest exponent before overflow,
|
143 |
* rmax = overflow threshold.
|
144 |
*
|
145 |
* ---------------------------------------------------------------------
|
146 |
*/
|
147 |
/*
|
148 |
* .. Local Variables ..
|
149 |
*/
|
150 |
static double eps, sfmin, base, t, rnd, emin, rmin, emax, |
151 |
rmax, prec; |
152 |
double small;
|
153 |
static int first=1; |
154 |
int beta=0, imax=0, imin=0, it=0, lrnd=0; |
155 |
/* ..
|
156 |
* .. Executable Statements ..
|
157 |
*/
|
158 |
if( first != 0 ) |
159 |
{ |
160 |
first = 0;
|
161 |
HPL_dlamc2( &beta, &it, &lrnd, &eps, &imin, &rmin, &imax, &rmax ); |
162 |
base = (double)(beta); t = (double)(it); |
163 |
if( lrnd != 0 ) |
164 |
{ rnd = HPL_rone; eps = HPL_dipow( base, 1 - it ) / HPL_rtwo; }
|
165 |
else
|
166 |
{ rnd = HPL_rzero; eps = HPL_dipow( base, 1 - it ); }
|
167 |
prec = eps * base; emin = (double)(imin); emax = (double)(imax); |
168 |
sfmin = rmin; small = HPL_rone / rmax; |
169 |
/*
|
170 |
* Use SMALL plus a bit, to avoid the possibility of rounding causing
|
171 |
* overflow when computing 1/sfmin.
|
172 |
*/
|
173 |
if( small >= sfmin ) sfmin = small * ( HPL_rone + eps );
|
174 |
} |
175 |
|
176 |
if( CMACH == HPL_MACH_EPS ) return( eps ); |
177 |
if( CMACH == HPL_MACH_SFMIN ) return( sfmin ); |
178 |
if( CMACH == HPL_MACH_BASE ) return( base ); |
179 |
if( CMACH == HPL_MACH_PREC ) return( prec ); |
180 |
if( CMACH == HPL_MACH_MLEN ) return( t ); |
181 |
if( CMACH == HPL_MACH_RND ) return( rnd ); |
182 |
if( CMACH == HPL_MACH_EMIN ) return( emin ); |
183 |
if( CMACH == HPL_MACH_RMIN ) return( rmin ); |
184 |
if( CMACH == HPL_MACH_EMAX ) return( emax ); |
185 |
if( CMACH == HPL_MACH_RMAX ) return( rmax ); |
186 |
|
187 |
return( eps );
|
188 |
/*
|
189 |
* End of HPL_dlamch
|
190 |
*/
|
191 |
} |
192 |
|
193 |
#ifdef STDC_HEADERS
|
194 |
static void HPL_dlamc1 |
195 |
( |
196 |
int * BETA,
|
197 |
int * T,
|
198 |
int * RND,
|
199 |
int * IEEE1
|
200 |
) |
201 |
#else
|
202 |
static void HPL_dlamc1 |
203 |
( BETA, T, RND, IEEE1 ) |
204 |
/*
|
205 |
* .. Scalar Arguments ..
|
206 |
*/
|
207 |
int * BETA, * IEEE1, * RND, * T;
|
208 |
#endif
|
209 |
{ |
210 |
/*
|
211 |
* Purpose
|
212 |
* =======
|
213 |
*
|
214 |
* HPL_dlamc1 determines the machine parameters given by BETA, T, RND,
|
215 |
* and IEEE1.
|
216 |
*
|
217 |
* Notes
|
218 |
* =====
|
219 |
*
|
220 |
* This function has been manually translated from the Fortran 77 LAPACK
|
221 |
* auxiliary function dlamc1.f (version 2.0 -- 1992), that was itself
|
222 |
* based on the function ENVRON by Malcolm and incorporated suggestions
|
223 |
* by Gentleman and Marovich. See
|
224 |
*
|
225 |
* Malcolm M. A., Algorithms to reveal properties of floating-point
|
226 |
* arithmetic., Comms. of the ACM, 15, 949-951 (1972).
|
227 |
*
|
228 |
* Gentleman W. M. and Marovich S. B., More on algorithms that reveal
|
229 |
* properties of floating point arithmetic units., Comms. of the ACM,
|
230 |
* 17, 276-277 (1974).
|
231 |
*
|
232 |
* Arguments
|
233 |
* =========
|
234 |
*
|
235 |
* BETA (local output) int *
|
236 |
* The base of the machine.
|
237 |
*
|
238 |
* T (local output) int *
|
239 |
* The number of ( BETA ) digits in the mantissa.
|
240 |
*
|
241 |
* RND (local output) int *
|
242 |
* Specifies whether proper rounding (RND=1) or chopping (RND=0)
|
243 |
* occurs in addition. This may not be a reliable guide to the
|
244 |
* way in which the machine performs its arithmetic.
|
245 |
*
|
246 |
* IEEE1 (local output) int *
|
247 |
* Specifies whether rounding appears to be done in the IEEE
|
248 |
* `round to nearest' style (IEEE1=1), (IEEE1=0) otherwise.
|
249 |
*
|
250 |
* ---------------------------------------------------------------------
|
251 |
*/
|
252 |
/*
|
253 |
* .. Local Variables ..
|
254 |
*/
|
255 |
double a, b, c, f, one, qtr, savec, t1, t2;
|
256 |
static int first=1, lbeta, lieee1, lrnd, lt; |
257 |
/* ..
|
258 |
* .. Executable Statements ..
|
259 |
*/
|
260 |
if( first != 0 ) |
261 |
{ |
262 |
first = 0; one = HPL_rone;
|
263 |
/*
|
264 |
* lbeta, lieee1, lt and lrnd are the local values of BETA, IEEE1, T and
|
265 |
* RND. Throughout this routine we use the function HPL_dlamc3 to ensure
|
266 |
* that relevant values are stored and not held in registers, or are not
|
267 |
* affected by optimizers.
|
268 |
*
|
269 |
* Compute a = 2.0**m with the smallest positive integer m such that
|
270 |
* fl( a + 1.0 ) == a.
|
271 |
*/
|
272 |
a = HPL_rone; c = HPL_rone; |
273 |
do
|
274 |
{ a *= HPL_rtwo; c = HPL_dlamc3( a, one ); c = HPL_dlamc3( c, -a ); } |
275 |
while( c == HPL_rone );
|
276 |
/*
|
277 |
* Now compute b = 2.0**m with the smallest positive integer m such that
|
278 |
* fl( a + b ) > a.
|
279 |
*/
|
280 |
b = HPL_rone; c = HPL_dlamc3( a, b ); |
281 |
while( c == a ) { b *= HPL_rtwo; c = HPL_dlamc3( a, b ); }
|
282 |
/*
|
283 |
* Now compute the base. a and c are neighbouring floating point num-
|
284 |
* bers in the interval ( BETA**T, BETA**( T + 1 ) ) and so their diffe-
|
285 |
* rence is BETA. Adding 0.25 to c is to ensure that it is truncated to
|
286 |
* BETA and not (BETA-1).
|
287 |
*/
|
288 |
qtr = one / 4.0; savec = c; |
289 |
c = HPL_dlamc3( c, -a ); lbeta = (int)(c+qtr);
|
290 |
/*
|
291 |
* Now determine whether rounding or chopping occurs, by adding a bit
|
292 |
* less than BETA/2 and a bit more than BETA/2 to a.
|
293 |
*/
|
294 |
b = (double)(lbeta);
|
295 |
f = HPL_dlamc3( b / HPL_rtwo, -b / 100.0 ); c = HPL_dlamc3( f, a ); |
296 |
if( c == a ) { lrnd = 1; } else { lrnd = 0; } |
297 |
f = HPL_dlamc3( b / HPL_rtwo, b / 100.0 ); c = HPL_dlamc3( f, a ); |
298 |
if( ( lrnd != 0 ) && ( c == a ) ) lrnd = 0; |
299 |
/*
|
300 |
* Try and decide whether rounding is done in the IEEE round to nea-
|
301 |
* rest style. b/2 is half a unit in the last place of the two numbers
|
302 |
* a and savec. Furthermore, a is even, i.e. has last bit zero, and sa-
|
303 |
* vec is odd. Thus adding b/2 to a should not change a, but adding b/2
|
304 |
* to savec should change savec.
|
305 |
*/
|
306 |
t1 = HPL_dlamc3( b / HPL_rtwo, a ); |
307 |
t2 = HPL_dlamc3( b / HPL_rtwo, savec ); |
308 |
if ( ( t1 == a ) && ( t2 > savec ) && ( lrnd != 0 ) ) lieee1 = 1; |
309 |
else lieee1 = 0; |
310 |
/*
|
311 |
* Now find the mantissa, T. It should be the integer part of log to the
|
312 |
* base BETA of a, however it is safer to determine T by powering. So we
|
313 |
* find T as the smallest positive integer for which fl( beta**t + 1.0 )
|
314 |
* is equal to 1.0.
|
315 |
*/
|
316 |
lt = 0; a = HPL_rone; c = HPL_rone;
|
317 |
|
318 |
do
|
319 |
{ |
320 |
lt++; a *= (double)(lbeta);
|
321 |
c = HPL_dlamc3( a, one ); c = HPL_dlamc3( c, -a ); |
322 |
} while( c == HPL_rone );
|
323 |
} |
324 |
|
325 |
*BETA = lbeta; *T = lt; *RND = lrnd; *IEEE1 = lieee1; |
326 |
} |
327 |
|
328 |
#ifdef STDC_HEADERS
|
329 |
static void HPL_dlamc2 |
330 |
( |
331 |
int * BETA,
|
332 |
int * T,
|
333 |
int * RND,
|
334 |
double * EPS,
|
335 |
int * EMIN,
|
336 |
double * RMIN,
|
337 |
int * EMAX,
|
338 |
double * RMAX
|
339 |
) |
340 |
#else
|
341 |
static void HPL_dlamc2( BETA, T, RND, EPS, EMIN, RMIN, EMAX, RMAX ) |
342 |
/*
|
343 |
* .. Scalar Arguments ..
|
344 |
*/
|
345 |
int * BETA, * EMAX, * EMIN, * RND, * T;
|
346 |
double * EPS, * RMAX, * RMIN;
|
347 |
#endif
|
348 |
{ |
349 |
/*
|
350 |
* Purpose
|
351 |
* =======
|
352 |
*
|
353 |
* HPL_dlamc2 determines the machine parameters specified in its argu-
|
354 |
* ment list.
|
355 |
*
|
356 |
* Notes
|
357 |
* =====
|
358 |
*
|
359 |
* This function has been manually translated from the Fortran 77 LAPACK
|
360 |
* auxiliary function dlamc2.f (version 2.0 -- 1992), that was itself
|
361 |
* based on a function PARANOIA by W. Kahan of the University of Cali-
|
362 |
* fornia at Berkeley for the computation of the relative machine epsi-
|
363 |
* lon eps.
|
364 |
*
|
365 |
* Arguments
|
366 |
* =========
|
367 |
*
|
368 |
* BETA (local output) int *
|
369 |
* The base of the machine.
|
370 |
*
|
371 |
* T (local output) int *
|
372 |
* The number of ( BETA ) digits in the mantissa.
|
373 |
*
|
374 |
* RND (local output) int *
|
375 |
* Specifies whether proper rounding (RND=1) or chopping (RND=0)
|
376 |
* occurs in addition. This may not be a reliable guide to the
|
377 |
* way in which the machine performs its arithmetic.
|
378 |
*
|
379 |
* EPS (local output) double *
|
380 |
* The smallest positive number such that fl( 1.0 - EPS ) < 1.0,
|
381 |
* where fl denotes the computed value.
|
382 |
*
|
383 |
* EMIN (local output) int *
|
384 |
* The minimum exponent before (gradual) underflow occurs.
|
385 |
*
|
386 |
* RMIN (local output) double *
|
387 |
* The smallest normalized number for the machine, given by
|
388 |
* BASE**( EMIN - 1 ), where BASE is the floating point value
|
389 |
* of BETA.
|
390 |
*
|
391 |
* EMAX (local output) int *
|
392 |
* The maximum exponent before overflow occurs.
|
393 |
*
|
394 |
* RMAX (local output) double *
|
395 |
* The largest positive number for the machine, given by
|
396 |
* BASE**EMAX * ( 1 - EPS ), where BASE is the floating point
|
397 |
* value of BETA.
|
398 |
*
|
399 |
* ---------------------------------------------------------------------
|
400 |
*/
|
401 |
/*
|
402 |
* .. Local Variables ..
|
403 |
*/
|
404 |
static double leps, lrmax, lrmin; |
405 |
double a, b, c, half, one, rbase, sixth, small,
|
406 |
third, two, zero; |
407 |
static int first=1, iwarn=0, lbeta=0, lemax, lemin, |
408 |
lt=0;
|
409 |
int gnmin=0, gpmin=0, i, ieee, lieee1=0, |
410 |
lrnd=0, ngnmin=0, ngpmin=0; |
411 |
/* ..
|
412 |
* .. Executable Statements ..
|
413 |
*/
|
414 |
if( first != 0 ) |
415 |
{ |
416 |
first = 0; zero = HPL_rzero; one = HPL_rone; two = HPL_rtwo;
|
417 |
/*
|
418 |
* lbeta, lt, lrnd, leps, lemin and lrmin are the local values of BETA,
|
419 |
* T, RND, EPS, EMIN and RMIN.
|
420 |
*
|
421 |
* Throughout this routine we use the function HPL_dlamc3 to ensure that
|
422 |
* relevant values are stored and not held in registers, or are not af-
|
423 |
* fected by optimizers.
|
424 |
*
|
425 |
* HPL_dlamc1 returns the parameters lbeta, lt, lrnd and lieee1.
|
426 |
*/
|
427 |
HPL_dlamc1( &lbeta, <, &lrnd, &lieee1 ); |
428 |
/*
|
429 |
* Start to find eps.
|
430 |
*/
|
431 |
b = (double)(lbeta); a = HPL_dipow( b, -lt ); leps = a;
|
432 |
/*
|
433 |
* Try some tricks to see whether or not this is the correct EPS.
|
434 |
*/
|
435 |
b = two / 3.0; |
436 |
half = one / HPL_rtwo; |
437 |
sixth = HPL_dlamc3( b, -half ); |
438 |
third = HPL_dlamc3( sixth, sixth ); |
439 |
b = HPL_dlamc3( third, -half ); |
440 |
b = HPL_dlamc3( b, sixth ); |
441 |
b = Mabs( b ); if( b < leps ) b = leps;
|
442 |
|
443 |
leps = HPL_rone; |
444 |
|
445 |
while( ( leps > b ) && ( b > zero ) )
|
446 |
{ |
447 |
leps = b; |
448 |
c = HPL_dlamc3( half * leps, |
449 |
HPL_dipow( two, 5 ) * HPL_dipow( leps, 2 ) ); |
450 |
c = HPL_dlamc3( half, -c ); b = HPL_dlamc3( half, c ); |
451 |
c = HPL_dlamc3( half, -b ); b = HPL_dlamc3( half, c ); |
452 |
} |
453 |
if( a < leps ) leps = a;
|
454 |
/*
|
455 |
* Computation of EPS complete.
|
456 |
*
|
457 |
* Now find EMIN. Let a = + or - 1, and + or - (1 + BASE**(-3)). Keep
|
458 |
* dividing a by BETA until (gradual) underflow occurs. This is detected
|
459 |
* when we cannot recover the previous a.
|
460 |
*/
|
461 |
rbase = one / (double)(lbeta); small = one;
|
462 |
for( i = 0; i < 3; i++ ) small = HPL_dlamc3( small * rbase, zero ); |
463 |
a = HPL_dlamc3( one, small ); |
464 |
HPL_dlamc4( &ngpmin, one, lbeta ); HPL_dlamc4( &ngnmin, -one, lbeta ); |
465 |
HPL_dlamc4( &gpmin, a, lbeta ); HPL_dlamc4( &gnmin, -a, lbeta ); |
466 |
|
467 |
ieee = 0;
|
468 |
|
469 |
if( ( ngpmin == ngnmin ) && ( gpmin == gnmin ) )
|
470 |
{ |
471 |
if( ngpmin == gpmin )
|
472 |
{ |
473 |
/*
|
474 |
* Non twos-complement machines, no gradual underflow; e.g., VAX )
|
475 |
*/
|
476 |
lemin = ngpmin; |
477 |
} |
478 |
else if( ( gpmin-ngpmin ) == 3 ) |
479 |
{ |
480 |
/*
|
481 |
* Non twos-complement machines with gradual underflow; e.g., IEEE stan-
|
482 |
* dard followers
|
483 |
*/
|
484 |
lemin = ngpmin - 1 + lt; ieee = 1; |
485 |
} |
486 |
else
|
487 |
{ |
488 |
/*
|
489 |
* A guess; no known machine
|
490 |
*/
|
491 |
lemin = Mmin( ngpmin, gpmin ); |
492 |
iwarn = 1;
|
493 |
} |
494 |
} |
495 |
else if( ( ngpmin == gpmin ) && ( ngnmin == gnmin ) ) |
496 |
{ |
497 |
if( Mabs( ngpmin-ngnmin ) == 1 ) |
498 |
{ |
499 |
/*
|
500 |
* Twos-complement machines, no gradual underflow; e.g., CYBER 205
|
501 |
*/
|
502 |
lemin = Mmax( ngpmin, ngnmin ); |
503 |
} |
504 |
else
|
505 |
{ |
506 |
/*
|
507 |
* A guess; no known machine
|
508 |
*/
|
509 |
lemin = Mmin( ngpmin, ngnmin ); |
510 |
iwarn = 1;
|
511 |
} |
512 |
} |
513 |
else if( ( Mabs( ngpmin-ngnmin ) == 1 ) && ( gpmin == gnmin ) ) |
514 |
{ |
515 |
if( ( gpmin - Mmin( ngpmin, ngnmin ) ) == 3 ) |
516 |
{ |
517 |
/*
|
518 |
* Twos-complement machines with gradual underflow; no known machine
|
519 |
*/
|
520 |
lemin = Mmax( ngpmin, ngnmin ) - 1 + lt;
|
521 |
} |
522 |
else
|
523 |
{ |
524 |
/*
|
525 |
* A guess; no known machine
|
526 |
*/
|
527 |
lemin = Mmin( ngpmin, ngnmin ); |
528 |
iwarn = 1;
|
529 |
} |
530 |
} |
531 |
else
|
532 |
{ |
533 |
/*
|
534 |
* A guess; no known machine
|
535 |
*/
|
536 |
lemin = Mmin( ngpmin, ngnmin ); lemin = Mmin( lemin, gpmin ); |
537 |
lemin = Mmin( lemin, gnmin ); iwarn = 1;
|
538 |
} |
539 |
/*
|
540 |
* Comment out this if block if EMIN is ok
|
541 |
*/
|
542 |
if( iwarn != 0 ) |
543 |
{ |
544 |
first = 1;
|
545 |
HPL_fprintf( stderr, "\n %s %8d\n%s\n%s\n%s\n",
|
546 |
"WARNING. The value EMIN may be incorrect:- EMIN =", lemin,
|
547 |
"If, after inspection, the value EMIN looks acceptable, please comment ",
|
548 |
"out the if block as marked within the code of routine HPL_dlamc2, ",
|
549 |
"otherwise supply EMIN explicitly." );
|
550 |
} |
551 |
/*
|
552 |
* Assume IEEE arithmetic if we found denormalised numbers above, or if
|
553 |
* arithmetic seems to round in the IEEE style, determined in routine
|
554 |
* HPL_dlamc1. A true IEEE machine should have both things true; how-
|
555 |
* ever, faulty machines may have one or the other.
|
556 |
*/
|
557 |
if( ( ieee != 0 ) || ( lieee1 != 0 ) ) ieee = 1; |
558 |
else ieee = 0; |
559 |
/*
|
560 |
* Compute RMIN by successive division by BETA. We could compute RMIN
|
561 |
* as BASE**( EMIN - 1 ), but some machines underflow during this compu-
|
562 |
* tation.
|
563 |
*/
|
564 |
lrmin = HPL_rone; |
565 |
for( i = 0; i < 1 - lemin; i++ ) |
566 |
lrmin = HPL_dlamc3( lrmin*rbase, zero ); |
567 |
/*
|
568 |
* Finally, call HPL_dlamc5 to compute emax and rmax.
|
569 |
*/
|
570 |
HPL_dlamc5( lbeta, lt, lemin, ieee, &lemax, &lrmax ); |
571 |
} |
572 |
*BETA = lbeta; *T = lt; *RND = lrnd; *EPS = leps; |
573 |
*EMIN = lemin; *RMIN = lrmin; *EMAX = lemax; *RMAX = lrmax; |
574 |
} |
575 |
|
576 |
#ifdef STDC_HEADERS
|
577 |
static double HPL_dlamc3( const double A, const double B ) |
578 |
#else
|
579 |
static double HPL_dlamc3( A, B ) |
580 |
/*
|
581 |
* .. Scalar Arguments ..
|
582 |
*/
|
583 |
const double A, B; |
584 |
#endif
|
585 |
{ |
586 |
/*
|
587 |
* Purpose
|
588 |
* =======
|
589 |
*
|
590 |
* HPL_dlamc3 is intended to force a and b to be stored prior to doing
|
591 |
* the addition of a and b, for use in situations where optimizers
|
592 |
* might hold one of these in a register.
|
593 |
*
|
594 |
* Notes
|
595 |
* =====
|
596 |
*
|
597 |
* This function has been manually translated from the Fortran 77 LAPACK
|
598 |
* auxiliary function dlamc3.f (version 2.0 -- 1992).
|
599 |
*
|
600 |
* Arguments
|
601 |
* =========
|
602 |
*
|
603 |
* A, B (local input) double
|
604 |
* The values a and b.
|
605 |
*
|
606 |
* ---------------------------------------------------------------------
|
607 |
*/
|
608 |
/* ..
|
609 |
* .. Executable Statements ..
|
610 |
*/
|
611 |
return( A + B );
|
612 |
} |
613 |
|
614 |
#ifdef STDC_HEADERS
|
615 |
static void HPL_dlamc4 |
616 |
( |
617 |
int * EMIN,
|
618 |
const double START, |
619 |
const int BASE |
620 |
) |
621 |
#else
|
622 |
static void HPL_dlamc4( EMIN, START, BASE ) |
623 |
/*
|
624 |
* .. Scalar Arguments ..
|
625 |
*/
|
626 |
int * EMIN;
|
627 |
const int BASE; |
628 |
const double START; |
629 |
#endif
|
630 |
{ |
631 |
/*
|
632 |
* Purpose
|
633 |
* =======
|
634 |
*
|
635 |
* HPL_dlamc4 is a service function for HPL_dlamc2.
|
636 |
*
|
637 |
* Notes
|
638 |
* =====
|
639 |
*
|
640 |
* This function has been manually translated from the Fortran 77 LAPACK
|
641 |
* auxiliary function dlamc4.f (version 2.0 -- 1992).
|
642 |
*
|
643 |
* Arguments
|
644 |
* =========
|
645 |
*
|
646 |
* EMIN (local output) int *
|
647 |
* The minimum exponent before (gradual) underflow, computed by
|
648 |
* setting A = START and dividing by BASE until the previous A
|
649 |
* can not be recovered.
|
650 |
*
|
651 |
* START (local input) double
|
652 |
* The starting point for determining EMIN.
|
653 |
*
|
654 |
* BASE (local input) int
|
655 |
* The base of the machine.
|
656 |
*
|
657 |
* ---------------------------------------------------------------------
|
658 |
*/
|
659 |
/*
|
660 |
* .. Local Variables ..
|
661 |
*/
|
662 |
double a, b1, b2, c1, c2, d1, d2, one, rbase, zero;
|
663 |
int i;
|
664 |
/* ..
|
665 |
* .. Executable Statements ..
|
666 |
*/
|
667 |
a = START; one = HPL_rone; rbase = one / (double)(BASE);
|
668 |
zero = HPL_rzero; |
669 |
*EMIN = 1; b1 = HPL_dlamc3( a * rbase, zero ); c1 = c2 = d1 = d2 = a;
|
670 |
|
671 |
do
|
672 |
{ |
673 |
(*EMIN)--; a = b1; |
674 |
b1 = HPL_dlamc3( a / BASE, zero ); |
675 |
c1 = HPL_dlamc3( b1 * BASE, zero ); |
676 |
d1 = zero; for( i = 0; i < BASE; i++ ) d1 = d1 + b1; |
677 |
b2 = HPL_dlamc3( a * rbase, zero ); |
678 |
c2 = HPL_dlamc3( b2 / rbase, zero ); |
679 |
d2 = zero; for( i = 0; i < BASE; i++ ) d2 = d2 + b2; |
680 |
} while( ( c1 == a ) && ( c2 == a ) && ( d1 == a ) && ( d2 == a ) );
|
681 |
} |
682 |
|
683 |
#ifdef STDC_HEADERS
|
684 |
static void HPL_dlamc5 |
685 |
( |
686 |
const int BETA, |
687 |
const int P, |
688 |
const int EMIN, |
689 |
const int IEEE, |
690 |
int * EMAX,
|
691 |
double * RMAX
|
692 |
) |
693 |
#else
|
694 |
static void HPL_dlamc5( BETA, P, EMIN, IEEE, EMAX, RMAX ) |
695 |
/*
|
696 |
* .. Scalar Arguments ..
|
697 |
*/
|
698 |
const int BETA, EMIN, IEEE, P; |
699 |
int * EMAX;
|
700 |
double * RMAX;
|
701 |
#endif
|
702 |
{ |
703 |
/*
|
704 |
* Purpose
|
705 |
* =======
|
706 |
*
|
707 |
* HPL_dlamc5 attempts to compute RMAX, the largest machine floating-
|
708 |
* point number, without overflow. It assumes that EMAX + abs(EMIN) sum
|
709 |
* approximately to a power of 2. It will fail on machines where this
|
710 |
* assumption does not hold, for example, the Cyber 205 (EMIN = -28625,
|
711 |
* EMAX = 28718). It will also fail if the value supplied for EMIN is
|
712 |
* too large (i.e. too close to zero), probably with overflow.
|
713 |
*
|
714 |
* Notes
|
715 |
* =====
|
716 |
*
|
717 |
* This function has been manually translated from the Fortran 77 LAPACK
|
718 |
* auxiliary function dlamc5.f (version 2.0 -- 1992).
|
719 |
*
|
720 |
* Arguments
|
721 |
* =========
|
722 |
*
|
723 |
* BETA (local input) int
|
724 |
* The base of floating-point arithmetic.
|
725 |
*
|
726 |
* P (local input) int
|
727 |
* The number of base BETA digits in the mantissa of a floating-
|
728 |
* point value.
|
729 |
*
|
730 |
* EMIN (local input) int
|
731 |
* The minimum exponent before (gradual) underflow.
|
732 |
*
|
733 |
* IEEE (local input) int
|
734 |
* A logical flag specifying whether or not the arithmetic sys-
|
735 |
* tem is thought to comply with the IEEE standard.
|
736 |
*
|
737 |
* EMAX (local output) int *
|
738 |
* The largest exponent before overflow.
|
739 |
*
|
740 |
* RMAX (local output) double *
|
741 |
* The largest machine floating-point number.
|
742 |
*
|
743 |
* ---------------------------------------------------------------------
|
744 |
*/
|
745 |
/*
|
746 |
* .. Local Variables ..
|
747 |
*/
|
748 |
double oldy=HPL_rzero, recbas, y, z;
|
749 |
int exbits=1, expsum, i, lexp=1, nbits, try, |
750 |
uexp; |
751 |
/* ..
|
752 |
* .. Executable Statements ..
|
753 |
*/
|
754 |
/*
|
755 |
* First compute lexp and uexp, two powers of 2 that bound abs(EMIN).
|
756 |
* We then assume that EMAX + abs( EMIN ) will sum approximately to the
|
757 |
* bound that is closest to abs( EMIN ). (EMAX is the exponent of the
|
758 |
* required number RMAX).
|
759 |
*/
|
760 |
l_10:
|
761 |
try = (int)( (unsigned int)(lexp) << 1 ); |
762 |
if( try <= ( -EMIN ) ) { lexp = try; exbits++; goto l_10; } |
763 |
|
764 |
if( lexp == -EMIN ) { uexp = lexp; } else { uexp = try; exbits++; } |
765 |
/*
|
766 |
* Now -lexp is less than or equal to EMIN, and -uexp is greater than or
|
767 |
* equal to EMIN. exbits is the number of bits needed to store the expo-
|
768 |
* nent.
|
769 |
*/
|
770 |
if( ( uexp+EMIN ) > ( -lexp-EMIN ) )
|
771 |
{ expsum = (int)( (unsigned int)(lexp) << 1 ); } |
772 |
else
|
773 |
{ expsum = (int)( (unsigned int)(uexp) << 1 ); } |
774 |
/*
|
775 |
* expsum is the exponent range, approximately equal to EMAX - EMIN + 1.
|
776 |
*/
|
777 |
*EMAX = expsum + EMIN - 1;
|
778 |
/*
|
779 |
* nbits is the total number of bits needed to store a floating-point
|
780 |
* number.
|
781 |
*/
|
782 |
nbits = 1 + exbits + P;
|
783 |
|
784 |
if( ( nbits % 2 == 1 ) && ( BETA == 2 ) ) |
785 |
{ |
786 |
/*
|
787 |
* Either there are an odd number of bits used to store a floating-point
|
788 |
* number, which is unlikely, or some bits are not used in the represen-
|
789 |
* tation of numbers, which is possible, (e.g. Cray machines) or the
|
790 |
* mantissa has an implicit bit, (e.g. IEEE machines, Dec Vax machines),
|
791 |
* which is perhaps the most likely. We have to assume the last alterna-
|
792 |
* tive. If this is true, then we need to reduce EMAX by one because
|
793 |
* there must be some way of representing zero in an implicit-bit sys-
|
794 |
* tem. On machines like Cray we are reducing EMAX by one unnecessarily.
|
795 |
*/
|
796 |
(*EMAX)--; |
797 |
} |
798 |
|
799 |
if( IEEE != 0 ) |
800 |
{ |
801 |
/*
|
802 |
* Assume we are on an IEEE machine which reserves one exponent for in-
|
803 |
* finity and NaN.
|
804 |
*/
|
805 |
(*EMAX)--; |
806 |
} |
807 |
/*
|
808 |
* Now create RMAX, the largest machine number, which should be equal to
|
809 |
* (1.0 - BETA**(-P)) * BETA**EMAX . First compute 1.0-BETA**(-P), being
|
810 |
* careful that the result is less than 1.0.
|
811 |
*/
|
812 |
recbas = HPL_rone / (double)(BETA);
|
813 |
z = (double)(BETA) - HPL_rone;
|
814 |
y = HPL_rzero; |
815 |
|
816 |
for( i = 0; i < P; i++ ) |
817 |
{ z *= recbas; if( y < HPL_rone ) oldy = y; y = HPL_dlamc3( y, z ); }
|
818 |
|
819 |
if( y >= HPL_rone ) y = oldy;
|
820 |
/*
|
821 |
* Now multiply by BETA**EMAX to get RMAX.
|
822 |
*/
|
823 |
for( i = 0; i < *EMAX; i++ ) y = HPL_dlamc3( y * BETA, HPL_rzero ); |
824 |
|
825 |
*RMAX = y; |
826 |
/*
|
827 |
* End of HPL_dlamch
|
828 |
*/
|
829 |
} |
830 |
|
831 |
#ifdef STDC_HEADERS
|
832 |
static double HPL_dipow |
833 |
( |
834 |
const double X, |
835 |
const int N |
836 |
) |
837 |
#else
|
838 |
static double HPL_dipow( X, N ) |
839 |
/*
|
840 |
* .. Scalar Arguments ..
|
841 |
*/
|
842 |
const int N; |
843 |
const double X; |
844 |
#endif
|
845 |
{ |
846 |
/*
|
847 |
* Purpose
|
848 |
* =======
|
849 |
*
|
850 |
* HPL_dipow computes the integer n-th power of a real scalar x.
|
851 |
*
|
852 |
* Arguments
|
853 |
* =========
|
854 |
*
|
855 |
* X (local input) const double
|
856 |
* The real scalar x.
|
857 |
*
|
858 |
* N (local input) const int
|
859 |
* The integer power to raise x to.
|
860 |
*
|
861 |
* ---------------------------------------------------------------------
|
862 |
*/
|
863 |
/*
|
864 |
* .. Local Variables ..
|
865 |
*/
|
866 |
double r, y=HPL_rone;
|
867 |
int k, n;
|
868 |
/* ..
|
869 |
* .. Executable Statements ..
|
870 |
*/
|
871 |
if( X == HPL_rzero ) return( HPL_rzero ); |
872 |
if( N < 0 ) { n = -N; r = HPL_rone / X; } else { n = N; r = X; } |
873 |
for( k = 0; k < n; k++ ) y *= r; |
874 |
|
875 |
return( y );
|
876 |
} |