Statistiques
| Révision :

root / src / lapack / util / iparmq.f @ 10

Historique | Voir | Annoter | Télécharger (9,66 ko)

1 1 pfleura2
      INTEGER FUNCTION IPARMQ( ISPEC, NAME, OPTS, N, ILO, IHI, LWORK )
2 1 pfleura2
*
3 1 pfleura2
*  -- LAPACK auxiliary routine (version 3.2) --
4 1 pfleura2
*  -- LAPACK is a software package provided by Univ. of Tennessee,    --
5 1 pfleura2
*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
6 1 pfleura2
*     November 2006
7 1 pfleura2
*
8 1 pfleura2
*     .. Scalar Arguments ..
9 1 pfleura2
      INTEGER            IHI, ILO, ISPEC, LWORK, N
10 1 pfleura2
      CHARACTER          NAME*( * ), OPTS*( * )
11 1 pfleura2
*
12 1 pfleura2
*  Purpose
13 1 pfleura2
*  =======
14 1 pfleura2
*
15 1 pfleura2
*       This program sets problem and machine dependent parameters
16 1 pfleura2
*       useful for xHSEQR and its subroutines. It is called whenever
17 1 pfleura2
*       ILAENV is called with 12 <= ISPEC <= 16
18 1 pfleura2
*
19 1 pfleura2
*  Arguments
20 1 pfleura2
*  =========
21 1 pfleura2
*
22 1 pfleura2
*       ISPEC  (input) integer scalar
23 1 pfleura2
*              ISPEC specifies which tunable parameter IPARMQ should
24 1 pfleura2
*              return.
25 1 pfleura2
*
26 1 pfleura2
*              ISPEC=12: (INMIN)  Matrices of order nmin or less
27 1 pfleura2
*                        are sent directly to xLAHQR, the implicit
28 1 pfleura2
*                        double shift QR algorithm.  NMIN must be
29 1 pfleura2
*                        at least 11.
30 1 pfleura2
*
31 1 pfleura2
*              ISPEC=13: (INWIN)  Size of the deflation window.
32 1 pfleura2
*                        This is best set greater than or equal to
33 1 pfleura2
*                        the number of simultaneous shifts NS.
34 1 pfleura2
*                        Larger matrices benefit from larger deflation
35 1 pfleura2
*                        windows.
36 1 pfleura2
*
37 1 pfleura2
*              ISPEC=14: (INIBL) Determines when to stop nibbling and
38 1 pfleura2
*                        invest in an (expensive) multi-shift QR sweep.
39 1 pfleura2
*                        If the aggressive early deflation subroutine
40 1 pfleura2
*                        finds LD converged eigenvalues from an order
41 1 pfleura2
*                        NW deflation window and LD.GT.(NW*NIBBLE)/100,
42 1 pfleura2
*                        then the next QR sweep is skipped and early
43 1 pfleura2
*                        deflation is applied immediately to the
44 1 pfleura2
*                        remaining active diagonal block.  Setting
45 1 pfleura2
*                        IPARMQ(ISPEC=14) = 0 causes TTQRE to skip a
46 1 pfleura2
*                        multi-shift QR sweep whenever early deflation
47 1 pfleura2
*                        finds a converged eigenvalue.  Setting
48 1 pfleura2
*                        IPARMQ(ISPEC=14) greater than or equal to 100
49 1 pfleura2
*                        prevents TTQRE from skipping a multi-shift
50 1 pfleura2
*                        QR sweep.
51 1 pfleura2
*
52 1 pfleura2
*              ISPEC=15: (NSHFTS) The number of simultaneous shifts in
53 1 pfleura2
*                        a multi-shift QR iteration.
54 1 pfleura2
*
55 1 pfleura2
*              ISPEC=16: (IACC22) IPARMQ is set to 0, 1 or 2 with the
56 1 pfleura2
*                        following meanings.
57 1 pfleura2
*                        0:  During the multi-shift QR sweep,
58 1 pfleura2
*                            xLAQR5 does not accumulate reflections and
59 1 pfleura2
*                            does not use matrix-matrix multiply to
60 1 pfleura2
*                            update the far-from-diagonal matrix
61 1 pfleura2
*                            entries.
62 1 pfleura2
*                        1:  During the multi-shift QR sweep,
63 1 pfleura2
*                            xLAQR5 and/or xLAQRaccumulates reflections and uses
64 1 pfleura2
*                            matrix-matrix multiply to update the
65 1 pfleura2
*                            far-from-diagonal matrix entries.
66 1 pfleura2
*                        2:  During the multi-shift QR sweep.
67 1 pfleura2
*                            xLAQR5 accumulates reflections and takes
68 1 pfleura2
*                            advantage of 2-by-2 block structure during
69 1 pfleura2
*                            matrix-matrix multiplies.
70 1 pfleura2
*                        (If xTRMM is slower than xGEMM, then
71 1 pfleura2
*                        IPARMQ(ISPEC=16)=1 may be more efficient than
72 1 pfleura2
*                        IPARMQ(ISPEC=16)=2 despite the greater level of
73 1 pfleura2
*                        arithmetic work implied by the latter choice.)
74 1 pfleura2
*
75 1 pfleura2
*       NAME    (input) character string
76 1 pfleura2
*               Name of the calling subroutine
77 1 pfleura2
*
78 1 pfleura2
*       OPTS    (input) character string
79 1 pfleura2
*               This is a concatenation of the string arguments to
80 1 pfleura2
*               TTQRE.
81 1 pfleura2
*
82 1 pfleura2
*       N       (input) integer scalar
83 1 pfleura2
*               N is the order of the Hessenberg matrix H.
84 1 pfleura2
*
85 1 pfleura2
*       ILO     (input) INTEGER
86 1 pfleura2
*       IHI     (input) INTEGER
87 1 pfleura2
*               It is assumed that H is already upper triangular
88 1 pfleura2
*               in rows and columns 1:ILO-1 and IHI+1:N.
89 1 pfleura2
*
90 1 pfleura2
*       LWORK   (input) integer scalar
91 1 pfleura2
*               The amount of workspace available.
92 1 pfleura2
*
93 1 pfleura2
*  Further Details
94 1 pfleura2
*  ===============
95 1 pfleura2
*
96 1 pfleura2
*       Little is known about how best to choose these parameters.
97 1 pfleura2
*       It is possible to use different values of the parameters
98 1 pfleura2
*       for each of CHSEQR, DHSEQR, SHSEQR and ZHSEQR.
99 1 pfleura2
*
100 1 pfleura2
*       It is probably best to choose different parameters for
101 1 pfleura2
*       different matrices and different parameters at different
102 1 pfleura2
*       times during the iteration, but this has not been
103 1 pfleura2
*       implemented --- yet.
104 1 pfleura2
*
105 1 pfleura2
*
106 1 pfleura2
*       The best choices of most of the parameters depend
107 1 pfleura2
*       in an ill-understood way on the relative execution
108 1 pfleura2
*       rate of xLAQR3 and xLAQR5 and on the nature of each
109 1 pfleura2
*       particular eigenvalue problem.  Experiment may be the
110 1 pfleura2
*       only practical way to determine which choices are most
111 1 pfleura2
*       effective.
112 1 pfleura2
*
113 1 pfleura2
*       Following is a list of default values supplied by IPARMQ.
114 1 pfleura2
*       These defaults may be adjusted in order to attain better
115 1 pfleura2
*       performance in any particular computational environment.
116 1 pfleura2
*
117 1 pfleura2
*       IPARMQ(ISPEC=12) The xLAHQR vs xLAQR0 crossover point.
118 1 pfleura2
*                        Default: 75. (Must be at least 11.)
119 1 pfleura2
*
120 1 pfleura2
*       IPARMQ(ISPEC=13) Recommended deflation window size.
121 1 pfleura2
*                        This depends on ILO, IHI and NS, the
122 1 pfleura2
*                        number of simultaneous shifts returned
123 1 pfleura2
*                        by IPARMQ(ISPEC=15).  The default for
124 1 pfleura2
*                        (IHI-ILO+1).LE.500 is NS.  The default
125 1 pfleura2
*                        for (IHI-ILO+1).GT.500 is 3*NS/2.
126 1 pfleura2
*
127 1 pfleura2
*       IPARMQ(ISPEC=14) Nibble crossover point.  Default: 14.
128 1 pfleura2
*
129 1 pfleura2
*       IPARMQ(ISPEC=15) Number of simultaneous shifts, NS.
130 1 pfleura2
*                        a multi-shift QR iteration.
131 1 pfleura2
*
132 1 pfleura2
*                        If IHI-ILO+1 is ...
133 1 pfleura2
*
134 1 pfleura2
*                        greater than      ...but less    ... the
135 1 pfleura2
*                        or equal to ...      than        default is
136 1 pfleura2
*
137 1 pfleura2
*                                0               30       NS =   2+
138 1 pfleura2
*                               30               60       NS =   4+
139 1 pfleura2
*                               60              150       NS =  10
140 1 pfleura2
*                              150              590       NS =  **
141 1 pfleura2
*                              590             3000       NS =  64
142 1 pfleura2
*                             3000             6000       NS = 128
143 1 pfleura2
*                             6000             infinity   NS = 256
144 1 pfleura2
*
145 1 pfleura2
*                    (+)  By default matrices of this order are
146 1 pfleura2
*                         passed to the implicit double shift routine
147 1 pfleura2
*                         xLAHQR.  See IPARMQ(ISPEC=12) above.   These
148 1 pfleura2
*                         values of NS are used only in case of a rare
149 1 pfleura2
*                         xLAHQR failure.
150 1 pfleura2
*
151 1 pfleura2
*                    (**) The asterisks (**) indicate an ad-hoc
152 1 pfleura2
*                         function increasing from 10 to 64.
153 1 pfleura2
*
154 1 pfleura2
*       IPARMQ(ISPEC=16) Select structured matrix multiply.
155 1 pfleura2
*                        (See ISPEC=16 above for details.)
156 1 pfleura2
*                        Default: 3.
157 1 pfleura2
*
158 1 pfleura2
*     ================================================================
159 1 pfleura2
*     .. Parameters ..
160 1 pfleura2
      INTEGER            INMIN, INWIN, INIBL, ISHFTS, IACC22
161 1 pfleura2
      PARAMETER          ( INMIN = 12, INWIN = 13, INIBL = 14,
162 1 pfleura2
     $                   ISHFTS = 15, IACC22 = 16 )
163 1 pfleura2
      INTEGER            NMIN, K22MIN, KACMIN, NIBBLE, KNWSWP
164 1 pfleura2
      PARAMETER          ( NMIN = 75, K22MIN = 14, KACMIN = 14,
165 1 pfleura2
     $                   NIBBLE = 14, KNWSWP = 500 )
166 1 pfleura2
      REAL               TWO
167 1 pfleura2
      PARAMETER          ( TWO = 2.0 )
168 1 pfleura2
*     ..
169 1 pfleura2
*     .. Local Scalars ..
170 1 pfleura2
      INTEGER            NH, NS
171 1 pfleura2
*     ..
172 1 pfleura2
*     .. Intrinsic Functions ..
173 1 pfleura2
      INTRINSIC          LOG, MAX, MOD, NINT, REAL
174 1 pfleura2
*     ..
175 1 pfleura2
*     .. Executable Statements ..
176 1 pfleura2
      IF( ( ISPEC.EQ.ISHFTS ) .OR. ( ISPEC.EQ.INWIN ) .OR.
177 1 pfleura2
     $    ( ISPEC.EQ.IACC22 ) ) THEN
178 1 pfleura2
*
179 1 pfleura2
*        ==== Set the number simultaneous shifts ====
180 1 pfleura2
*
181 1 pfleura2
         NH = IHI - ILO + 1
182 1 pfleura2
         NS = 2
183 1 pfleura2
         IF( NH.GE.30 )
184 1 pfleura2
     $      NS = 4
185 1 pfleura2
         IF( NH.GE.60 )
186 1 pfleura2
     $      NS = 10
187 1 pfleura2
         IF( NH.GE.150 )
188 1 pfleura2
     $      NS = MAX( 10, NH / NINT( LOG( REAL( NH ) ) / LOG( TWO ) ) )
189 1 pfleura2
         IF( NH.GE.590 )
190 1 pfleura2
     $      NS = 64
191 1 pfleura2
         IF( NH.GE.3000 )
192 1 pfleura2
     $      NS = 128
193 1 pfleura2
         IF( NH.GE.6000 )
194 1 pfleura2
     $      NS = 256
195 1 pfleura2
         NS = MAX( 2, NS-MOD( NS, 2 ) )
196 1 pfleura2
      END IF
197 1 pfleura2
*
198 1 pfleura2
      IF( ISPEC.EQ.INMIN ) THEN
199 1 pfleura2
*
200 1 pfleura2
*
201 1 pfleura2
*        ===== Matrices of order smaller than NMIN get sent
202 1 pfleura2
*        .     to xLAHQR, the classic double shift algorithm.
203 1 pfleura2
*        .     This must be at least 11. ====
204 1 pfleura2
*
205 1 pfleura2
         IPARMQ = NMIN
206 1 pfleura2
*
207 1 pfleura2
      ELSE IF( ISPEC.EQ.INIBL ) THEN
208 1 pfleura2
*
209 1 pfleura2
*        ==== INIBL: skip a multi-shift qr iteration and
210 1 pfleura2
*        .    whenever aggressive early deflation finds
211 1 pfleura2
*        .    at least (NIBBLE*(window size)/100) deflations. ====
212 1 pfleura2
*
213 1 pfleura2
         IPARMQ = NIBBLE
214 1 pfleura2
*
215 1 pfleura2
      ELSE IF( ISPEC.EQ.ISHFTS ) THEN
216 1 pfleura2
*
217 1 pfleura2
*        ==== NSHFTS: The number of simultaneous shifts =====
218 1 pfleura2
*
219 1 pfleura2
         IPARMQ = NS
220 1 pfleura2
*
221 1 pfleura2
      ELSE IF( ISPEC.EQ.INWIN ) THEN
222 1 pfleura2
*
223 1 pfleura2
*        ==== NW: deflation window size.  ====
224 1 pfleura2
*
225 1 pfleura2
         IF( NH.LE.KNWSWP ) THEN
226 1 pfleura2
            IPARMQ = NS
227 1 pfleura2
         ELSE
228 1 pfleura2
            IPARMQ = 3*NS / 2
229 1 pfleura2
         END IF
230 1 pfleura2
*
231 1 pfleura2
      ELSE IF( ISPEC.EQ.IACC22 ) THEN
232 1 pfleura2
*
233 1 pfleura2
*        ==== IACC22: Whether to accumulate reflections
234 1 pfleura2
*        .     before updating the far-from-diagonal elements
235 1 pfleura2
*        .     and whether to use 2-by-2 block structure while
236 1 pfleura2
*        .     doing it.  A small amount of work could be saved
237 1 pfleura2
*        .     by making this choice dependent also upon the
238 1 pfleura2
*        .     NH=IHI-ILO+1.
239 1 pfleura2
*
240 1 pfleura2
         IPARMQ = 0
241 1 pfleura2
         IF( NS.GE.KACMIN )
242 1 pfleura2
     $      IPARMQ = 1
243 1 pfleura2
         IF( NS.GE.K22MIN )
244 1 pfleura2
     $      IPARMQ = 2
245 1 pfleura2
*
246 1 pfleura2
      ELSE
247 1 pfleura2
*        ===== invalid value of ispec =====
248 1 pfleura2
         IPARMQ = -1
249 1 pfleura2
*
250 1 pfleura2
      END IF
251 1 pfleura2
*
252 1 pfleura2
*     ==== End of IPARMQ ====
253 1 pfleura2
*
254 1 pfleura2
      END