ergo
template_lapack_larrv.h
Go to the documentation of this file.
1 /* Ergo, version 3.2, a program for linear scaling electronic structure
2  * calculations.
3  * Copyright (C) 2012 Elias Rudberg, Emanuel H. Rubensson, and Pawel Salek.
4  *
5  * This program is free software: you can redistribute it and/or modify
6  * it under the terms of the GNU General Public License as published by
7  * the Free Software Foundation, either version 3 of the License, or
8  * (at your option) any later version.
9  *
10  * This program is distributed in the hope that it will be useful,
11  * but WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13  * GNU General Public License for more details.
14  *
15  * You should have received a copy of the GNU General Public License
16  * along with this program. If not, see <http://www.gnu.org/licenses/>.
17  *
18  * Primary academic reference:
19  * Kohn−Sham Density Functional Theory Electronic Structure Calculations
20  * with Linearly Scaling Computational Time and Memory Usage,
21  * Elias Rudberg, Emanuel H. Rubensson, and Pawel Salek,
22  * J. Chem. Theory Comput. 7, 340 (2011),
23  * <http://dx.doi.org/10.1021/ct100611z>
24  *
25  * For further information about Ergo, see <http://www.ergoscf.org>.
26  */
27 
28  /* This file belongs to the template_lapack part of the Ergo source
29  * code. The source files in the template_lapack directory are modified
30  * versions of files originally distributed as CLAPACK, see the
31  * Copyright/license notice in the file template_lapack/COPYING.
32  */
33 
34 
35 #ifndef TEMPLATE_LAPACK_LARRV_HEADER
36 #define TEMPLATE_LAPACK_LARRV_HEADER
37 
38 template<class Treal>
39 int template_lapack_larrv(const integer *n, Treal *vl, Treal *vu,
40  Treal *d__, Treal *l, Treal *pivmin, integer *isplit,
41  integer *m, integer *dol, integer *dou, Treal *minrgp,
42  Treal *rtol1, Treal *rtol2, Treal *w, Treal *werr,
43  Treal *wgap, integer *iblock, integer *indexw, Treal *gers,
44  Treal *z__, const integer *ldz, integer *isuppz, Treal *work,
45  integer *iwork, integer *info)
46 {
47  /* System generated locals */
48  integer z_dim1, z_offset, i__1, i__2, i__3, i__4, i__5;
49  Treal d__1, d__2;
50  logical L__1;
51 
52 
53  /* Local variables */
54  integer minwsize, i__, j, k, p, q, miniwsize, ii;
55  Treal gl;
56  integer im, in;
57  Treal gu, gap, eps, tau, tol, tmp;
58  integer zto;
59  Treal ztz;
60  integer iend, jblk;
61  Treal lgap;
62  integer done;
63  Treal rgap, left;
64  integer wend, iter;
65  Treal bstw = 0; // EMANUEL COMMENT: initialize to get rid of compiler warning
66  integer itmp1;
67  integer indld;
68  Treal fudge;
69  integer idone;
70  Treal sigma;
71  integer iinfo, iindr;
72  Treal resid;
73  logical eskip;
74  Treal right;
75  integer nclus, zfrom;
76  Treal rqtol;
77  integer iindc1, iindc2;
78  logical stp2ii;
79  Treal lambda;
80  integer ibegin, indeig;
81  logical needbs;
82  integer indlld;
83  Treal sgndef, mingma;
84  integer oldien, oldncl, wbegin;
85  Treal spdiam;
86  integer negcnt;
87  integer oldcls;
88  Treal savgap;
89  integer ndepth;
90  Treal ssigma;
91  logical usedbs;
92  integer iindwk, offset;
93  Treal gaptol;
94  integer newcls, oldfst, indwrk, windex, oldlst;
95  logical usedrq;
96  integer newfst, newftt, parity, windmn, windpl, isupmn, newlst, zusedl;
97  Treal bstres = 0; // EMANUEL COMMENT: initialize to get rid of compiler warning
98  integer newsiz, zusedu, zusedw;
99  Treal nrminv, rqcorr;
100  logical tryrqc;
101  integer isupmx;
102 
103 
104 /* -- LAPACK auxiliary routine (version 3.2) -- */
105 /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */
106 /* November 2006 */
107 
108 /* .. Scalar Arguments .. */
109 /* .. */
110 /* .. Array Arguments .. */
111 /* .. */
112 
113 /* Purpose */
114 /* ======= */
115 
116 /* DLARRV computes the eigenvectors of the tridiagonal matrix */
117 /* T = L D L^T given L, D and APPROXIMATIONS to the eigenvalues of L D L^T. */
118 /* The input eigenvalues should have been computed by DLARRE. */
119 
120 /* Arguments */
121 /* ========= */
122 
123 /* N (input) INTEGER */
124 /* The order of the matrix. N >= 0. */
125 
126 /* VL (input) DOUBLE PRECISION */
127 /* VU (input) DOUBLE PRECISION */
128 /* Lower and upper bounds of the interval that contains the desired */
129 /* eigenvalues. VL < VU. Needed to compute gaps on the left or right */
130 /* end of the extremal eigenvalues in the desired RANGE. */
131 
132 /* D (input/output) DOUBLE PRECISION array, dimension (N) */
133 /* On entry, the N diagonal elements of the diagonal matrix D. */
134 /* On exit, D may be overwritten. */
135 
136 /* L (input/output) DOUBLE PRECISION array, dimension (N) */
137 /* On entry, the (N-1) subdiagonal elements of the unit */
138 /* bidiagonal matrix L are in elements 1 to N-1 of L */
139 /* (if the matrix is not splitted.) At the end of each block */
140 /* is stored the corresponding shift as given by DLARRE. */
141 /* On exit, L is overwritten. */
142 
143 /* PIVMIN (in) DOUBLE PRECISION */
144 /* The minimum pivot allowed in the Sturm sequence. */
145 
146 /* ISPLIT (input) INTEGER array, dimension (N) */
147 /* The splitting points, at which T breaks up into blocks. */
148 /* The first block consists of rows/columns 1 to */
149 /* ISPLIT( 1 ), the second of rows/columns ISPLIT( 1 )+1 */
150 /* through ISPLIT( 2 ), etc. */
151 
152 /* M (input) INTEGER */
153 /* The total number of input eigenvalues. 0 <= M <= N. */
154 
155 /* DOL (input) INTEGER */
156 /* DOU (input) INTEGER */
157 /* If the user wants to compute only selected eigenvectors from all */
158 /* the eigenvalues supplied, he can specify an index range DOL:DOU. */
159 /* Or else the setting DOL=1, DOU=M should be applied. */
160 /* Note that DOL and DOU refer to the order in which the eigenvalues */
161 /* are stored in W. */
162 /* If the user wants to compute only selected eigenpairs, then */
163 /* the columns DOL-1 to DOU+1 of the eigenvector space Z contain the */
164 /* computed eigenvectors. All other columns of Z are set to zero. */
165 
166 /* MINRGP (input) DOUBLE PRECISION */
167 
168 /* RTOL1 (input) DOUBLE PRECISION */
169 /* RTOL2 (input) DOUBLE PRECISION */
170 /* Parameters for bisection. */
171 /* An interval [LEFT,RIGHT] has converged if */
172 /* RIGHT-LEFT.LT.MAX( RTOL1*GAP, RTOL2*MAX(|LEFT|,|RIGHT|) ) */
173 
174 /* W (input/output) DOUBLE PRECISION array, dimension (N) */
175 /* The first M elements of W contain the APPROXIMATE eigenvalues for */
176 /* which eigenvectors are to be computed. The eigenvalues */
177 /* should be grouped by split-off block and ordered from */
178 /* smallest to largest within the block ( The output array */
179 /* W from DLARRE is expected here ). Furthermore, they are with */
180 /* respect to the shift of the corresponding root representation */
181 /* for their block. On exit, W holds the eigenvalues of the */
182 /* UNshifted matrix. */
183 
184 /* WERR (input/output) DOUBLE PRECISION array, dimension (N) */
185 /* The first M elements contain the semiwidth of the uncertainty */
186 /* interval of the corresponding eigenvalue in W */
187 
188 /* WGAP (input/output) DOUBLE PRECISION array, dimension (N) */
189 /* The separation from the right neighbor eigenvalue in W. */
190 
191 /* IBLOCK (input) INTEGER array, dimension (N) */
192 /* The indices of the blocks (submatrices) associated with the */
193 /* corresponding eigenvalues in W; IBLOCK(i)=1 if eigenvalue */
194 /* W(i) belongs to the first block from the top, =2 if W(i) */
195 /* belongs to the second block, etc. */
196 
197 /* INDEXW (input) INTEGER array, dimension (N) */
198 /* The indices of the eigenvalues within each block (submatrix); */
199 /* for example, INDEXW(i)= 10 and IBLOCK(i)=2 imply that the */
200 /* i-th eigenvalue W(i) is the 10-th eigenvalue in the second block. */
201 
202 /* GERS (input) DOUBLE PRECISION array, dimension (2*N) */
203 /* The N Gerschgorin intervals (the i-th Gerschgorin interval */
204 /* is (GERS(2*i-1), GERS(2*i)). The Gerschgorin intervals should */
205 /* be computed from the original UNshifted matrix. */
206 
207 /* Z (output) DOUBLE PRECISION array, dimension (LDZ, max(1,M) ) */
208 /* If INFO = 0, the first M columns of Z contain the */
209 /* orthonormal eigenvectors of the matrix T */
210 /* corresponding to the input eigenvalues, with the i-th */
211 /* column of Z holding the eigenvector associated with W(i). */
212 /* Note: the user must ensure that at least max(1,M) columns are */
213 /* supplied in the array Z. */
214 
215 /* LDZ (input) INTEGER */
216 /* The leading dimension of the array Z. LDZ >= 1, and if */
217 /* JOBZ = 'V', LDZ >= max(1,N). */
218 
219 /* ISUPPZ (output) INTEGER array, dimension ( 2*max(1,M) ) */
220 /* The support of the eigenvectors in Z, i.e., the indices */
221 /* indicating the nonzero elements in Z. The I-th eigenvector */
222 /* is nonzero only in elements ISUPPZ( 2*I-1 ) through */
223 /* ISUPPZ( 2*I ). */
224 
225 /* WORK (workspace) DOUBLE PRECISION array, dimension (12*N) */
226 
227 /* IWORK (workspace) INTEGER array, dimension (7*N) */
228 
229 /* INFO (output) INTEGER */
230 /* = 0: successful exit */
231 
232 /* > 0: A problem occured in DLARRV. */
233 /* < 0: One of the called subroutines signaled an internal problem. */
234 /* Needs inspection of the corresponding parameter IINFO */
235 /* for further information. */
236 
237 /* =-1: Problem in DLARRB when refining a child's eigenvalues. */
238 /* =-2: Problem in DLARRF when computing the RRR of a child. */
239 /* When a child is inside a tight cluster, it can be difficult */
240 /* to find an RRR. A partial remedy from the user's point of */
241 /* view is to make the parameter MINRGP smaller and recompile. */
242 /* However, as the orthogonality of the computed vectors is */
243 /* proportional to 1/MINRGP, the user should be aware that */
244 /* he might be trading in precision when he decreases MINRGP. */
245 /* =-3: Problem in DLARRB when refining a single eigenvalue */
246 /* after the Rayleigh correction was rejected. */
247 /* = 5: The Rayleigh Quotient Iteration failed to converge to */
248 /* full accuracy in MAXITR steps. */
249 
250 /* Further Details */
251 /* =============== */
252 
253 /* Based on contributions by */
254 /* Beresford Parlett, University of California, Berkeley, USA */
255 /* Jim Demmel, University of California, Berkeley, USA */
256 /* Inderjit Dhillon, University of Texas, Austin, USA */
257 /* Osni Marques, LBNL/NERSC, USA */
258 /* Christof Voemel, University of California, Berkeley, USA */
259 
260 /* ===================================================================== */
261 
262 /* .. Parameters .. */
263 /* .. */
264 /* .. Local Scalars .. */
265 /* .. */
266 /* .. External Functions .. */
267 /* .. */
268 /* .. External Subroutines .. */
269 /* .. */
270 /* .. Intrinsic Functions .. */
271 /* .. */
272 /* .. Executable Statements .. */
273 /* .. */
274 /* The first N entries of WORK are reserved for the eigenvalues */
275 
276 /* Table of constant values */
277 
278  Treal c_b5 = 0.;
279  integer c__1 = 1;
280  integer c__2 = 2;
281 
282 
283  /* Parameter adjustments */
284  --d__;
285  --l;
286  --isplit;
287  --w;
288  --werr;
289  --wgap;
290  --iblock;
291  --indexw;
292  --gers;
293  z_dim1 = *ldz;
294  z_offset = 1 + z_dim1;
295  z__ -= z_offset;
296  --isuppz;
297  --work;
298  --iwork;
299 
300  /* Function Body */
301  indld = *n + 1;
302  indlld = (*n << 1) + 1;
303  indwrk = *n * 3 + 1;
304  minwsize = *n * 12;
305  i__1 = minwsize;
306  for (i__ = 1; i__ <= i__1; ++i__) {
307  work[i__] = 0.;
308 /* L5: */
309  }
310 /* IWORK(IINDR+1:IINDR+N) hold the twist indices R for the */
311 /* factorization used to compute the FP vector */
312  iindr = 0;
313 /* IWORK(IINDC1+1:IINC2+N) are used to store the clusters of the current */
314 /* layer and the one above. */
315  iindc1 = *n;
316  iindc2 = *n << 1;
317  iindwk = *n * 3 + 1;
318  miniwsize = *n * 7;
319  i__1 = miniwsize;
320  for (i__ = 1; i__ <= i__1; ++i__) {
321  iwork[i__] = 0;
322 /* L10: */
323  }
324  zusedl = 1;
325  if (*dol > 1) {
326 /* Set lower bound for use of Z */
327  zusedl = *dol - 1;
328  }
329  zusedu = *m;
330  if (*dou < *m) {
331 /* Set lower bound for use of Z */
332  zusedu = *dou + 1;
333  }
334 /* The width of the part of Z that is used */
335  zusedw = zusedu - zusedl + 1;
336  template_lapack_laset("Full", n, &zusedw, &c_b5, &c_b5, &z__[zusedl * z_dim1 + 1], ldz);
337  eps = template_lapack_lamch("Precision",(Treal)0);
338  rqtol = eps * 2.;
339 
340 /* Set expert flags for standard code. */
341  tryrqc = TRUE_;
342  if (*dol == 1 && *dou == *m) {
343  } else {
344 /* Only selected eigenpairs are computed. Since the other evalues */
345 /* are not refined by RQ iteration, bisection has to compute to full */
346 /* accuracy. */
347  *rtol1 = eps * 4.;
348  *rtol2 = eps * 4.;
349  }
350 /* The entries WBEGIN:WEND in W, WERR, WGAP correspond to the */
351 /* desired eigenvalues. The support of the nonzero eigenvector */
352 /* entries is contained in the interval IBEGIN:IEND. */
353 /* Remark that if k eigenpairs are desired, then the eigenvectors */
354 /* are stored in k contiguous columns of Z. */
355 /* DONE is the number of eigenvectors already computed */
356  done = 0;
357  ibegin = 1;
358  wbegin = 1;
359  i__1 = iblock[*m];
360  for (jblk = 1; jblk <= i__1; ++jblk) {
361  iend = isplit[jblk];
362  sigma = l[iend];
363 /* Find the eigenvectors of the submatrix indexed IBEGIN */
364 /* through IEND. */
365  wend = wbegin - 1;
366 L15:
367  if (wend < *m) {
368  if (iblock[wend + 1] == jblk) {
369  ++wend;
370  goto L15;
371  }
372  }
373  if (wend < wbegin) {
374  ibegin = iend + 1;
375  goto L170;
376  } else if (wend < *dol || wbegin > *dou) {
377  ibegin = iend + 1;
378  wbegin = wend + 1;
379  goto L170;
380  }
381 /* Find local spectral diameter of the block */
382  gl = gers[(ibegin << 1) - 1];
383  gu = gers[ibegin * 2];
384  i__2 = iend;
385  for (i__ = ibegin + 1; i__ <= i__2; ++i__) {
386 /* Computing MIN */
387  d__1 = gers[(i__ << 1) - 1];
388  gl = minMACRO(d__1,gl);
389 /* Computing MAX */
390  d__1 = gers[i__ * 2];
391  gu = maxMACRO(d__1,gu);
392 /* L20: */
393  }
394  spdiam = gu - gl;
395 /* OLDIEN is the last index of the previous block */
396  oldien = ibegin - 1;
397 /* Calculate the size of the current block */
398  in = iend - ibegin + 1;
399 /* The number of eigenvalues in the current block */
400  im = wend - wbegin + 1;
401 /* This is for a 1x1 block */
402  if (ibegin == iend) {
403  ++done;
404  z__[ibegin + wbegin * z_dim1] = 1.;
405  isuppz[(wbegin << 1) - 1] = ibegin;
406  isuppz[wbegin * 2] = ibegin;
407  w[wbegin] += sigma;
408  work[wbegin] = w[wbegin];
409  ibegin = iend + 1;
410  ++wbegin;
411  goto L170;
412  }
413 /* The desired (shifted) eigenvalues are stored in W(WBEGIN:WEND) */
414 /* Note that these can be approximations, in this case, the corresp. */
415 /* entries of WERR give the size of the uncertainty interval. */
416 /* The eigenvalue approximations will be refined when necessary as */
417 /* high relative accuracy is required for the computation of the */
418 /* corresponding eigenvectors. */
419  template_blas_copy(&im, &w[wbegin], &c__1, &work[wbegin], &c__1);
420 /* We store in W the eigenvalue approximations w.r.t. the original */
421 /* matrix T. */
422  i__2 = im;
423  for (i__ = 1; i__ <= i__2; ++i__) {
424  w[wbegin + i__ - 1] += sigma;
425 /* L30: */
426  }
427 /* NDEPTH is the current depth of the representation tree */
428  ndepth = 0;
429 /* PARITY is either 1 or 0 */
430  parity = 1;
431 /* NCLUS is the number of clusters for the next level of the */
432 /* representation tree, we start with NCLUS = 1 for the root */
433  nclus = 1;
434  iwork[iindc1 + 1] = 1;
435  iwork[iindc1 + 2] = im;
436 /* IDONE is the number of eigenvectors already computed in the current */
437 /* block */
438  idone = 0;
439 /* loop while( IDONE.LT.IM ) */
440 /* generate the representation tree for the current block and */
441 /* compute the eigenvectors */
442 L40:
443  if (idone < im) {
444 /* This is a crude protection against infinitely deep trees */
445  if (ndepth > *m) {
446  *info = -2;
447  return 0;
448  }
449 /* breadth first processing of the current level of the representation */
450 /* tree: OLDNCL = number of clusters on current level */
451  oldncl = nclus;
452 /* reset NCLUS to count the number of child clusters */
453  nclus = 0;
454 
455  parity = 1 - parity;
456  if (parity == 0) {
457  oldcls = iindc1;
458  newcls = iindc2;
459  } else {
460  oldcls = iindc2;
461  newcls = iindc1;
462  }
463 /* Process the clusters on the current level */
464  i__2 = oldncl;
465  for (i__ = 1; i__ <= i__2; ++i__) {
466  j = oldcls + (i__ << 1);
467 /* OLDFST, OLDLST = first, last index of current cluster. */
468 /* cluster indices start with 1 and are relative */
469 /* to WBEGIN when accessing W, WGAP, WERR, Z */
470  oldfst = iwork[j - 1];
471  oldlst = iwork[j];
472  if (ndepth > 0) {
473 /* Retrieve relatively robust representation (RRR) of cluster */
474 /* that has been computed at the previous level */
475 /* The RRR is stored in Z and overwritten once the eigenvectors */
476 /* have been computed or when the cluster is refined */
477  if (*dol == 1 && *dou == *m) {
478 /* Get representation from location of the leftmost evalue */
479 /* of the cluster */
480  j = wbegin + oldfst - 1;
481  } else {
482  if (wbegin + oldfst - 1 < *dol) {
483 /* Get representation from the left end of Z array */
484  j = *dol - 1;
485  } else if (wbegin + oldfst - 1 > *dou) {
486 /* Get representation from the right end of Z array */
487  j = *dou;
488  } else {
489  j = wbegin + oldfst - 1;
490  }
491  }
492  template_blas_copy(&in, &z__[ibegin + j * z_dim1], &c__1, &d__[ibegin]
493 , &c__1);
494  i__3 = in - 1;
495  template_blas_copy(&i__3, &z__[ibegin + (j + 1) * z_dim1], &c__1, &l[
496  ibegin], &c__1);
497  sigma = z__[iend + (j + 1) * z_dim1];
498 /* Set the corresponding entries in Z to zero */
499  template_lapack_laset("Full", &in, &c__2, &c_b5, &c_b5, &z__[ibegin + j
500  * z_dim1], ldz);
501  }
502 /* Compute DL and DLL of current RRR */
503  i__3 = iend - 1;
504  for (j = ibegin; j <= i__3; ++j) {
505  tmp = d__[j] * l[j];
506  work[indld - 1 + j] = tmp;
507  work[indlld - 1 + j] = tmp * l[j];
508 /* L50: */
509  }
510  if (ndepth > 0) {
511 /* P and Q are index of the first and last eigenvalue to compute */
512 /* within the current block */
513  p = indexw[wbegin - 1 + oldfst];
514  q = indexw[wbegin - 1 + oldlst];
515 /* Offset for the arrays WORK, WGAP and WERR, i.e., th P-OFFSET */
516 /* thru' Q-OFFSET elements of these arrays are to be used. */
517 /* OFFSET = P-OLDFST */
518  offset = indexw[wbegin] - 1;
519 /* perform limited bisection (if necessary) to get approximate */
520 /* eigenvalues to the precision needed. */
521  template_lapack_larrb(&in, &d__[ibegin], &work[indlld + ibegin - 1], &p,
522  &q, rtol1, rtol2, &offset, &work[wbegin], &wgap[
523  wbegin], &werr[wbegin], &work[indwrk], &iwork[
524  iindwk], pivmin, &spdiam, &in, &iinfo);
525  if (iinfo != 0) {
526  *info = -1;
527  return 0;
528  }
529 /* We also recompute the extremal gaps. W holds all eigenvalues */
530 /* of the unshifted matrix and must be used for computation */
531 /* of WGAP, the entries of WORK might stem from RRRs with */
532 /* different shifts. The gaps from WBEGIN-1+OLDFST to */
533 /* WBEGIN-1+OLDLST are correctly computed in DLARRB. */
534 /* However, we only allow the gaps to become greater since */
535 /* this is what should happen when we decrease WERR */
536  if (oldfst > 1) {
537 /* Computing MAX */
538  d__1 = wgap[wbegin + oldfst - 2], d__2 = w[wbegin +
539  oldfst - 1] - werr[wbegin + oldfst - 1] - w[
540  wbegin + oldfst - 2] - werr[wbegin + oldfst -
541  2];
542  wgap[wbegin + oldfst - 2] = maxMACRO(d__1,d__2);
543  }
544  if (wbegin + oldlst - 1 < wend) {
545 /* Computing MAX */
546  d__1 = wgap[wbegin + oldlst - 1], d__2 = w[wbegin +
547  oldlst] - werr[wbegin + oldlst] - w[wbegin +
548  oldlst - 1] - werr[wbegin + oldlst - 1];
549  wgap[wbegin + oldlst - 1] = maxMACRO(d__1,d__2);
550  }
551 /* Each time the eigenvalues in WORK get refined, we store */
552 /* the newly found approximation with all shifts applied in W */
553  i__3 = oldlst;
554  for (j = oldfst; j <= i__3; ++j) {
555  w[wbegin + j - 1] = work[wbegin + j - 1] + sigma;
556 /* L53: */
557  }
558  }
559 /* Process the current node. */
560  newfst = oldfst;
561  i__3 = oldlst;
562  for (j = oldfst; j <= i__3; ++j) {
563  if (j == oldlst) {
564 /* we are at the right end of the cluster, this is also the */
565 /* boundary of the child cluster */
566  newlst = j;
567  } else if (wgap[wbegin + j - 1] >= *minrgp * (d__1 = work[
568  wbegin + j - 1], absMACRO(d__1))) {
569 /* the right relative gap is big enough, the child cluster */
570 /* (NEWFST,..,NEWLST) is well separated from the following */
571  newlst = j;
572  } else {
573 /* inside a child cluster, the relative gap is not */
574 /* big enough. */
575  goto L140;
576  }
577 /* Compute size of child cluster found */
578  newsiz = newlst - newfst + 1;
579 /* NEWFTT is the place in Z where the new RRR or the computed */
580 /* eigenvector is to be stored */
581  if (*dol == 1 && *dou == *m) {
582 /* Store representation at location of the leftmost evalue */
583 /* of the cluster */
584  newftt = wbegin + newfst - 1;
585  } else {
586  if (wbegin + newfst - 1 < *dol) {
587 /* Store representation at the left end of Z array */
588  newftt = *dol - 1;
589  } else if (wbegin + newfst - 1 > *dou) {
590 /* Store representation at the right end of Z array */
591  newftt = *dou;
592  } else {
593  newftt = wbegin + newfst - 1;
594  }
595  }
596  if (newsiz > 1) {
597 
598 /* Current child is not a singleton but a cluster. */
599 /* Compute and store new representation of child. */
600 
601 
602 /* Compute left and right cluster gap. */
603 
604 /* LGAP and RGAP are not computed from WORK because */
605 /* the eigenvalue approximations may stem from RRRs */
606 /* different shifts. However, W hold all eigenvalues */
607 /* of the unshifted matrix. Still, the entries in WGAP */
608 /* have to be computed from WORK since the entries */
609 /* in W might be of the same order so that gaps are not */
610 /* exhibited correctly for very close eigenvalues. */
611  if (newfst == 1) {
612 /* Computing MAX */
613  d__1 = 0., d__2 = w[wbegin] - werr[wbegin] - *vl;
614  lgap = maxMACRO(d__1,d__2);
615  } else {
616  lgap = wgap[wbegin + newfst - 2];
617  }
618  rgap = wgap[wbegin + newlst - 1];
619 
620 /* Compute left- and rightmost eigenvalue of child */
621 /* to high precision in order to shift as close */
622 /* as possible and obtain as large relative gaps */
623 /* as possible */
624 
625  for (k = 1; k <= 2; ++k) {
626  if (k == 1) {
627  p = indexw[wbegin - 1 + newfst];
628  } else {
629  p = indexw[wbegin - 1 + newlst];
630  }
631  offset = indexw[wbegin] - 1;
632  template_lapack_larrb(&in, &d__[ibegin], &work[indlld + ibegin
633  - 1], &p, &p, &rqtol, &rqtol, &offset, &
634  work[wbegin], &wgap[wbegin], &werr[wbegin]
635 , &work[indwrk], &iwork[iindwk], pivmin, &
636  spdiam, &in, &iinfo);
637 /* L55: */
638  }
639 
640  if (wbegin + newlst - 1 < *dol || wbegin + newfst - 1
641  > *dou) {
642 /* if the cluster contains no desired eigenvalues */
643 /* skip the computation of that branch of the rep. tree */
644 
645 /* We could skip before the refinement of the extremal */
646 /* eigenvalues of the child, but then the representation */
647 /* tree could be different from the one when nothing is */
648 /* skipped. For this reason we skip at this place. */
649  idone = idone + newlst - newfst + 1;
650  goto L139;
651  }
652 
653 /* Compute RRR of child cluster. */
654 /* Note that the new RRR is stored in Z */
655 
656 /* DLARRF needs LWORK = 2*N */
657  template_lapack_larrf(&in, &d__[ibegin], &l[ibegin], &work[indld +
658  ibegin - 1], &newfst, &newlst, &work[wbegin],
659  &wgap[wbegin], &werr[wbegin], &spdiam, &lgap,
660  &rgap, pivmin, &tau, &z__[ibegin + newftt *
661  z_dim1], &z__[ibegin + (newftt + 1) * z_dim1],
662  &work[indwrk], &iinfo);
663  if (iinfo == 0) {
664 /* a new RRR for the cluster was found by DLARRF */
665 /* update shift and store it */
666  ssigma = sigma + tau;
667  z__[iend + (newftt + 1) * z_dim1] = ssigma;
668 /* WORK() are the midpoints and WERR() the semi-width */
669 /* Note that the entries in W are unchanged. */
670  i__4 = newlst;
671  for (k = newfst; k <= i__4; ++k) {
672  fudge = eps * 3. * (d__1 = work[wbegin + k -
673  1], absMACRO(d__1));
674  work[wbegin + k - 1] -= tau;
675  fudge += eps * 4. * (d__1 = work[wbegin + k -
676  1], absMACRO(d__1));
677 /* Fudge errors */
678  werr[wbegin + k - 1] += fudge;
679 /* Gaps are not fudged. Provided that WERR is small */
680 /* when eigenvalues are close, a zero gap indicates */
681 /* that a new representation is needed for resolving */
682 /* the cluster. A fudge could lead to a wrong decision */
683 /* of judging eigenvalues 'separated' which in */
684 /* reality are not. This could have a negative impact */
685 /* on the orthogonality of the computed eigenvectors. */
686 /* L116: */
687  }
688  ++nclus;
689  k = newcls + (nclus << 1);
690  iwork[k - 1] = newfst;
691  iwork[k] = newlst;
692  } else {
693  *info = -2;
694  return 0;
695  }
696  } else {
697 
698 /* Compute eigenvector of singleton */
699 
700  iter = 0;
701 
702  tol = template_blas_log((Treal) in) * 4. * eps;
703 
704  k = newfst;
705  windex = wbegin + k - 1;
706 /* Computing MAX */
707  i__4 = windex - 1;
708  windmn = maxMACRO(i__4,1);
709 /* Computing MIN */
710  i__4 = windex + 1;
711  windpl = minMACRO(i__4,*m);
712  lambda = work[windex];
713  ++done;
714 /* Check if eigenvector computation is to be skipped */
715  if (windex < *dol || windex > *dou) {
716  eskip = TRUE_;
717  goto L125;
718  } else {
719  eskip = FALSE_;
720  }
721  left = work[windex] - werr[windex];
722  right = work[windex] + werr[windex];
723  indeig = indexw[windex];
724 /* Note that since we compute the eigenpairs for a child, */
725 /* all eigenvalue approximations are w.r.t the same shift. */
726 /* In this case, the entries in WORK should be used for */
727 /* computing the gaps since they exhibit even very small */
728 /* differences in the eigenvalues, as opposed to the */
729 /* entries in W which might "look" the same. */
730  if (k == 1) {
731 /* In the case RANGE='I' and with not much initial */
732 /* accuracy in LAMBDA and VL, the formula */
733 /* LGAP = MAX( ZERO, (SIGMA - VL) + LAMBDA ) */
734 /* can lead to an overestimation of the left gap and */
735 /* thus to inadequately early RQI 'convergence'. */
736 /* Prevent this by forcing a small left gap. */
737 /* Computing MAX */
738  d__1 = absMACRO(left), d__2 = absMACRO(right);
739  lgap = eps * maxMACRO(d__1,d__2);
740  } else {
741  lgap = wgap[windmn];
742  }
743  if (k == im) {
744 /* In the case RANGE='I' and with not much initial */
745 /* accuracy in LAMBDA and VU, the formula */
746 /* can lead to an overestimation of the right gap and */
747 /* thus to inadequately early RQI 'convergence'. */
748 /* Prevent this by forcing a small right gap. */
749 /* Computing MAX */
750  d__1 = absMACRO(left), d__2 = absMACRO(right);
751  rgap = eps * maxMACRO(d__1,d__2);
752  } else {
753  rgap = wgap[windex];
754  }
755  gap = minMACRO(lgap,rgap);
756  if (k == 1 || k == im) {
757 /* The eigenvector support can become wrong */
758 /* because significant entries could be cut off due to a */
759 /* large GAPTOL parameter in LAR1V. Prevent this. */
760  gaptol = 0.;
761  } else {
762  gaptol = gap * eps;
763  }
764  isupmn = in;
765  isupmx = 1;
766 /* Update WGAP so that it holds the minimum gap */
767 /* to the left or the right. This is crucial in the */
768 /* case where bisection is used to ensure that the */
769 /* eigenvalue is refined up to the required precision. */
770 /* The correct value is restored afterwards. */
771  savgap = wgap[windex];
772  wgap[windex] = gap;
773 /* We want to use the Rayleigh Quotient Correction */
774 /* as often as possible since it converges quadratically */
775 /* when we are close enough to the desired eigenvalue. */
776 /* However, the Rayleigh Quotient can have the wrong sign */
777 /* and lead us away from the desired eigenvalue. In this */
778 /* case, the best we can do is to use bisection. */
779  usedbs = FALSE_;
780  usedrq = FALSE_;
781 /* Bisection is initially turned off unless it is forced */
782  needbs = ! tryrqc;
783 L120:
784 /* Check if bisection should be used to refine eigenvalue */
785  if (needbs) {
786 /* Take the bisection as new iterate */
787  usedbs = TRUE_;
788  itmp1 = iwork[iindr + windex];
789  offset = indexw[wbegin] - 1;
790  d__1 = eps * 2.;
791  template_lapack_larrb(&in, &d__[ibegin], &work[indlld + ibegin
792  - 1], &indeig, &indeig, &c_b5, &d__1, &
793  offset, &work[wbegin], &wgap[wbegin], &
794  werr[wbegin], &work[indwrk], &iwork[
795  iindwk], pivmin, &spdiam, &itmp1, &iinfo);
796  if (iinfo != 0) {
797  *info = -3;
798  return 0;
799  }
800  lambda = work[windex];
801 /* Reset twist index from inaccurate LAMBDA to */
802 /* force computation of true MINGMA */
803  iwork[iindr + windex] = 0;
804  }
805 /* Given LAMBDA, compute the eigenvector. */
806  L__1 = ! usedbs;
807  template_lapack_lar1v(&in, &c__1, &in, &lambda, &d__[ibegin], &l[
808  ibegin], &work[indld + ibegin - 1], &work[
809  indlld + ibegin - 1], pivmin, &gaptol, &z__[
810  ibegin + windex * z_dim1], &L__1, &negcnt, &
811  ztz, &mingma, &iwork[iindr + windex], &isuppz[
812  (windex << 1) - 1], &nrminv, &resid, &rqcorr,
813  &work[indwrk]);
814  if (iter == 0) {
815  bstres = resid;
816  bstw = lambda;
817  } else if (resid < bstres) {
818  bstres = resid;
819  bstw = lambda;
820  }
821 /* Computing MIN */
822  i__4 = isupmn, i__5 = isuppz[(windex << 1) - 1];
823  isupmn = minMACRO(i__4,i__5);
824 /* Computing MAX */
825  i__4 = isupmx, i__5 = isuppz[windex * 2];
826  isupmx = maxMACRO(i__4,i__5);
827  ++iter;
828 /* sin alpha <= |resid|/gap */
829 /* Note that both the residual and the gap are */
830 /* proportional to the matrix, so ||T|| doesn't play */
831 /* a role in the quotient */
832 
833 /* Convergence test for Rayleigh-Quotient iteration */
834 /* (omitted when Bisection has been used) */
835 
836  if (resid > tol * gap && absMACRO(rqcorr) > rqtol * absMACRO(
837  lambda) && ! usedbs) {
838 /* We need to check that the RQCORR update doesn't */
839 /* move the eigenvalue away from the desired one and */
840 /* towards a neighbor. -> protection with bisection */
841  if (indeig <= negcnt) {
842 /* The wanted eigenvalue lies to the left */
843  sgndef = -1.;
844  } else {
845 /* The wanted eigenvalue lies to the right */
846  sgndef = 1.;
847  }
848 /* We only use the RQCORR if it improves the */
849 /* the iterate reasonably. */
850  if (rqcorr * sgndef >= 0. && lambda + rqcorr <=
851  right && lambda + rqcorr >= left) {
852  usedrq = TRUE_;
853 /* Store new midpoint of bisection interval in WORK */
854  if (sgndef == 1.) {
855 /* The current LAMBDA is on the left of the true */
856 /* eigenvalue */
857  left = lambda;
858 /* We prefer to assume that the error estimate */
859 /* is correct. We could make the interval not */
860 /* as a bracket but to be modified if the RQCORR */
861 /* chooses to. In this case, the RIGHT side should */
862 /* be modified as follows: */
863 /* RIGHT = MAX(RIGHT, LAMBDA + RQCORR) */
864  } else {
865 /* The current LAMBDA is on the right of the true */
866 /* eigenvalue */
867  right = lambda;
868 /* See comment about assuming the error estimate is */
869 /* correct above. */
870 /* LEFT = MIN(LEFT, LAMBDA + RQCORR) */
871  }
872  work[windex] = (right + left) * .5;
873 /* Take RQCORR since it has the correct sign and */
874 /* improves the iterate reasonably */
875  lambda += rqcorr;
876 /* Update width of error interval */
877  werr[windex] = (right - left) * .5;
878  } else {
879  needbs = TRUE_;
880  }
881  if (right - left < rqtol * absMACRO(lambda)) {
882 /* The eigenvalue is computed to bisection accuracy */
883 /* compute eigenvector and stop */
884  usedbs = TRUE_;
885  goto L120;
886  } else if (iter < 10) {
887  goto L120;
888  } else if (iter == 10) {
889  needbs = TRUE_;
890  goto L120;
891  } else {
892  *info = 5;
893  return 0;
894  }
895  } else {
896  stp2ii = FALSE_;
897  if (usedrq && usedbs && bstres <= resid) {
898  lambda = bstw;
899  stp2ii = TRUE_;
900  }
901  if (stp2ii) {
902 /* improve error angle by second step */
903  L__1 = ! usedbs;
904  template_lapack_lar1v(&in, &c__1, &in, &lambda, &d__[ibegin]
905 , &l[ibegin], &work[indld + ibegin -
906  1], &work[indlld + ibegin - 1],
907  pivmin, &gaptol, &z__[ibegin + windex
908  * z_dim1], &L__1, &negcnt, &ztz, &
909  mingma, &iwork[iindr + windex], &
910  isuppz[(windex << 1) - 1], &nrminv, &
911  resid, &rqcorr, &work[indwrk]);
912  }
913  work[windex] = lambda;
914  }
915 
916 /* Compute FP-vector support w.r.t. whole matrix */
917 
918  isuppz[(windex << 1) - 1] += oldien;
919  isuppz[windex * 2] += oldien;
920  zfrom = isuppz[(windex << 1) - 1];
921  zto = isuppz[windex * 2];
922  isupmn += oldien;
923  isupmx += oldien;
924 /* Ensure vector is ok if support in the RQI has changed */
925  if (isupmn < zfrom) {
926  i__4 = zfrom - 1;
927  for (ii = isupmn; ii <= i__4; ++ii) {
928  z__[ii + windex * z_dim1] = 0.;
929 /* L122: */
930  }
931  }
932  if (isupmx > zto) {
933  i__4 = isupmx;
934  for (ii = zto + 1; ii <= i__4; ++ii) {
935  z__[ii + windex * z_dim1] = 0.;
936 /* L123: */
937  }
938  }
939  i__4 = zto - zfrom + 1;
940  template_blas_scal(&i__4, &nrminv, &z__[zfrom + windex * z_dim1],
941  &c__1);
942 L125:
943 /* Update W */
944  w[windex] = lambda + sigma;
945 /* Recompute the gaps on the left and right */
946 /* But only allow them to become larger and not */
947 /* smaller (which can only happen through "bad" */
948 /* cancellation and doesn't reflect the theory */
949 /* where the initial gaps are underestimated due */
950 /* to WERR being too crude.) */
951  if (! eskip) {
952  if (k > 1) {
953 /* Computing MAX */
954  d__1 = wgap[windmn], d__2 = w[windex] - werr[
955  windex] - w[windmn] - werr[windmn];
956  wgap[windmn] = maxMACRO(d__1,d__2);
957  }
958  if (windex < wend) {
959 /* Computing MAX */
960  d__1 = savgap, d__2 = w[windpl] - werr[windpl]
961  - w[windex] - werr[windex];
962  wgap[windex] = maxMACRO(d__1,d__2);
963  }
964  }
965  ++idone;
966  }
967 /* here ends the code for the current child */
968 
969 L139:
970 /* Proceed to any remaining child nodes */
971  newfst = j + 1;
972 L140:
973  ;
974  }
975 /* L150: */
976  }
977  ++ndepth;
978  goto L40;
979  }
980  ibegin = iend + 1;
981  wbegin = wend + 1;
982 L170:
983  ;
984  }
985 
986  return 0;
987 
988 /* End of DLARRV */
989 
990 } /* dlarrv_ */
991 
992 #endif