ergo
template_lapack_larre.h
Go to the documentation of this file.
1 /* Ergo, version 3.2, a program for linear scaling electronic structure
2  * calculations.
3  * Copyright (C) 2012 Elias Rudberg, Emanuel H. Rubensson, and Pawel Salek.
4  *
5  * This program is free software: you can redistribute it and/or modify
6  * it under the terms of the GNU General Public License as published by
7  * the Free Software Foundation, either version 3 of the License, or
8  * (at your option) any later version.
9  *
10  * This program is distributed in the hope that it will be useful,
11  * but WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13  * GNU General Public License for more details.
14  *
15  * You should have received a copy of the GNU General Public License
16  * along with this program. If not, see <http://www.gnu.org/licenses/>.
17  *
18  * Primary academic reference:
19  * Kohn−Sham Density Functional Theory Electronic Structure Calculations
20  * with Linearly Scaling Computational Time and Memory Usage,
21  * Elias Rudberg, Emanuel H. Rubensson, and Pawel Salek,
22  * J. Chem. Theory Comput. 7, 340 (2011),
23  * <http://dx.doi.org/10.1021/ct100611z>
24  *
25  * For further information about Ergo, see <http://www.ergoscf.org>.
26  */
27 
28  /* This file belongs to the template_lapack part of the Ergo source
29  * code. The source files in the template_lapack directory are modified
30  * versions of files originally distributed as CLAPACK, see the
31  * Copyright/license notice in the file template_lapack/COPYING.
32  */
33 
34 
35 #ifndef TEMPLATE_LAPACK_LARRE_HEADER
36 #define TEMPLATE_LAPACK_LARRE_HEADER
37 
38 
39 template<class Treal>
40 int template_lapack_larre(const char *range, const integer *n, Treal *vl,
41  Treal *vu, integer *il, integer *iu, Treal *d__, Treal
42  *e, Treal *e2, Treal *rtol1, Treal *rtol2, Treal *
43  spltol, integer *nsplit, integer *isplit, integer *m, Treal *w,
44  Treal *werr, Treal *wgap, integer *iblock, integer *indexw,
45  Treal *gers, Treal *pivmin, Treal *work, integer *
46  iwork, integer *info)
47 {
48  /* System generated locals */
49  integer i__1, i__2;
50  Treal d__1, d__2, d__3;
51 
52 
53  /* Local variables */
54  integer i__, j;
55  Treal s1, s2;
56  integer mb = 0; // EMANUEL COMMENT: initialize to get rid of compiler warning
57  Treal gl;
58  integer in, mm;
59  Treal gu;
60  integer cnt;
61  Treal eps, tau, tmp, rtl;
62  integer cnt1, cnt2;
63  Treal tmp1, eabs;
64  integer iend, jblk;
65  Treal eold;
66  integer indl;
67  Treal dmax__, emax;
68  integer wend = 0; // EMANUEL COMMENT: initialize to get rid of compiler warning
69  integer idum, indu;
70  Treal rtol;
71  integer iseed[4];
72  Treal avgap, sigma;
73  integer iinfo;
74  logical norep;
75  integer ibegin;
76  logical forceb;
77  integer irange = 0; // EMANUEL COMMENT: initialize to get rid of compiler warning
78  Treal sgndef;
79  integer wbegin;
80  Treal safmin, spdiam;
81  logical usedqd;
82  Treal clwdth, isleft;
83  Treal isrght, bsrtol, dpivot;
84 
85 
86 /* -- LAPACK auxiliary routine (version 3.2) -- */
87 /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */
88 /* November 2006 */
89 
90 /* .. Scalar Arguments .. */
91 /* .. */
92 /* .. Array Arguments .. */
93 /* .. */
94 
95 /* Purpose */
96 /* ======= */
97 
98 /* To find the desired eigenvalues of a given real symmetric */
99 /* tridiagonal matrix T, DLARRE sets any "small" off-diagonal */
100 /* elements to zero, and for each unreduced block T_i, it finds */
101 /* (a) a suitable shift at one end of the block's spectrum, */
102 /* (b) the base representation, T_i - sigma_i I = L_i D_i L_i^T, and */
103 /* (c) eigenvalues of each L_i D_i L_i^T. */
104 /* The representations and eigenvalues found are then used by */
105 /* DSTEMR to compute the eigenvectors of T. */
106 /* The accuracy varies depending on whether bisection is used to */
107 /* find a few eigenvalues or the dqds algorithm (subroutine DLASQ2) to */
108 /* conpute all and then discard any unwanted one. */
109 /* As an added benefit, DLARRE also outputs the n */
110 /* Gerschgorin intervals for the matrices L_i D_i L_i^T. */
111 
112 /* Arguments */
113 /* ========= */
114 
115 /* RANGE (input) CHARACTER */
116 /* = 'A': ("All") all eigenvalues will be found. */
117 /* = 'V': ("Value") all eigenvalues in the half-open interval */
118 /* (VL, VU] will be found. */
119 /* = 'I': ("Index") the IL-th through IU-th eigenvalues (of the */
120 /* entire matrix) will be found. */
121 
122 /* N (input) INTEGER */
123 /* The order of the matrix. N > 0. */
124 
125 /* VL (input/output) DOUBLE PRECISION */
126 /* VU (input/output) DOUBLE PRECISION */
127 /* If RANGE='V', the lower and upper bounds for the eigenvalues. */
128 /* Eigenvalues less than or equal to VL, or greater than VU, */
129 /* will not be returned. VL < VU. */
130 /* If RANGE='I' or ='A', DLARRE computes bounds on the desired */
131 /* part of the spectrum. */
132 
133 /* IL (input) INTEGER */
134 /* IU (input) INTEGER */
135 /* If RANGE='I', the indices (in ascending order) of the */
136 /* smallest and largest eigenvalues to be returned. */
137 /* 1 <= IL <= IU <= N. */
138 
139 /* D (input/output) DOUBLE PRECISION array, dimension (N) */
140 /* On entry, the N diagonal elements of the tridiagonal */
141 /* matrix T. */
142 /* On exit, the N diagonal elements of the diagonal */
143 /* matrices D_i. */
144 
145 /* E (input/output) DOUBLE PRECISION array, dimension (N) */
146 /* On entry, the first (N-1) entries contain the subdiagonal */
147 /* elements of the tridiagonal matrix T; E(N) need not be set. */
148 /* On exit, E contains the subdiagonal elements of the unit */
149 /* bidiagonal matrices L_i. The entries E( ISPLIT( I ) ), */
150 /* 1 <= I <= NSPLIT, contain the base points sigma_i on output. */
151 
152 /* E2 (input/output) DOUBLE PRECISION array, dimension (N) */
153 /* On entry, the first (N-1) entries contain the SQUARES of the */
154 /* subdiagonal elements of the tridiagonal matrix T; */
155 /* E2(N) need not be set. */
156 /* On exit, the entries E2( ISPLIT( I ) ), */
157 /* 1 <= I <= NSPLIT, have been set to zero */
158 
159 /* RTOL1 (input) DOUBLE PRECISION */
160 /* RTOL2 (input) DOUBLE PRECISION */
161 /* Parameters for bisection. */
162 /* An interval [LEFT,RIGHT] has converged if */
163 /* RIGHT-LEFT.LT.MAX( RTOL1*GAP, RTOL2*MAX(|LEFT|,|RIGHT|) ) */
164 
165 /* SPLTOL (input) DOUBLE PRECISION */
166 /* The threshold for splitting. */
167 
168 /* NSPLIT (output) INTEGER */
169 /* The number of blocks T splits into. 1 <= NSPLIT <= N. */
170 
171 /* ISPLIT (output) INTEGER array, dimension (N) */
172 /* The splitting points, at which T breaks up into blocks. */
173 /* The first block consists of rows/columns 1 to ISPLIT(1), */
174 /* the second of rows/columns ISPLIT(1)+1 through ISPLIT(2), */
175 /* etc., and the NSPLIT-th consists of rows/columns */
176 /* ISPLIT(NSPLIT-1)+1 through ISPLIT(NSPLIT)=N. */
177 
178 /* M (output) INTEGER */
179 /* The total number of eigenvalues (of all L_i D_i L_i^T) */
180 /* found. */
181 
182 /* W (output) DOUBLE PRECISION array, dimension (N) */
183 /* The first M elements contain the eigenvalues. The */
184 /* eigenvalues of each of the blocks, L_i D_i L_i^T, are */
185 /* sorted in ascending order ( DLARRE may use the */
186 /* remaining N-M elements as workspace). */
187 
188 /* WERR (output) DOUBLE PRECISION array, dimension (N) */
189 /* The error bound on the corresponding eigenvalue in W. */
190 
191 /* WGAP (output) DOUBLE PRECISION array, dimension (N) */
192 /* The separation from the right neighbor eigenvalue in W. */
193 /* The gap is only with respect to the eigenvalues of the same block */
194 /* as each block has its own representation tree. */
195 /* Exception: at the right end of a block we store the left gap */
196 
197 /* IBLOCK (output) INTEGER array, dimension (N) */
198 /* The indices of the blocks (submatrices) associated with the */
199 /* corresponding eigenvalues in W; IBLOCK(i)=1 if eigenvalue */
200 /* W(i) belongs to the first block from the top, =2 if W(i) */
201 /* belongs to the second block, etc. */
202 
203 /* INDEXW (output) INTEGER array, dimension (N) */
204 /* The indices of the eigenvalues within each block (submatrix); */
205 /* for example, INDEXW(i)= 10 and IBLOCK(i)=2 imply that the */
206 /* i-th eigenvalue W(i) is the 10-th eigenvalue in block 2 */
207 
208 /* GERS (output) DOUBLE PRECISION array, dimension (2*N) */
209 /* The N Gerschgorin intervals (the i-th Gerschgorin interval */
210 /* is (GERS(2*i-1), GERS(2*i)). */
211 
212 /* PIVMIN (output) DOUBLE PRECISION */
213 /* The minimum pivot in the Sturm sequence for T. */
214 
215 /* WORK (workspace) DOUBLE PRECISION array, dimension (6*N) */
216 /* Workspace. */
217 
218 /* IWORK (workspace) INTEGER array, dimension (5*N) */
219 /* Workspace. */
220 
221 /* INFO (output) INTEGER */
222 /* = 0: successful exit */
223 /* > 0: A problem occured in DLARRE. */
224 /* < 0: One of the called subroutines signaled an internal problem. */
225 /* Needs inspection of the corresponding parameter IINFO */
226 /* for further information. */
227 
228 /* =-1: Problem in DLARRD. */
229 /* = 2: No base representation could be found in MAXTRY iterations. */
230 /* Increasing MAXTRY and recompilation might be a remedy. */
231 /* =-3: Problem in DLARRB when computing the refined root */
232 /* representation for DLASQ2. */
233 /* =-4: Problem in DLARRB when preforming bisection on the */
234 /* desired part of the spectrum. */
235 /* =-5: Problem in DLASQ2. */
236 /* =-6: Problem in DLASQ2. */
237 
238 /* Further Details */
239 /* The base representations are required to suffer very little */
240 /* element growth and consequently define all their eigenvalues to */
241 /* high relative accuracy. */
242 /* =============== */
243 
244 /* Based on contributions by */
245 /* Beresford Parlett, University of California, Berkeley, USA */
246 /* Jim Demmel, University of California, Berkeley, USA */
247 /* Inderjit Dhillon, University of Texas, Austin, USA */
248 /* Osni Marques, LBNL/NERSC, USA */
249 /* Christof Voemel, University of California, Berkeley, USA */
250 
251 /* ===================================================================== */
252 
253 /* .. Parameters .. */
254 /* .. */
255 /* .. Local Scalars .. */
256 /* .. */
257 /* .. Local Arrays .. */
258 /* .. */
259 /* .. External Functions .. */
260 /* .. */
261 /* .. External Subroutines .. */
262 /* .. */
263 /* .. Intrinsic Functions .. */
264 /* .. */
265 /* .. Executable Statements .. */
266 
267  /* Parameter adjustments */
268 
269 
270  /* Table of constant values */
271 
272  integer c__1 = 1;
273  integer c__2 = 2;
274 
275 
276  --iwork;
277  --work;
278  --gers;
279  --indexw;
280  --iblock;
281  --wgap;
282  --werr;
283  --w;
284  --isplit;
285  --e2;
286  --e;
287  --d__;
288 
289  /* Initialization added by Elias to get rid of compiler warnings. */
290  mm = 0;
291  /* Function Body */
292  *info = 0;
293 
294 /* Decode RANGE */
295 
296  if (template_blas_lsame(range, "A")) {
297  irange = 1;
298  } else if (template_blas_lsame(range, "V")) {
299  irange = 3;
300  } else if (template_blas_lsame(range, "I")) {
301  irange = 2;
302  }
303  *m = 0;
304 /* Get machine constants */
305  safmin = template_lapack_lamch("S",(Treal)0);
306  eps = template_lapack_lamch("P",(Treal)0);
307 /* Set parameters */
308  rtl = template_blas_sqrt(eps);
309  bsrtol = template_blas_sqrt(eps);
310 /* Treat case of 1x1 matrix for quick return */
311  if (*n == 1) {
312  if (irange == 1 || ( irange == 3 && d__[1] > *vl && d__[1] <= *vu ) ||
313  ( irange == 2 && *il == 1 && *iu == 1 ) ) {
314  *m = 1;
315  w[1] = d__[1];
316 /* The computation error of the eigenvalue is zero */
317  werr[1] = 0.;
318  wgap[1] = 0.;
319  iblock[1] = 1;
320  indexw[1] = 1;
321  gers[1] = d__[1];
322  gers[2] = d__[1];
323  }
324 /* store the shift for the initial RRR, which is zero in this case */
325  e[1] = 0.;
326  return 0;
327  }
328 /* General case: tridiagonal matrix of order > 1 */
329 
330 /* Init WERR, WGAP. Compute Gerschgorin intervals and spectral diameter. */
331 /* Compute maximum off-diagonal entry and pivmin. */
332  gl = d__[1];
333  gu = d__[1];
334  eold = 0.;
335  emax = 0.;
336  e[*n] = 0.;
337  i__1 = *n;
338  for (i__ = 1; i__ <= i__1; ++i__) {
339  werr[i__] = 0.;
340  wgap[i__] = 0.;
341  eabs = (d__1 = e[i__], absMACRO(d__1));
342  if (eabs >= emax) {
343  emax = eabs;
344  }
345  tmp1 = eabs + eold;
346  gers[(i__ << 1) - 1] = d__[i__] - tmp1;
347 /* Computing MIN */
348  d__1 = gl, d__2 = gers[(i__ << 1) - 1];
349  gl = minMACRO(d__1,d__2);
350  gers[i__ * 2] = d__[i__] + tmp1;
351 /* Computing MAX */
352  d__1 = gu, d__2 = gers[i__ * 2];
353  gu = maxMACRO(d__1,d__2);
354  eold = eabs;
355 /* L5: */
356  }
357 /* The minimum pivot allowed in the Sturm sequence for T */
358 /* Computing MAX */
359 /* Computing 2nd power */
360  d__3 = emax;
361  d__1 = 1., d__2 = d__3 * d__3;
362  *pivmin = safmin * maxMACRO(d__1,d__2);
363 /* Compute spectral diameter. The Gerschgorin bounds give an */
364 /* estimate that is wrong by at most a factor of SQRT(2) */
365  spdiam = gu - gl;
366 /* Compute splitting points */
367  template_lapack_larra(n, &d__[1], &e[1], &e2[1], spltol, &spdiam, nsplit, &isplit[1], &
368  iinfo);
369 /* Can force use of bisection instead of faster DQDS. */
370 /* Option left in the code for future multisection work. */
371  forceb = FALSE_;
372 /* Initialize USEDQD, DQDS should be used for ALLRNG unless someone */
373 /* explicitly wants bisection. */
374  usedqd = irange == 1 && ! forceb;
375  if (irange == 1 && ! forceb) {
376 /* Set interval [VL,VU] that contains all eigenvalues */
377  *vl = gl;
378  *vu = gu;
379  } else {
380 /* We call DLARRD to find crude approximations to the eigenvalues */
381 /* in the desired range. In case IRANGE = INDRNG, we also obtain the */
382 /* interval (VL,VU] that contains all the wanted eigenvalues. */
383 /* An interval [LEFT,RIGHT] has converged if */
384 /* RIGHT-LEFT.LT.RTOL*MAX(ABS(LEFT),ABS(RIGHT)) */
385 /* DLARRD needs a WORK of size 4*N, IWORK of size 3*N */
386  template_lapack_larrd(range, "B", n, vl, vu, il, iu, &gers[1], &bsrtol, &d__[1], &e[
387  1], &e2[1], pivmin, nsplit, &isplit[1], &mm, &w[1], &werr[1],
388  vl, vu, &iblock[1], &indexw[1], &work[1], &iwork[1], &iinfo);
389  if (iinfo != 0) {
390  *info = -1;
391  return 0;
392  }
393 /* Make sure that the entries M+1 to N in W, WERR, IBLOCK, INDEXW are 0 */
394  i__1 = *n;
395  for (i__ = mm + 1; i__ <= i__1; ++i__) {
396  w[i__] = 0.;
397  werr[i__] = 0.;
398  iblock[i__] = 0;
399  indexw[i__] = 0;
400 /* L14: */
401  }
402  }
403 /* ** */
404 /* Loop over unreduced blocks */
405  ibegin = 1;
406  wbegin = 1;
407  i__1 = *nsplit;
408  for (jblk = 1; jblk <= i__1; ++jblk) {
409  iend = isplit[jblk];
410  in = iend - ibegin + 1;
411 /* 1 X 1 block */
412  if (in == 1) {
413  if (irange == 1 || ( irange == 3 && d__[ibegin] > *vl && d__[ibegin]
414  <= *vu ) || ( irange == 2 && iblock[wbegin] == jblk ) ) {
415  ++(*m);
416  w[*m] = d__[ibegin];
417  werr[*m] = 0.;
418 /* The gap for a single block doesn't matter for the later */
419 /* algorithm and is assigned an arbitrary large value */
420  wgap[*m] = 0.;
421  iblock[*m] = jblk;
422  indexw[*m] = 1;
423  ++wbegin;
424  }
425 /* E( IEND ) holds the shift for the initial RRR */
426  e[iend] = 0.;
427  ibegin = iend + 1;
428  goto L170;
429  }
430 
431 /* Blocks of size larger than 1x1 */
432 
433 /* E( IEND ) will hold the shift for the initial RRR, for now set it =0 */
434  e[iend] = 0.;
435 
436 /* Find local outer bounds GL,GU for the block */
437  gl = d__[ibegin];
438  gu = d__[ibegin];
439  i__2 = iend;
440  for (i__ = ibegin; i__ <= i__2; ++i__) {
441 /* Computing MIN */
442  d__1 = gers[(i__ << 1) - 1];
443  gl = minMACRO(d__1,gl);
444 /* Computing MAX */
445  d__1 = gers[i__ * 2];
446  gu = maxMACRO(d__1,gu);
447 /* L15: */
448  }
449  spdiam = gu - gl;
450  if (! (irange == 1 && ! forceb)) {
451 /* Count the number of eigenvalues in the current block. */
452  mb = 0;
453  i__2 = mm;
454  for (i__ = wbegin; i__ <= i__2; ++i__) {
455  if (iblock[i__] == jblk) {
456  ++mb;
457  } else {
458  goto L21;
459  }
460 /* L20: */
461  }
462 L21:
463  if (mb == 0) {
464 /* No eigenvalue in the current block lies in the desired range */
465 /* E( IEND ) holds the shift for the initial RRR */
466  e[iend] = 0.;
467  ibegin = iend + 1;
468  goto L170;
469  } else {
470 /* Decide whether dqds or bisection is more efficient */
471  usedqd = (Treal) mb > in * .5 && ! forceb;
472  wend = wbegin + mb - 1;
473 /* Calculate gaps for the current block */
474 /* In later stages, when representations for individual */
475 /* eigenvalues are different, we use SIGMA = E( IEND ). */
476  sigma = 0.;
477  i__2 = wend - 1;
478  for (i__ = wbegin; i__ <= i__2; ++i__) {
479 /* Computing MAX */
480  d__1 = 0., d__2 = w[i__ + 1] - werr[i__ + 1] - (w[i__] +
481  werr[i__]);
482  wgap[i__] = maxMACRO(d__1,d__2);
483 /* L30: */
484  }
485 /* Computing MAX */
486  d__1 = 0., d__2 = *vu - sigma - (w[wend] + werr[wend]);
487  wgap[wend] = maxMACRO(d__1,d__2);
488 /* Find local index of the first and last desired evalue. */
489  indl = indexw[wbegin];
490  indu = indexw[wend];
491  }
492  }
493  if ( ( irange == 1 && ! forceb ) || usedqd) {
494 /* Case of DQDS */
495 /* Find approximations to the extremal eigenvalues of the block */
496  template_lapack_larrk(&in, &c__1, &gl, &gu, &d__[ibegin], &e2[ibegin], pivmin, &
497  rtl, &tmp, &tmp1, &iinfo);
498  if (iinfo != 0) {
499  *info = -1;
500  return 0;
501  }
502 /* Computing MAX */
503  d__2 = gl, d__3 = tmp - tmp1 - eps * 100. * (d__1 = tmp - tmp1,
504  absMACRO(d__1));
505  isleft = maxMACRO(d__2,d__3);
506  template_lapack_larrk(&in, &in, &gl, &gu, &d__[ibegin], &e2[ibegin], pivmin, &
507  rtl, &tmp, &tmp1, &iinfo);
508  if (iinfo != 0) {
509  *info = -1;
510  return 0;
511  }
512 /* Computing MIN */
513  d__2 = gu, d__3 = tmp + tmp1 + eps * 100. * (d__1 = tmp + tmp1,
514  absMACRO(d__1));
515  isrght = minMACRO(d__2,d__3);
516 /* Improve the estimate of the spectral diameter */
517  spdiam = isrght - isleft;
518  } else {
519 /* Case of bisection */
520 /* Find approximations to the wanted extremal eigenvalues */
521 /* Computing MAX */
522  d__2 = gl, d__3 = w[wbegin] - werr[wbegin] - eps * 100. * (d__1 =
523  w[wbegin] - werr[wbegin], absMACRO(d__1));
524  isleft = maxMACRO(d__2,d__3);
525 /* Computing MIN */
526  d__2 = gu, d__3 = w[wend] + werr[wend] + eps * 100. * (d__1 = w[
527  wend] + werr[wend], absMACRO(d__1));
528  isrght = minMACRO(d__2,d__3);
529  }
530 /* Decide whether the base representation for the current block */
531 /* L_JBLK D_JBLK L_JBLK^T = T_JBLK - sigma_JBLK I */
532 /* should be on the left or the right end of the current block. */
533 /* The strategy is to shift to the end which is "more populated" */
534 /* Furthermore, decide whether to use DQDS for the computation of */
535 /* the eigenvalue approximations at the end of DLARRE or bisection. */
536 /* dqds is chosen if all eigenvalues are desired or the number of */
537 /* eigenvalues to be computed is large compared to the blocksize. */
538  if (irange == 1 && ! forceb) {
539 /* If all the eigenvalues have to be computed, we use dqd */
540  usedqd = TRUE_;
541 /* INDL is the local index of the first eigenvalue to compute */
542  indl = 1;
543  indu = in;
544 /* MB = number of eigenvalues to compute */
545  mb = in;
546  wend = wbegin + mb - 1;
547 /* Define 1/4 and 3/4 points of the spectrum */
548  s1 = isleft + spdiam * .25;
549  s2 = isrght - spdiam * .25;
550  } else {
551 /* DLARRD has computed IBLOCK and INDEXW for each eigenvalue */
552 /* approximation. */
553 /* choose sigma */
554  if (usedqd) {
555  s1 = isleft + spdiam * .25;
556  s2 = isrght - spdiam * .25;
557  } else {
558  tmp = minMACRO(isrght,*vu) - maxMACRO(isleft,*vl);
559  s1 = maxMACRO(isleft,*vl) + tmp * .25;
560  s2 = minMACRO(isrght,*vu) - tmp * .25;
561  }
562  }
563 /* Compute the negcount at the 1/4 and 3/4 points */
564  if (mb > 1) {
565  template_lapack_larrc("T", &in, &s1, &s2, &d__[ibegin], &e[ibegin], pivmin, &
566  cnt, &cnt1, &cnt2, &iinfo);
567  }
568  if (mb == 1) {
569  sigma = gl;
570  sgndef = 1.;
571  } else if (cnt1 - indl >= indu - cnt2) {
572  if (irange == 1 && ! forceb) {
573  sigma = maxMACRO(isleft,gl);
574  } else if (usedqd) {
575 /* use Gerschgorin bound as shift to get pos def matrix */
576 /* for dqds */
577  sigma = isleft;
578  } else {
579 /* use approximation of the first desired eigenvalue of the */
580 /* block as shift */
581  sigma = maxMACRO(isleft,*vl);
582  }
583  sgndef = 1.;
584  } else {
585  if (irange == 1 && ! forceb) {
586  sigma = minMACRO(isrght,gu);
587  } else if (usedqd) {
588 /* use Gerschgorin bound as shift to get neg def matrix */
589 /* for dqds */
590  sigma = isrght;
591  } else {
592 /* use approximation of the first desired eigenvalue of the */
593 /* block as shift */
594  sigma = minMACRO(isrght,*vu);
595  }
596  sgndef = -1.;
597  }
598 /* An initial SIGMA has been chosen that will be used for computing */
599 /* T - SIGMA I = L D L^T */
600 /* Define the increment TAU of the shift in case the initial shift */
601 /* needs to be refined to obtain a factorization with not too much */
602 /* element growth. */
603  if (usedqd) {
604 /* The initial SIGMA was to the outer end of the spectrum */
605 /* the matrix is definite and we need not retreat. */
606  tau = spdiam * eps * *n + *pivmin * 2.;
607  } else {
608  if (mb > 1) {
609  clwdth = w[wend] + werr[wend] - w[wbegin] - werr[wbegin];
610  avgap = (d__1 = clwdth / (Treal) (wend - wbegin), absMACRO(
611  d__1));
612  if (sgndef == 1.) {
613 /* Computing MAX */
614  d__1 = wgap[wbegin];
615  tau = maxMACRO(d__1,avgap) * .5;
616 /* Computing MAX */
617  d__1 = tau, d__2 = werr[wbegin];
618  tau = maxMACRO(d__1,d__2);
619  } else {
620 /* Computing MAX */
621  d__1 = wgap[wend - 1];
622  tau = maxMACRO(d__1,avgap) * .5;
623 /* Computing MAX */
624  d__1 = tau, d__2 = werr[wend];
625  tau = maxMACRO(d__1,d__2);
626  }
627  } else {
628  tau = werr[wbegin];
629  }
630  }
631 
632  for (idum = 1; idum <= 6; ++idum) {
633 /* Compute L D L^T factorization of tridiagonal matrix T - sigma I. */
634 /* Store D in WORK(1:IN), L in WORK(IN+1:2*IN), and reciprocals of */
635 /* pivots in WORK(2*IN+1:3*IN) */
636  dpivot = d__[ibegin] - sigma;
637  work[1] = dpivot;
638  dmax__ = absMACRO(work[1]);
639  j = ibegin;
640  i__2 = in - 1;
641  for (i__ = 1; i__ <= i__2; ++i__) {
642  work[(in << 1) + i__] = 1. / work[i__];
643  tmp = e[j] * work[(in << 1) + i__];
644  work[in + i__] = tmp;
645  dpivot = d__[j + 1] - sigma - tmp * e[j];
646  work[i__ + 1] = dpivot;
647 /* Computing MAX */
648  d__1 = dmax__, d__2 = absMACRO(dpivot);
649  dmax__ = maxMACRO(d__1,d__2);
650  ++j;
651 /* L70: */
652  }
653 /* check for element growth */
654  if (dmax__ > spdiam * 64.) {
655  norep = TRUE_;
656  } else {
657  norep = FALSE_;
658  }
659  if (usedqd && ! norep) {
660 /* Ensure the definiteness of the representation */
661 /* All entries of D (of L D L^T) must have the same sign */
662  i__2 = in;
663  for (i__ = 1; i__ <= i__2; ++i__) {
664  tmp = sgndef * work[i__];
665  if (tmp < 0.) {
666  norep = TRUE_;
667  }
668 /* L71: */
669  }
670  }
671  if (norep) {
672 /* Note that in the case of IRANGE=ALLRNG, we use the Gerschgorin */
673 /* shift which makes the matrix definite. So we should end up */
674 /* here really only in the case of IRANGE = VALRNG or INDRNG. */
675  if (idum == 5) {
676  if (sgndef == 1.) {
677 /* The fudged Gerschgorin shift should succeed */
678  sigma = gl - spdiam * 2. * eps * *n - *pivmin * 4.;
679  } else {
680  sigma = gu + spdiam * 2. * eps * *n + *pivmin * 4.;
681  }
682  } else {
683  sigma -= sgndef * tau;
684  tau *= 2.;
685  }
686  } else {
687 /* an initial RRR is found */
688  goto L83;
689  }
690 /* L80: */
691  }
692 /* if the program reaches this point, no base representation could be */
693 /* found in MAXTRY iterations. */
694  *info = 2;
695  return 0;
696 L83:
697 /* At this point, we have found an initial base representation */
698 /* T - SIGMA I = L D L^T with not too much element growth. */
699 /* Store the shift. */
700  e[iend] = sigma;
701 /* Store D and L. */
702  template_blas_copy(&in, &work[1], &c__1, &d__[ibegin], &c__1);
703  i__2 = in - 1;
704  template_blas_copy(&i__2, &work[in + 1], &c__1, &e[ibegin], &c__1);
705  if (mb > 1) {
706 
707 /* Perturb each entry of the base representation by a small */
708 /* (but random) relative amount to overcome difficulties with */
709 /* glued matrices. */
710 
711  for (i__ = 1; i__ <= 4; ++i__) {
712  iseed[i__ - 1] = 1;
713 /* L122: */
714  }
715  i__2 = (in << 1) - 1;
716  template_lapack_larnv(&c__2, iseed, &i__2, &work[1]);
717  i__2 = in - 1;
718  for (i__ = 1; i__ <= i__2; ++i__) {
719  d__[ibegin + i__ - 1] *= eps * 8. * work[i__] + 1.;
720  e[ibegin + i__ - 1] *= eps * 8. * work[in + i__] + 1.;
721 /* L125: */
722  }
723  d__[iend] *= eps * 4. * work[in] + 1.;
724 
725  }
726 
727 /* Don't update the Gerschgorin intervals because keeping track */
728 /* of the updates would be too much work in DLARRV. */
729 /* We update W instead and use it to locate the proper Gerschgorin */
730 /* intervals. */
731 /* Compute the required eigenvalues of L D L' by bisection or dqds */
732  if (! usedqd) {
733 /* If DLARRD has been used, shift the eigenvalue approximations */
734 /* according to their representation. This is necessary for */
735 /* a uniform DLARRV since dqds computes eigenvalues of the */
736 /* shifted representation. In DLARRV, W will always hold the */
737 /* UNshifted eigenvalue approximation. */
738  i__2 = wend;
739  for (j = wbegin; j <= i__2; ++j) {
740  w[j] -= sigma;
741  werr[j] += (d__1 = w[j], absMACRO(d__1)) * eps;
742 /* L134: */
743  }
744 /* call DLARRB to reduce eigenvalue error of the approximations */
745 /* from DLARRD */
746  i__2 = iend - 1;
747  for (i__ = ibegin; i__ <= i__2; ++i__) {
748 /* Computing 2nd power */
749  d__1 = e[i__];
750  work[i__] = d__[i__] * (d__1 * d__1);
751 /* L135: */
752  }
753 /* use bisection to find EV from INDL to INDU */
754  i__2 = indl - 1;
755  template_lapack_larrb(&in, &d__[ibegin], &work[ibegin], &indl, &indu, rtol1,
756  rtol2, &i__2, &w[wbegin], &wgap[wbegin], &werr[wbegin], &
757  work[(*n << 1) + 1], &iwork[1], pivmin, &spdiam, &in, &
758  iinfo);
759  if (iinfo != 0) {
760  *info = -4;
761  return 0;
762  }
763 /* DLARRB computes all gaps correctly except for the last one */
764 /* Record distance to VU/GU */
765 /* Computing MAX */
766  d__1 = 0., d__2 = *vu - sigma - (w[wend] + werr[wend]);
767  wgap[wend] = maxMACRO(d__1,d__2);
768  i__2 = indu;
769  for (i__ = indl; i__ <= i__2; ++i__) {
770  ++(*m);
771  iblock[*m] = jblk;
772  indexw[*m] = i__;
773 /* L138: */
774  }
775  } else {
776 /* Call dqds to get all eigs (and then possibly delete unwanted */
777 /* eigenvalues). */
778 /* Note that dqds finds the eigenvalues of the L D L^T representation */
779 /* of T to high relative accuracy. High relative accuracy */
780 /* might be lost when the shift of the RRR is subtracted to obtain */
781 /* the eigenvalues of T. However, T is not guaranteed to define its */
782 /* eigenvalues to high relative accuracy anyway. */
783 /* Set RTOL to the order of the tolerance used in DLASQ2 */
784 /* This is an ESTIMATED error, the worst case bound is 4*N*EPS */
785 /* which is usually too large and requires unnecessary work to be */
786 /* done by bisection when computing the eigenvectors */
787  rtol = template_blas_log((Treal) in) * 4. * eps;
788  j = ibegin;
789  i__2 = in - 1;
790  for (i__ = 1; i__ <= i__2; ++i__) {
791  work[(i__ << 1) - 1] = (d__1 = d__[j], absMACRO(d__1));
792  work[i__ * 2] = e[j] * e[j] * work[(i__ << 1) - 1];
793  ++j;
794 /* L140: */
795  }
796  work[(in << 1) - 1] = (d__1 = d__[iend], absMACRO(d__1));
797  work[in * 2] = 0.;
798  template_lapack_lasq2(&in, &work[1], &iinfo);
799  if (iinfo != 0) {
800 /* If IINFO = -5 then an index is part of a tight cluster */
801 /* and should be changed. The index is in IWORK(1) and the */
802 /* gap is in WORK(N+1) */
803  *info = -5;
804  return 0;
805  } else {
806 /* Test that all eigenvalues are positive as expected */
807  i__2 = in;
808  for (i__ = 1; i__ <= i__2; ++i__) {
809  if (work[i__] < 0.) {
810  *info = -6;
811  return 0;
812  }
813 /* L149: */
814  }
815  }
816  if (sgndef > 0.) {
817  i__2 = indu;
818  for (i__ = indl; i__ <= i__2; ++i__) {
819  ++(*m);
820  w[*m] = work[in - i__ + 1];
821  iblock[*m] = jblk;
822  indexw[*m] = i__;
823 /* L150: */
824  }
825  } else {
826  i__2 = indu;
827  for (i__ = indl; i__ <= i__2; ++i__) {
828  ++(*m);
829  w[*m] = -work[i__];
830  iblock[*m] = jblk;
831  indexw[*m] = i__;
832 /* L160: */
833  }
834  }
835  i__2 = *m;
836  for (i__ = *m - mb + 1; i__ <= i__2; ++i__) {
837 /* the value of RTOL below should be the tolerance in DLASQ2 */
838  werr[i__] = rtol * (d__1 = w[i__], absMACRO(d__1));
839 /* L165: */
840  }
841  i__2 = *m - 1;
842  for (i__ = *m - mb + 1; i__ <= i__2; ++i__) {
843 /* compute the right gap between the intervals */
844 /* Computing MAX */
845  d__1 = 0., d__2 = w[i__ + 1] - werr[i__ + 1] - (w[i__] + werr[
846  i__]);
847  wgap[i__] = maxMACRO(d__1,d__2);
848 /* L166: */
849  }
850 /* Computing MAX */
851  d__1 = 0., d__2 = *vu - sigma - (w[*m] + werr[*m]);
852  wgap[*m] = maxMACRO(d__1,d__2);
853  }
854 /* proceed with next block */
855  ibegin = iend + 1;
856  wbegin = wend + 1;
857 L170:
858  ;
859  }
860 
861  return 0;
862 
863 /* end of DLARRE */
864 
865 } /* dlarre_ */
866 
867 #endif