ergo
template_lapack_lasq3.h
Go to the documentation of this file.
1 /* Ergo, version 3.2, a program for linear scaling electronic structure
2  * calculations.
3  * Copyright (C) 2012 Elias Rudberg, Emanuel H. Rubensson, and Pawel Salek.
4  *
5  * This program is free software: you can redistribute it and/or modify
6  * it under the terms of the GNU General Public License as published by
7  * the Free Software Foundation, either version 3 of the License, or
8  * (at your option) any later version.
9  *
10  * This program is distributed in the hope that it will be useful,
11  * but WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13  * GNU General Public License for more details.
14  *
15  * You should have received a copy of the GNU General Public License
16  * along with this program. If not, see <http://www.gnu.org/licenses/>.
17  *
18  * Primary academic reference:
19  * Kohn−Sham Density Functional Theory Electronic Structure Calculations
20  * with Linearly Scaling Computational Time and Memory Usage,
21  * Elias Rudberg, Emanuel H. Rubensson, and Pawel Salek,
22  * J. Chem. Theory Comput. 7, 340 (2011),
23  * <http://dx.doi.org/10.1021/ct100611z>
24  *
25  * For further information about Ergo, see <http://www.ergoscf.org>.
26  */
27 
28  /* This file belongs to the template_lapack part of the Ergo source
29  * code. The source files in the template_lapack directory are modified
30  * versions of files originally distributed as CLAPACK, see the
31  * Copyright/license notice in the file template_lapack/COPYING.
32  */
33 
34 
35 #ifndef TEMPLATE_LAPACK_LASQ3_HEADER
36 #define TEMPLATE_LAPACK_LASQ3_HEADER
37 
38 template<class Treal>
39 int template_lapack_lasq3(integer *i0, integer *n0, Treal *z__,
40  integer *pp, Treal *dmin__, Treal *sigma, Treal *desig,
41  Treal *qmax, integer *nfail, integer *iter, integer *ndiv,
42  logical *ieee, integer *ttype, Treal *dmin1, Treal *dmin2,
43  Treal *dn, Treal *dn1, Treal *dn2, Treal *g,
44  Treal *tau)
45 {
46  /* System generated locals */
47  integer i__1;
48  Treal d__1, d__2;
49 
50 
51  /* Local variables */
52  Treal s, t;
53  integer j4, nn;
54  Treal eps, tol;
55  integer n0in, ipn4;
56  Treal tol2, temp;
57 
58 
59 /* -- LAPACK routine (version 3.2) -- */
60 
61 /* -- Contributed by Osni Marques of the Lawrence Berkeley National -- */
62 /* -- Laboratory and Beresford Parlett of the Univ. of California at -- */
63 /* -- Berkeley -- */
64 /* -- November 2008 -- */
65 
66 /* -- LAPACK is a software package provided by Univ. of Tennessee, -- */
67 /* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */
68 
69 /* .. Scalar Arguments .. */
70 /* .. */
71 /* .. Array Arguments .. */
72 /* .. */
73 
74 /* Purpose */
75 /* ======= */
76 
77 /* DLASQ3 checks for deflation, computes a shift (TAU) and calls dqds. */
78 /* In case of failure it changes shifts, and tries again until output */
79 /* is positive. */
80 
81 /* Arguments */
82 /* ========= */
83 
84 /* I0 (input) INTEGER */
85 /* First index. */
86 
87 /* N0 (input) INTEGER */
88 /* Last index. */
89 
90 /* Z (input) DOUBLE PRECISION array, dimension ( 4*N ) */
91 /* Z holds the qd array. */
92 
93 /* PP (input/output) INTEGER */
94 /* PP=0 for ping, PP=1 for pong. */
95 /* PP=2 indicates that flipping was applied to the Z array */
96 /* and that the initial tests for deflation should not be */
97 /* performed. */
98 
99 /* DMIN (output) DOUBLE PRECISION */
100 /* Minimum value of d. */
101 
102 /* SIGMA (output) DOUBLE PRECISION */
103 /* Sum of shifts used in current segment. */
104 
105 /* DESIG (input/output) DOUBLE PRECISION */
106 /* Lower order part of SIGMA */
107 
108 /* QMAX (input) DOUBLE PRECISION */
109 /* Maximum value of q. */
110 
111 /* NFAIL (output) INTEGER */
112 /* Number of times shift was too big. */
113 
114 /* ITER (output) INTEGER */
115 /* Number of iterations. */
116 
117 /* NDIV (output) INTEGER */
118 /* Number of divisions. */
119 
120 /* IEEE (input) LOGICAL */
121 /* Flag for IEEE or non IEEE arithmetic (passed to DLASQ5). */
122 
123 /* TTYPE (input/output) INTEGER */
124 /* Shift type. */
125 
126 /* DMIN1, DMIN2, DN, DN1, DN2, G, TAU (input/output) DOUBLE PRECISION */
127 /* These are passed as arguments in order to save their values */
128 /* between calls to DLASQ3. */
129 
130 /* ===================================================================== */
131 
132 /* .. Parameters .. */
133 /* .. */
134 /* .. Local Scalars .. */
135 /* .. */
136 /* .. External Subroutines .. */
137 /* .. */
138 /* .. External Function .. */
139 /* .. */
140 /* .. Intrinsic Functions .. */
141 /* .. */
142 /* .. Executable Statements .. */
143 
144  /* Parameter adjustments */
145  --z__;
146 
147  /* Function Body */
148  n0in = *n0;
149  eps = template_lapack_lamch("Precision", (Treal)0);
150  tol = eps * 100.;
151 /* Computing 2nd power */
152  d__1 = tol;
153  tol2 = d__1 * d__1;
154 
155 /* Check for deflation. */
156 
157 L10:
158 
159  if (*n0 < *i0) {
160  return 0;
161  }
162  if (*n0 == *i0) {
163  goto L20;
164  }
165  nn = (*n0 << 2) + *pp;
166  if (*n0 == *i0 + 1) {
167  goto L40;
168  }
169 
170 /* Check whether E(N0-1) is negligible, 1 eigenvalue. */
171 
172  if (z__[nn - 5] > tol2 * (*sigma + z__[nn - 3]) && z__[nn - (*pp << 1) -
173  4] > tol2 * z__[nn - 7]) {
174  goto L30;
175  }
176 
177 L20:
178 
179  z__[(*n0 << 2) - 3] = z__[(*n0 << 2) + *pp - 3] + *sigma;
180  --(*n0);
181  goto L10;
182 
183 /* Check whether E(N0-2) is negligible, 2 eigenvalues. */
184 
185 L30:
186 
187  if (z__[nn - 9] > tol2 * *sigma && z__[nn - (*pp << 1) - 8] > tol2 * z__[
188  nn - 11]) {
189  goto L50;
190  }
191 
192 L40:
193 
194  if (z__[nn - 3] > z__[nn - 7]) {
195  s = z__[nn - 3];
196  z__[nn - 3] = z__[nn - 7];
197  z__[nn - 7] = s;
198  }
199  if (z__[nn - 5] > z__[nn - 3] * tol2) {
200  t = (z__[nn - 7] - z__[nn - 3] + z__[nn - 5]) * .5;
201  s = z__[nn - 3] * (z__[nn - 5] / t);
202  if (s <= t) {
203  s = z__[nn - 3] * (z__[nn - 5] / (t * (template_blas_sqrt(s / t + 1.) + 1.)));
204  } else {
205  s = z__[nn - 3] * (z__[nn - 5] / (t + template_blas_sqrt(t) * template_blas_sqrt(t + s)));
206  }
207  t = z__[nn - 7] + (s + z__[nn - 5]);
208  z__[nn - 3] *= z__[nn - 7] / t;
209  z__[nn - 7] = t;
210  }
211  z__[(*n0 << 2) - 7] = z__[nn - 7] + *sigma;
212  z__[(*n0 << 2) - 3] = z__[nn - 3] + *sigma;
213  *n0 += -2;
214  goto L10;
215 
216 L50:
217  if (*pp == 2) {
218  *pp = 0;
219  }
220 
221 /* Reverse the qd-array, if warranted. */
222 
223  if (*dmin__ <= 0. || *n0 < n0in) {
224  if (z__[(*i0 << 2) + *pp - 3] * 1.5 < z__[(*n0 << 2) + *pp - 3]) {
225  ipn4 = ( *i0 + *n0 ) << 2;
226  i__1 = ( *i0 + *n0 - 1 ) << 1;
227  for (j4 = *i0 << 2; j4 <= i__1; j4 += 4) {
228  temp = z__[j4 - 3];
229  z__[j4 - 3] = z__[ipn4 - j4 - 3];
230  z__[ipn4 - j4 - 3] = temp;
231  temp = z__[j4 - 2];
232  z__[j4 - 2] = z__[ipn4 - j4 - 2];
233  z__[ipn4 - j4 - 2] = temp;
234  temp = z__[j4 - 1];
235  z__[j4 - 1] = z__[ipn4 - j4 - 5];
236  z__[ipn4 - j4 - 5] = temp;
237  temp = z__[j4];
238  z__[j4] = z__[ipn4 - j4 - 4];
239  z__[ipn4 - j4 - 4] = temp;
240 /* L60: */
241  }
242  if (*n0 - *i0 <= 4) {
243  z__[(*n0 << 2) + *pp - 1] = z__[(*i0 << 2) + *pp - 1];
244  z__[(*n0 << 2) - *pp] = z__[(*i0 << 2) - *pp];
245  }
246 /* Computing MIN */
247  d__1 = *dmin2, d__2 = z__[(*n0 << 2) + *pp - 1];
248  *dmin2 = minMACRO(d__1,d__2);
249 /* Computing MIN */
250  d__1 = z__[(*n0 << 2) + *pp - 1], d__2 = z__[(*i0 << 2) + *pp - 1]
251  , d__1 = minMACRO(d__1,d__2), d__2 = z__[(*i0 << 2) + *pp + 3];
252  z__[(*n0 << 2) + *pp - 1] = minMACRO(d__1,d__2);
253 /* Computing MIN */
254  d__1 = z__[(*n0 << 2) - *pp], d__2 = z__[(*i0 << 2) - *pp], d__1 =
255  minMACRO(d__1,d__2), d__2 = z__[(*i0 << 2) - *pp + 4];
256  z__[(*n0 << 2) - *pp] = minMACRO(d__1,d__2);
257 /* Computing MAX */
258  d__1 = *qmax, d__2 = z__[(*i0 << 2) + *pp - 3], d__1 = maxMACRO(d__1,
259  d__2), d__2 = z__[(*i0 << 2) + *pp + 1];
260  *qmax = maxMACRO(d__1,d__2);
261  *dmin__ = -0.;
262  }
263  }
264 
265 /* Choose a shift. */
266 
267  template_lapack_lasq4(i0, n0, &z__[1], pp, &n0in, dmin__, dmin1, dmin2, dn, dn1, dn2,
268  tau, ttype, g);
269 
270 /* Call dqds until DMIN > 0. */
271 
272 L70:
273 
274  template_lapack_lasq5(i0, n0, &z__[1], pp, tau, dmin__, dmin1, dmin2, dn, dn1, dn2,
275  ieee);
276 
277  *ndiv += *n0 - *i0 + 2;
278  ++(*iter);
279 
280 /* Check status. */
281 
282  if (*dmin__ >= 0. && *dmin1 > 0.) {
283 
284 /* Success. */
285 
286  goto L90;
287 
288  } else if (*dmin__ < 0. && *dmin1 > 0. && z__[( ( *n0 - 1 ) << 2) - *pp] < tol
289  * (*sigma + *dn1) && absMACRO(*dn) < tol * *sigma) {
290 
291 /* Convergence hidden by negative DN. */
292 
293  z__[( ( *n0 - 1 ) << 2) - *pp + 2] = 0.;
294  *dmin__ = 0.;
295  goto L90;
296  } else if (*dmin__ < 0.) {
297 
298 /* TAU too big. Select new TAU and try again. */
299 
300  ++(*nfail);
301  if (*ttype < -22) {
302 
303 /* Failed twice. Play it safe. */
304 
305  *tau = 0.;
306  } else if (*dmin1 > 0.) {
307 
308 /* Late failure. Gives excellent shift. */
309 
310  *tau = (*tau + *dmin__) * (1. - eps * 2.);
311  *ttype += -11;
312  } else {
313 
314 /* Early failure. Divide by 4. */
315 
316  *tau *= .25;
317  *ttype += -12;
318  }
319  goto L70;
320  } else if (template_lapack_isnan(dmin__)) {
321 
322 /* NaN. */
323 
324  if (*tau == 0.) {
325  goto L80;
326  } else {
327  *tau = 0.;
328  goto L70;
329  }
330  } else {
331 
332 /* Possible underflow. Play it safe. */
333 
334  goto L80;
335  }
336 
337 /* Risk of underflow. */
338 
339 L80:
340  template_lapack_lasq6(i0, n0, &z__[1], pp, dmin__, dmin1, dmin2, dn, dn1, dn2);
341  *ndiv += *n0 - *i0 + 2;
342  ++(*iter);
343  *tau = 0.;
344 
345 L90:
346  if (*tau < *sigma) {
347  *desig += *tau;
348  t = *sigma + *desig;
349  *desig -= t - *sigma;
350  } else {
351  t = *sigma + *tau;
352  *desig = *sigma - (t - *tau) + *desig;
353  }
354  *sigma = t;
355 
356  return 0;
357 
358 /* End of DLASQ3 */
359 
360 } /* dlasq3_ */
361 
362 #endif