ergo
template_lapack_larrf.h
Go to the documentation of this file.
1 /* Ergo, version 3.3, a program for linear scaling electronic structure
2  * calculations.
3  * Copyright (C) 2013 Elias Rudberg, Emanuel H. Rubensson, and Pawel Salek.
4  *
5  * This program is free software: you can redistribute it and/or modify
6  * it under the terms of the GNU General Public License as published by
7  * the Free Software Foundation, either version 3 of the License, or
8  * (at your option) any later version.
9  *
10  * This program is distributed in the hope that it will be useful,
11  * but WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13  * GNU General Public License for more details.
14  *
15  * You should have received a copy of the GNU General Public License
16  * along with this program. If not, see <http://www.gnu.org/licenses/>.
17  *
18  * Primary academic reference:
19  * Kohn−Sham Density Functional Theory Electronic Structure Calculations
20  * with Linearly Scaling Computational Time and Memory Usage,
21  * Elias Rudberg, Emanuel H. Rubensson, and Pawel Salek,
22  * J. Chem. Theory Comput. 7, 340 (2011),
23  * <http://dx.doi.org/10.1021/ct100611z>
24  *
25  * For further information about Ergo, see <http://www.ergoscf.org>.
26  */
27 
28  /* This file belongs to the template_lapack part of the Ergo source
29  * code. The source files in the template_lapack directory are modified
30  * versions of files originally distributed as CLAPACK, see the
31  * Copyright/license notice in the file template_lapack/COPYING.
32  */
33 
34 
35 #ifndef TEMPLATE_LAPACK_LARRF_HEADER
36 #define TEMPLATE_LAPACK_LARRF_HEADER
37 
38 
39 template<class Treal>
40 int template_lapack_larrf(integer *n, Treal *d__, Treal *l,
41  Treal *ld, integer *clstrt, integer *clend, Treal *w,
42  Treal *wgap, Treal *werr, Treal *spdiam, Treal *
43  clgapl, Treal *clgapr, Treal *pivmin, Treal *sigma,
44  Treal *dplus, Treal *lplus, Treal *work, integer *info)
45 {
46  /* System generated locals */
47  integer i__1;
48  Treal d__1, d__2, d__3;
49 
50  /* Local variables */
51  integer i__;
52  Treal s, bestshift, smlgrowth, eps, tmp, max1, max2, rrr1, rrr2,
53  znm2, growthbound, fail, fact, oldp;
54  integer indx;
55  Treal prod;
56  integer ktry;
57  Treal fail2, avgap, ldmax, rdmax;
58  integer shift;
59  logical dorrr1;
60  Treal ldelta;
61  logical nofail;
62  Treal mingap, lsigma, rdelta;
63  logical forcer;
64  Treal rsigma, clwdth;
65  logical sawnan1, sawnan2, tryrrr1;
66 
67 
68 /* -- LAPACK auxiliary routine (version 3.2) -- */
69 /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */
70 /* November 2006 */
71 /* * */
72 /* .. Scalar Arguments .. */
73 /* .. */
74 /* .. Array Arguments .. */
75 /* .. */
76 
77 /* Purpose */
78 /* ======= */
79 
80 /* Given the initial representation L D L^T and its cluster of close */
81 /* eigenvalues (in a relative measure), W( CLSTRT ), W( CLSTRT+1 ), ... */
82 /* W( CLEND ), DLARRF finds a new relatively robust representation */
83 /* L D L^T - SIGMA I = L(+) D(+) L(+)^T such that at least one of the */
84 /* eigenvalues of L(+) D(+) L(+)^T is relatively isolated. */
85 
86 /* Arguments */
87 /* ========= */
88 
89 /* N (input) INTEGER */
90 /* The order of the matrix (subblock, if the matrix splitted). */
91 
92 /* D (input) DOUBLE PRECISION array, dimension (N) */
93 /* The N diagonal elements of the diagonal matrix D. */
94 
95 /* L (input) DOUBLE PRECISION array, dimension (N-1) */
96 /* The (N-1) subdiagonal elements of the unit bidiagonal */
97 /* matrix L. */
98 
99 /* LD (input) DOUBLE PRECISION array, dimension (N-1) */
100 /* The (N-1) elements L(i)*D(i). */
101 
102 /* CLSTRT (input) INTEGER */
103 /* The index of the first eigenvalue in the cluster. */
104 
105 /* CLEND (input) INTEGER */
106 /* The index of the last eigenvalue in the cluster. */
107 
108 /* W (input) DOUBLE PRECISION array, dimension >= (CLEND-CLSTRT+1) */
109 /* The eigenvalue APPROXIMATIONS of L D L^T in ascending order. */
110 /* W( CLSTRT ) through W( CLEND ) form the cluster of relatively */
111 /* close eigenalues. */
112 
113 /* WGAP (input/output) DOUBLE PRECISION array, dimension >= (CLEND-CLSTRT+1) */
114 /* The separation from the right neighbor eigenvalue in W. */
115 
116 /* WERR (input) DOUBLE PRECISION array, dimension >= (CLEND-CLSTRT+1) */
117 /* WERR contain the semiwidth of the uncertainty */
118 /* interval of the corresponding eigenvalue APPROXIMATION in W */
119 
120 /* SPDIAM (input) estimate of the spectral diameter obtained from the */
121 /* Gerschgorin intervals */
122 
123 /* CLGAPL, CLGAPR (input) absolute gap on each end of the cluster. */
124 /* Set by the calling routine to protect against shifts too close */
125 /* to eigenvalues outside the cluster. */
126 
127 /* PIVMIN (input) DOUBLE PRECISION */
128 /* The minimum pivot allowed in the Sturm sequence. */
129 
130 /* SIGMA (output) DOUBLE PRECISION */
131 /* The shift used to form L(+) D(+) L(+)^T. */
132 
133 /* DPLUS (output) DOUBLE PRECISION array, dimension (N) */
134 /* The N diagonal elements of the diagonal matrix D(+). */
135 
136 /* LPLUS (output) DOUBLE PRECISION array, dimension (N-1) */
137 /* The first (N-1) elements of LPLUS contain the subdiagonal */
138 /* elements of the unit bidiagonal matrix L(+). */
139 
140 /* WORK (workspace) DOUBLE PRECISION array, dimension (2*N) */
141 /* Workspace. */
142 
143 /* Further Details */
144 /* =============== */
145 
146 /* Based on contributions by */
147 /* Beresford Parlett, University of California, Berkeley, USA */
148 /* Jim Demmel, University of California, Berkeley, USA */
149 /* Inderjit Dhillon, University of Texas, Austin, USA */
150 /* Osni Marques, LBNL/NERSC, USA */
151 /* Christof Voemel, University of California, Berkeley, USA */
152 
153 /* ===================================================================== */
154 
155 /* .. Parameters .. */
156 /* .. */
157 /* .. Local Scalars .. */
158 /* .. */
159 /* .. External Functions .. */
160 /* .. */
161 /* .. External Subroutines .. */
162 /* .. */
163 /* .. Intrinsic Functions .. */
164 /* .. */
165 /* .. Executable Statements .. */
166 
167 /* Table of constant values */
168 
169  integer c__1 = 1;
170 
171 
172 
173  /* Parameter adjustments */
174  --work;
175  --lplus;
176  --dplus;
177  --werr;
178  --wgap;
179  --w;
180  --ld;
181  --l;
182  --d__;
183 
184  /* Initialization added by Elias to get rid of compiler warnings. */
185  indx = 0;
186  /* Function Body */
187  *info = 0;
188  fact = 2.;
189  eps = template_lapack_lamch("Precision", (Treal)0);
190  shift = 0;
191  forcer = FALSE_;
192 /* Note that we cannot guarantee that for any of the shifts tried, */
193 /* the factorization has a small or even moderate element growth. */
194 /* There could be Ritz values at both ends of the cluster and despite */
195 /* backing off, there are examples where all factorizations tried */
196 /* (in IEEE mode, allowing zero pivots & infinities) have INFINITE */
197 /* element growth. */
198 /* For this reason, we should use PIVMIN in this subroutine so that at */
199 /* least the L D L^T factorization exists. It can be checked afterwards */
200 /* whether the element growth caused bad residuals/orthogonality. */
201 /* Decide whether the code should accept the best among all */
202 /* representations despite large element growth or signal INFO=1 */
203  nofail = TRUE_;
204 
205 /* Compute the average gap length of the cluster */
206  clwdth = (d__1 = w[*clend] - w[*clstrt], absMACRO(d__1)) + werr[*clend] + werr[
207  *clstrt];
208  avgap = clwdth / (Treal) (*clend - *clstrt);
209  mingap = minMACRO(*clgapl,*clgapr);
210 /* Initial values for shifts to both ends of cluster */
211 /* Computing MIN */
212  d__1 = w[*clstrt], d__2 = w[*clend];
213  lsigma = minMACRO(d__1,d__2) - werr[*clstrt];
214 /* Computing MAX */
215  d__1 = w[*clstrt], d__2 = w[*clend];
216  rsigma = maxMACRO(d__1,d__2) + werr[*clend];
217 /* Use a small fudge to make sure that we really shift to the outside */
218  lsigma -= absMACRO(lsigma) * 4. * eps;
219  rsigma += absMACRO(rsigma) * 4. * eps;
220 /* Compute upper bounds for how much to back off the initial shifts */
221  ldmax = mingap * .25 + *pivmin * 2.;
222  rdmax = mingap * .25 + *pivmin * 2.;
223 /* Computing MAX */
224  d__1 = avgap, d__2 = wgap[*clstrt];
225  ldelta = maxMACRO(d__1,d__2) / fact;
226 /* Computing MAX */
227  d__1 = avgap, d__2 = wgap[*clend - 1];
228  rdelta = maxMACRO(d__1,d__2) / fact;
229 
230 /* Initialize the record of the best representation found */
231 
232  s = template_lapack_lamch("S", (Treal)0);
233  smlgrowth = 1. / s;
234  fail = (Treal) (*n - 1) * mingap / (*spdiam * eps);
235  fail2 = (Treal) (*n - 1) * mingap / (*spdiam * template_blas_sqrt(eps));
236  bestshift = lsigma;
237 
238 /* while (KTRY <= KTRYMAX) */
239  ktry = 0;
240  growthbound = *spdiam * 8.;
241 L5:
242  sawnan1 = FALSE_;
243  sawnan2 = FALSE_;
244 /* Ensure that we do not back off too much of the initial shifts */
245  ldelta = minMACRO(ldmax,ldelta);
246  rdelta = minMACRO(rdmax,rdelta);
247 /* Compute the element growth when shifting to both ends of the cluster */
248 /* accept the shift if there is no element growth at one of the two ends */
249 /* Left end */
250  s = -lsigma;
251  dplus[1] = d__[1] + s;
252  if (absMACRO(dplus[1]) < *pivmin) {
253  dplus[1] = -(*pivmin);
254 /* Need to set SAWNAN1 because refined RRR test should not be used */
255 /* in this case */
256  sawnan1 = TRUE_;
257  }
258  max1 = absMACRO(dplus[1]);
259  i__1 = *n - 1;
260  for (i__ = 1; i__ <= i__1; ++i__) {
261  lplus[i__] = ld[i__] / dplus[i__];
262  s = s * lplus[i__] * l[i__] - lsigma;
263  dplus[i__ + 1] = d__[i__ + 1] + s;
264  if ((d__1 = dplus[i__ + 1], absMACRO(d__1)) < *pivmin) {
265  dplus[i__ + 1] = -(*pivmin);
266 /* Need to set SAWNAN1 because refined RRR test should not be used */
267 /* in this case */
268  sawnan1 = TRUE_;
269  }
270 /* Computing MAX */
271  d__2 = max1, d__3 = (d__1 = dplus[i__ + 1], absMACRO(d__1));
272  max1 = maxMACRO(d__2,d__3);
273 /* L6: */
274  }
275  sawnan1 = sawnan1 || template_lapack_isnan(&max1);
276  if (forcer || ( max1 <= growthbound && ! sawnan1 ) ) {
277  *sigma = lsigma;
278  shift = 1;
279  goto L100;
280  }
281 /* Right end */
282  s = -rsigma;
283  work[1] = d__[1] + s;
284  if (absMACRO(work[1]) < *pivmin) {
285  work[1] = -(*pivmin);
286 /* Need to set SAWNAN2 because refined RRR test should not be used */
287 /* in this case */
288  sawnan2 = TRUE_;
289  }
290  max2 = absMACRO(work[1]);
291  i__1 = *n - 1;
292  for (i__ = 1; i__ <= i__1; ++i__) {
293  work[*n + i__] = ld[i__] / work[i__];
294  s = s * work[*n + i__] * l[i__] - rsigma;
295  work[i__ + 1] = d__[i__ + 1] + s;
296  if ((d__1 = work[i__ + 1], absMACRO(d__1)) < *pivmin) {
297  work[i__ + 1] = -(*pivmin);
298 /* Need to set SAWNAN2 because refined RRR test should not be used */
299 /* in this case */
300  sawnan2 = TRUE_;
301  }
302 /* Computing MAX */
303  d__2 = max2, d__3 = (d__1 = work[i__ + 1], absMACRO(d__1));
304  max2 = maxMACRO(d__2,d__3);
305 /* L7: */
306  }
307  sawnan2 = sawnan2 || template_lapack_isnan(&max2);
308  if (forcer || ( max2 <= growthbound && ! sawnan2 ) ) {
309  *sigma = rsigma;
310  shift = 2;
311  goto L100;
312  }
313 /* If we are at this point, both shifts led to too much element growth */
314 /* Record the better of the two shifts (provided it didn't lead to NaN) */
315  if (sawnan1 && sawnan2) {
316 /* both MAX1 and MAX2 are NaN */
317  goto L50;
318  } else {
319  if (! sawnan1) {
320  indx = 1;
321  if (max1 <= smlgrowth) {
322  smlgrowth = max1;
323  bestshift = lsigma;
324  }
325  }
326  if (! sawnan2) {
327  if (sawnan1 || max2 <= max1) {
328  indx = 2;
329  }
330  if (max2 <= smlgrowth) {
331  smlgrowth = max2;
332  bestshift = rsigma;
333  }
334  }
335  }
336 /* If we are here, both the left and the right shift led to */
337 /* element growth. If the element growth is moderate, then */
338 /* we may still accept the representation, if it passes a */
339 /* refined test for RRR. This test supposes that no NaN occurred. */
340 /* Moreover, we use the refined RRR test only for isolated clusters. */
341  if (clwdth < mingap / 128. && minMACRO(max1,max2) < fail2 && ! sawnan1 && !
342  sawnan2) {
343  dorrr1 = TRUE_;
344  } else {
345  dorrr1 = FALSE_;
346  }
347  tryrrr1 = TRUE_;
348  if (tryrrr1 && dorrr1) {
349  if (indx == 1) {
350  tmp = (d__1 = dplus[*n], absMACRO(d__1));
351  znm2 = 1.;
352  prod = 1.;
353  oldp = 1.;
354  for (i__ = *n - 1; i__ >= 1; --i__) {
355  if (prod <= eps) {
356  prod = dplus[i__ + 1] * work[*n + i__ + 1] / (dplus[i__] *
357  work[*n + i__]) * oldp;
358  } else {
359  prod *= (d__1 = work[*n + i__], absMACRO(d__1));
360  }
361  oldp = prod;
362 /* Computing 2nd power */
363  d__1 = prod;
364  znm2 += d__1 * d__1;
365 /* Computing MAX */
366  d__2 = tmp, d__3 = (d__1 = dplus[i__] * prod, absMACRO(d__1));
367  tmp = maxMACRO(d__2,d__3);
368 /* L15: */
369  }
370  rrr1 = tmp / (*spdiam * template_blas_sqrt(znm2));
371  if (rrr1 <= 8.) {
372  *sigma = lsigma;
373  shift = 1;
374  goto L100;
375  }
376  } else if (indx == 2) {
377  tmp = (d__1 = work[*n], absMACRO(d__1));
378  znm2 = 1.;
379  prod = 1.;
380  oldp = 1.;
381  for (i__ = *n - 1; i__ >= 1; --i__) {
382  if (prod <= eps) {
383  prod = work[i__ + 1] * lplus[i__ + 1] / (work[i__] *
384  lplus[i__]) * oldp;
385  } else {
386  prod *= (d__1 = lplus[i__], absMACRO(d__1));
387  }
388  oldp = prod;
389 /* Computing 2nd power */
390  d__1 = prod;
391  znm2 += d__1 * d__1;
392 /* Computing MAX */
393  d__2 = tmp, d__3 = (d__1 = work[i__] * prod, absMACRO(d__1));
394  tmp = maxMACRO(d__2,d__3);
395 /* L16: */
396  }
397  rrr2 = tmp / (*spdiam * template_blas_sqrt(znm2));
398  if (rrr2 <= 8.) {
399  *sigma = rsigma;
400  shift = 2;
401  goto L100;
402  }
403  }
404  }
405 L50:
406  if (ktry < 1) {
407 /* If we are here, both shifts failed also the RRR test. */
408 /* Back off to the outside */
409 /* Computing MAX */
410  d__1 = lsigma - ldelta, d__2 = lsigma - ldmax;
411  lsigma = maxMACRO(d__1,d__2);
412 /* Computing MIN */
413  d__1 = rsigma + rdelta, d__2 = rsigma + rdmax;
414  rsigma = minMACRO(d__1,d__2);
415  ldelta *= 2.;
416  rdelta *= 2.;
417  ++ktry;
418  goto L5;
419  } else {
420 /* None of the representations investigated satisfied our */
421 /* criteria. Take the best one we found. */
422  if (smlgrowth < fail || nofail) {
423  lsigma = bestshift;
424  rsigma = bestshift;
425  forcer = TRUE_;
426  goto L5;
427  } else {
428  *info = 1;
429  return 0;
430  }
431  }
432 L100:
433  if (shift == 1) {
434  } else if (shift == 2) {
435 /* store new L and D back into DPLUS, LPLUS */
436  template_blas_copy(n, &work[1], &c__1, &dplus[1], &c__1);
437  i__1 = *n - 1;
438  template_blas_copy(&i__1, &work[*n + 1], &c__1, &lplus[1], &c__1);
439  }
440  return 0;
441 
442 /* End of DLARRF */
443 
444 } /* dlarrf_ */
445 
446 #endif
#define absMACRO(x)
Definition: template_blas_common.h:45
int template_lapack_larrf(integer *n, Treal *d__, Treal *l, Treal *ld, integer *clstrt, integer *clend, Treal *w, Treal *wgap, Treal *werr, Treal *spdiam, Treal *clgapl, Treal *clgapr, Treal *pivmin, Treal *sigma, Treal *dplus, Treal *lplus, Treal *work, integer *info)
Definition: template_lapack_larrf.h:40
logical template_lapack_isnan(Treal *din)
Definition: template_lapack_isnan.h:43
int integer
Definition: template_blas_common.h:38
#define maxMACRO(a, b)
Definition: template_blas_common.h:43
#define minMACRO(a, b)
Definition: template_blas_common.h:44
Treal template_lapack_lamch(const char *cmach, Treal dummyReal)
Definition: template_lapack_lamch.h:199
bool logical
Definition: template_blas_common.h:39
#define TRUE_
Definition: template_lapack_common.h:40
int template_blas_copy(const integer *n, const Treal *dx, const integer *incx, Treal *dy, const integer *incy)
Definition: template_blas_copy.h:40
#define FALSE_
Definition: template_lapack_common.h:41
Treal template_blas_sqrt(Treal x)