ergo
template_lapack_laneg.h
Go to the documentation of this file.
1 /* Ergo, version 3.2, a program for linear scaling electronic structure
2  * calculations.
3  * Copyright (C) 2012 Elias Rudberg, Emanuel H. Rubensson, and Pawel Salek.
4  *
5  * This program is free software: you can redistribute it and/or modify
6  * it under the terms of the GNU General Public License as published by
7  * the Free Software Foundation, either version 3 of the License, or
8  * (at your option) any later version.
9  *
10  * This program is distributed in the hope that it will be useful,
11  * but WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13  * GNU General Public License for more details.
14  *
15  * You should have received a copy of the GNU General Public License
16  * along with this program. If not, see <http://www.gnu.org/licenses/>.
17  *
18  * Primary academic reference:
19  * Kohn−Sham Density Functional Theory Electronic Structure Calculations
20  * with Linearly Scaling Computational Time and Memory Usage,
21  * Elias Rudberg, Emanuel H. Rubensson, and Pawel Salek,
22  * J. Chem. Theory Comput. 7, 340 (2011),
23  * <http://dx.doi.org/10.1021/ct100611z>
24  *
25  * For further information about Ergo, see <http://www.ergoscf.org>.
26  */
27 
28  /* This file belongs to the template_lapack part of the Ergo source
29  * code. The source files in the template_lapack directory are modified
30  * versions of files originally distributed as CLAPACK, see the
31  * Copyright/license notice in the file template_lapack/COPYING.
32  */
33 
34 
35 #ifndef TEMPLATE_LAPACK_LANEG_HEADER
36 #define TEMPLATE_LAPACK_LANEG_HEADER
37 
38 template<class Treal>
39 int template_lapack_laneg(integer *n, Treal *d__, Treal *lld, Treal *
40  sigma, Treal *pivmin, integer *r__)
41 {
42  /* System generated locals */
43  integer ret_val, i__1, i__2, i__3, i__4;
44 
45  /* Local variables */
46  integer j;
47  Treal p, t;
48  integer bj;
49  Treal tmp;
50  integer neg1, neg2;
51  Treal bsav, gamma, dplus;
52  integer negcnt;
53  logical sawnan;
54  Treal dminus;
55 
56 
57 /* -- LAPACK auxiliary routine (version 3.2) -- */
58 /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */
59 /* November 2006 */
60 
61 /* .. Scalar Arguments .. */
62 /* .. */
63 /* .. Array Arguments .. */
64 /* .. */
65 
66 /* Purpose */
67 /* ======= */
68 
69 /* DLANEG computes the Sturm count, the number of negative pivots */
70 /* encountered while factoring tridiagonal T - sigma I = L D L^T. */
71 /* This implementation works directly on the factors without forming */
72 /* the tridiagonal matrix T. The Sturm count is also the number of */
73 /* eigenvalues of T less than sigma. */
74 
75 /* This routine is called from DLARRB. */
76 
77 /* The current routine does not use the PIVMIN parameter but rather */
78 /* requires IEEE-754 propagation of Infinities and NaNs. This */
79 /* routine also has no input range restrictions but does require */
80 /* default exception handling such that x/0 produces Inf when x is */
81 /* non-zero, and Inf/Inf produces NaN. For more information, see: */
82 
83 /* Marques, Riedy, and Voemel, "Benefits of IEEE-754 Features in */
84 /* Modern Symmetric Tridiagonal Eigensolvers," SIAM Journal on */
85 /* Scientific Computing, v28, n5, 2006. DOI 10.1137/050641624 */
86 /* (Tech report version in LAWN 172 with the same title.) */
87 
88 /* Arguments */
89 /* ========= */
90 
91 /* N (input) INTEGER */
92 /* The order of the matrix. */
93 
94 /* D (input) DOUBLE PRECISION array, dimension (N) */
95 /* The N diagonal elements of the diagonal matrix D. */
96 
97 /* LLD (input) DOUBLE PRECISION array, dimension (N-1) */
98 /* The (N-1) elements L(i)*L(i)*D(i). */
99 
100 /* SIGMA (input) DOUBLE PRECISION */
101 /* Shift amount in T - sigma I = L D L^T. */
102 
103 /* PIVMIN (input) DOUBLE PRECISION */
104 /* The minimum pivot in the Sturm sequence. May be used */
105 /* when zero pivots are encountered on non-IEEE-754 */
106 /* architectures. */
107 
108 /* R (input) INTEGER */
109 /* The twist index for the twisted factorization that is used */
110 /* for the negcount. */
111 
112 /* Further Details */
113 /* =============== */
114 
115 /* Based on contributions by */
116 /* Osni Marques, LBNL/NERSC, USA */
117 /* Christof Voemel, University of California, Berkeley, USA */
118 /* Jason Riedy, University of California, Berkeley, USA */
119 
120 /* ===================================================================== */
121 
122 /* .. Parameters .. */
123 /* Some architectures propagate Infinities and NaNs very slowly, so */
124 /* the code computes counts in BLKLEN chunks. Then a NaN can */
125 /* propagate at most BLKLEN columns before being detected. This is */
126 /* not a general tuning parameter; it needs only to be just large */
127 /* enough that the overhead is tiny in common cases. */
128 /* .. */
129 /* .. Local Scalars .. */
130 /* .. */
131 /* .. Intrinsic Functions .. */
132 /* .. */
133 /* .. External Functions .. */
134 /* .. */
135 /* .. Executable Statements .. */
136  /* Parameter adjustments */
137  --lld;
138  --d__;
139 
140  /* Function Body */
141  negcnt = 0;
142 /* I) upper part: L D L^T - SIGMA I = L+ D+ L+^T */
143  t = -(*sigma);
144  i__1 = *r__ - 1;
145  for (bj = 1; bj <= i__1; bj += 128) {
146  neg1 = 0;
147  bsav = t;
148 /* Computing MIN */
149  i__3 = bj + 127, i__4 = *r__ - 1;
150  i__2 = minMACRO(i__3,i__4);
151  for (j = bj; j <= i__2; ++j) {
152  dplus = d__[j] + t;
153  if (dplus < 0.) {
154  ++neg1;
155  }
156  tmp = t / dplus;
157  t = tmp * lld[j] - *sigma;
158 /* L21: */
159  }
160  sawnan = template_lapack_isnan(&t);
161 /* Run a slower version of the above loop if a NaN is detected. */
162 /* A NaN should occur only with a zero pivot after an infinite */
163 /* pivot. In that case, substituting 1 for T/DPLUS is the */
164 /* correct limit. */
165  if (sawnan) {
166  neg1 = 0;
167  t = bsav;
168 /* Computing MIN */
169  i__3 = bj + 127, i__4 = *r__ - 1;
170  i__2 = minMACRO(i__3,i__4);
171  for (j = bj; j <= i__2; ++j) {
172  dplus = d__[j] + t;
173  if (dplus < 0.) {
174  ++neg1;
175  }
176  tmp = t / dplus;
177  if (template_lapack_isnan(&tmp)) {
178  tmp = 1.;
179  }
180  t = tmp * lld[j] - *sigma;
181 /* L22: */
182  }
183  }
184  negcnt += neg1;
185 /* L210: */
186  }
187 
188 /* II) lower part: L D L^T - SIGMA I = U- D- U-^T */
189  p = d__[*n] - *sigma;
190  i__1 = *r__;
191  for (bj = *n - 1; bj >= i__1; bj += -128) {
192  neg2 = 0;
193  bsav = p;
194 /* Computing MAX */
195  i__3 = bj - 127;
196  i__2 = maxMACRO(i__3,*r__);
197  for (j = bj; j >= i__2; --j) {
198  dminus = lld[j] + p;
199  if (dminus < 0.) {
200  ++neg2;
201  }
202  tmp = p / dminus;
203  p = tmp * d__[j] - *sigma;
204 /* L23: */
205  }
206  sawnan = template_lapack_isnan(&p);
207 /* As above, run a slower version that substitutes 1 for Inf/Inf. */
208 
209  if (sawnan) {
210  neg2 = 0;
211  p = bsav;
212 /* Computing MAX */
213  i__3 = bj - 127;
214  i__2 = maxMACRO(i__3,*r__);
215  for (j = bj; j >= i__2; --j) {
216  dminus = lld[j] + p;
217  if (dminus < 0.) {
218  ++neg2;
219  }
220  tmp = p / dminus;
221  if (template_lapack_isnan(&tmp)) {
222  tmp = 1.;
223  }
224  p = tmp * d__[j] - *sigma;
225 /* L24: */
226  }
227  }
228  negcnt += neg2;
229 /* L230: */
230  }
231 
232 /* III) Twist index */
233 /* T was shifted by SIGMA initially. */
234  gamma = t + *sigma + p;
235  if (gamma < 0.) {
236  ++negcnt;
237  }
238  ret_val = negcnt;
239  return ret_val;
240 } /* dlaneg_ */
241 
242 #endif