ergo
template_lapack_lag2.h
Go to the documentation of this file.
1 /* Ergo, version 3.2, a program for linear scaling electronic structure
2  * calculations.
3  * Copyright (C) 2012 Elias Rudberg, Emanuel H. Rubensson, and Pawel Salek.
4  *
5  * This program is free software: you can redistribute it and/or modify
6  * it under the terms of the GNU General Public License as published by
7  * the Free Software Foundation, either version 3 of the License, or
8  * (at your option) any later version.
9  *
10  * This program is distributed in the hope that it will be useful,
11  * but WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13  * GNU General Public License for more details.
14  *
15  * You should have received a copy of the GNU General Public License
16  * along with this program. If not, see <http://www.gnu.org/licenses/>.
17  *
18  * Primary academic reference:
19  * Kohn−Sham Density Functional Theory Electronic Structure Calculations
20  * with Linearly Scaling Computational Time and Memory Usage,
21  * Elias Rudberg, Emanuel H. Rubensson, and Pawel Salek,
22  * J. Chem. Theory Comput. 7, 340 (2011),
23  * <http://dx.doi.org/10.1021/ct100611z>
24  *
25  * For further information about Ergo, see <http://www.ergoscf.org>.
26  */
27 
28  /* This file belongs to the template_lapack part of the Ergo source
29  * code. The source files in the template_lapack directory are modified
30  * versions of files originally distributed as CLAPACK, see the
31  * Copyright/license notice in the file template_lapack/COPYING.
32  */
33 
34 
35 #ifndef TEMPLATE_LAPACK_LAG2_HEADER
36 #define TEMPLATE_LAPACK_LAG2_HEADER
37 
38 
39 template<class Treal>
40 int template_lapack_lag2(const Treal *a, const integer *lda, const Treal *b,
41  const integer *ldb, const Treal *safmin, Treal *scale1, Treal *
42  scale2, Treal *wr1, Treal *wr2, Treal *wi)
43 {
44 /* -- LAPACK auxiliary routine (version 3.0) --
45  Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
46  Courant Institute, Argonne National Lab, and Rice University
47  March 31, 1993
48 
49 
50  Purpose
51  =======
52 
53  DLAG2 computes the eigenvalues of a 2 x 2 generalized eigenvalue
54  problem A - w B, with scaling as necessary to avoid over-/underflow.
55 
56  The scaling factor "s" results in a modified eigenvalue equation
57 
58  s A - w B
59 
60  where s is a non-negative scaling factor chosen so that w, w B,
61  and s A do not overflow and, if possible, do not underflow, either.
62 
63  Arguments
64  =========
65 
66  A (input) DOUBLE PRECISION array, dimension (LDA, 2)
67  On entry, the 2 x 2 matrix A. It is assumed that its 1-norm
68  is less than 1/SAFMIN. Entries less than
69  sqrt(SAFMIN)*norm(A) are subject to being treated as zero.
70 
71  LDA (input) INTEGER
72  The leading dimension of the array A. LDA >= 2.
73 
74  B (input) DOUBLE PRECISION array, dimension (LDB, 2)
75  On entry, the 2 x 2 upper triangular matrix B. It is
76  assumed that the one-norm of B is less than 1/SAFMIN. The
77  diagonals should be at least sqrt(SAFMIN) times the largest
78  element of B (in absolute value); if a diagonal is smaller
79  than that, then +/- sqrt(SAFMIN) will be used instead of
80  that diagonal.
81 
82  LDB (input) INTEGER
83  The leading dimension of the array B. LDB >= 2.
84 
85  SAFMIN (input) DOUBLE PRECISION
86  The smallest positive number s.t. 1/SAFMIN does not
87  overflow. (This should always be DLAMCH('S') -- it is an
88  argument in order to avoid having to call DLAMCH frequently.)
89 
90  SCALE1 (output) DOUBLE PRECISION
91  A scaling factor used to avoid over-/underflow in the
92  eigenvalue equation which defines the first eigenvalue. If
93  the eigenvalues are complex, then the eigenvalues are
94  ( WR1 +/- WI i ) / SCALE1 (which may lie outside the
95  exponent range of the machine), SCALE1=SCALE2, and SCALE1
96  will always be positive. If the eigenvalues are real, then
97  the first (real) eigenvalue is WR1 / SCALE1 , but this may
98  overflow or underflow, and in fact, SCALE1 may be zero or
99  less than the underflow threshhold if the exact eigenvalue
100  is sufficiently large.
101 
102  SCALE2 (output) DOUBLE PRECISION
103  A scaling factor used to avoid over-/underflow in the
104  eigenvalue equation which defines the second eigenvalue. If
105  the eigenvalues are complex, then SCALE2=SCALE1. If the
106  eigenvalues are real, then the second (real) eigenvalue is
107  WR2 / SCALE2 , but this may overflow or underflow, and in
108  fact, SCALE2 may be zero or less than the underflow
109  threshhold if the exact eigenvalue is sufficiently large.
110 
111  WR1 (output) DOUBLE PRECISION
112  If the eigenvalue is real, then WR1 is SCALE1 times the
113  eigenvalue closest to the (2,2) element of A B**(-1). If the
114  eigenvalue is complex, then WR1=WR2 is SCALE1 times the real
115  part of the eigenvalues.
116 
117  WR2 (output) DOUBLE PRECISION
118  If the eigenvalue is real, then WR2 is SCALE2 times the
119  other eigenvalue. If the eigenvalue is complex, then
120  WR1=WR2 is SCALE1 times the real part of the eigenvalues.
121 
122  WI (output) DOUBLE PRECISION
123  If the eigenvalue is real, then WI is zero. If the
124  eigenvalue is complex, then WI is SCALE1 times the imaginary
125  part of the eigenvalues. WI will always be non-negative.
126 
127  =====================================================================
128 
129 
130  Parameter adjustments */
131  /* System generated locals */
132  integer a_dim1, a_offset, b_dim1, b_offset;
133  Treal d__1, d__2, d__3, d__4, d__5, d__6;
134  /* Local variables */
135  Treal diff, bmin, wbig, wabs, wdet, r__, binv11, binv22,
136  discr, anorm, bnorm, bsize, shift, c1, c2, c3, c4, c5, rtmin,
137  rtmax, wsize, s1, s2, a11, a12, a21, a22, b11, b12, b22, ascale,
138  bscale, pp, qq, ss, wscale, safmax, wsmall, as11, as12, as22, sum,
139  abi22;
140 #define a_ref(a_1,a_2) a[(a_2)*a_dim1 + a_1]
141 #define b_ref(a_1,a_2) b[(a_2)*b_dim1 + a_1]
142 
143  a_dim1 = *lda;
144  a_offset = 1 + a_dim1 * 1;
145  a -= a_offset;
146  b_dim1 = *ldb;
147  b_offset = 1 + b_dim1 * 1;
148  b -= b_offset;
149 
150  /* Function Body */
151  rtmin = template_blas_sqrt(*safmin);
152  rtmax = 1. / rtmin;
153  safmax = 1. / *safmin;
154 
155 /* Scale A
156 
157  Computing MAX */
158  d__5 = (d__1 = a_ref(1, 1), absMACRO(d__1)) + (d__2 = a_ref(2, 1), absMACRO(d__2)),
159  d__6 = (d__3 = a_ref(1, 2), absMACRO(d__3)) + (d__4 = a_ref(2, 2), absMACRO(
160  d__4)), d__5 = maxMACRO(d__5,d__6);
161  anorm = maxMACRO(d__5,*safmin);
162  ascale = 1. / anorm;
163  a11 = ascale * a_ref(1, 1);
164  a21 = ascale * a_ref(2, 1);
165  a12 = ascale * a_ref(1, 2);
166  a22 = ascale * a_ref(2, 2);
167 
168 /* Perturb B if necessary to insure non-singularity */
169 
170  b11 = b_ref(1, 1);
171  b12 = b_ref(1, 2);
172  b22 = b_ref(2, 2);
173 /* Computing MAX */
174  d__1 = absMACRO(b11), d__2 = absMACRO(b12), d__1 = maxMACRO(d__1,d__2), d__2 = absMACRO(b22),
175  d__1 = maxMACRO(d__1,d__2);
176  bmin = rtmin * maxMACRO(d__1,rtmin);
177  if (absMACRO(b11) < bmin) {
178  b11 = template_lapack_d_sign(&bmin, &b11);
179  }
180  if (absMACRO(b22) < bmin) {
181  b22 = template_lapack_d_sign(&bmin, &b22);
182  }
183 
184 /* Scale B
185 
186  Computing MAX */
187  d__1 = absMACRO(b11), d__2 = absMACRO(b12) + absMACRO(b22), d__1 = maxMACRO(d__1,d__2);
188  bnorm = maxMACRO(d__1,*safmin);
189 /* Computing MAX */
190  d__1 = absMACRO(b11), d__2 = absMACRO(b22);
191  bsize = maxMACRO(d__1,d__2);
192  bscale = 1. / bsize;
193  b11 *= bscale;
194  b12 *= bscale;
195  b22 *= bscale;
196 
197 /* Compute larger eigenvalue by method described by C. van Loan
198 
199  ( AS is A shifted by -SHIFT*B ) */
200 
201  binv11 = 1. / b11;
202  binv22 = 1. / b22;
203  s1 = a11 * binv11;
204  s2 = a22 * binv22;
205  if (absMACRO(s1) <= absMACRO(s2)) {
206  as12 = a12 - s1 * b12;
207  as22 = a22 - s1 * b22;
208  ss = a21 * (binv11 * binv22);
209  abi22 = as22 * binv22 - ss * b12;
210  pp = abi22 * .5;
211  shift = s1;
212  } else {
213  as12 = a12 - s2 * b12;
214  as11 = a11 - s2 * b11;
215  ss = a21 * (binv11 * binv22);
216  abi22 = -ss * b12;
217  pp = (as11 * binv11 + abi22) * .5;
218  shift = s2;
219  }
220  qq = ss * as12;
221  if ((d__1 = pp * rtmin, absMACRO(d__1)) >= 1.) {
222 /* Computing 2nd power */
223  d__1 = rtmin * pp;
224  discr = d__1 * d__1 + qq * *safmin;
225  r__ = template_blas_sqrt((absMACRO(discr))) * rtmax;
226  } else {
227 /* Computing 2nd power */
228  d__1 = pp;
229  if (d__1 * d__1 + absMACRO(qq) <= *safmin) {
230 /* Computing 2nd power */
231  d__1 = rtmax * pp;
232  discr = d__1 * d__1 + qq * safmax;
233  r__ = template_blas_sqrt((absMACRO(discr))) * rtmin;
234  } else {
235 /* Computing 2nd power */
236  d__1 = pp;
237  discr = d__1 * d__1 + qq;
238  r__ = template_blas_sqrt((absMACRO(discr)));
239  }
240  }
241 
242 /* Note: the test of R in the following IF is to cover the case when
243  DISCR is small and negative and is flushed to zero during
244  the calculation of R. On machines which have a consistent
245  flush-to-zero threshhold and handle numbers above that
246  threshhold correctly, it would not be necessary. */
247 
248  if (discr >= 0. || r__ == 0.) {
249  sum = pp + template_lapack_d_sign(&r__, &pp);
250  diff = pp - template_lapack_d_sign(&r__, &pp);
251  wbig = shift + sum;
252 
253 /* Compute smaller eigenvalue */
254 
255  wsmall = shift + diff;
256 /* Computing MAX */
257  d__1 = absMACRO(wsmall);
258  if (absMACRO(wbig) * .5 > maxMACRO(d__1,*safmin)) {
259  wdet = (a11 * a22 - a12 * a21) * (binv11 * binv22);
260  wsmall = wdet / wbig;
261  }
262 
263 /* Choose (real) eigenvalue closest to 2,2 element of A*B**(-1)
264  for WR1. */
265 
266  if (pp > abi22) {
267  *wr1 = minMACRO(wbig,wsmall);
268  *wr2 = maxMACRO(wbig,wsmall);
269  } else {
270  *wr1 = maxMACRO(wbig,wsmall);
271  *wr2 = minMACRO(wbig,wsmall);
272  }
273  *wi = 0.;
274  } else {
275 
276 /* Complex eigenvalues */
277 
278  *wr1 = shift + pp;
279  *wr2 = *wr1;
280  *wi = r__;
281  }
282 
283 /* Further scaling to avoid underflow and overflow in computing
284  SCALE1 and overflow in computing w*B.
285 
286  This scale factor (WSCALE) is bounded from above using C1 and C2,
287  and from below using C3 and C4.
288  C1 implements the condition s A must never overflow.
289  C2 implements the condition w B must never overflow.
290  C3, with C2,
291  implement the condition that s A - w B must never overflow.
292  C4 implements the condition s should not underflow.
293  C5 implements the condition max(s,|w|) should be at least 2. */
294 
295  c1 = bsize * (*safmin * maxMACRO(1.,ascale));
296  c2 = *safmin * maxMACRO(1.,bnorm);
297  c3 = bsize * *safmin;
298  if (ascale <= 1. && bsize <= 1.) {
299 /* Computing MIN */
300  d__1 = 1., d__2 = ascale / *safmin * bsize;
301  c4 = minMACRO(d__1,d__2);
302  } else {
303  c4 = 1.;
304  }
305  if (ascale <= 1. || bsize <= 1.) {
306 /* Computing MIN */
307  d__1 = 1., d__2 = ascale * bsize;
308  c5 = minMACRO(d__1,d__2);
309  } else {
310  c5 = 1.;
311  }
312 
313 /* Scale first eigenvalue */
314 
315  wabs = absMACRO(*wr1) + absMACRO(*wi);
316 /* Computing MAX
317  Computing MIN */
318  d__3 = c4, d__4 = maxMACRO(wabs,c5) * .5;
319  d__1 = maxMACRO(*safmin,c1), d__2 = (wabs * c2 + c3) * 1.0000100000000001,
320  d__1 = maxMACRO(d__1,d__2), d__2 = minMACRO(d__3,d__4);
321  wsize = maxMACRO(d__1,d__2);
322  if (wsize != 1.) {
323  wscale = 1. / wsize;
324  if (wsize > 1.) {
325  *scale1 = maxMACRO(ascale,bsize) * wscale * minMACRO(ascale,bsize);
326  } else {
327  *scale1 = minMACRO(ascale,bsize) * wscale * maxMACRO(ascale,bsize);
328  }
329  *wr1 *= wscale;
330  if (*wi != 0.) {
331  *wi *= wscale;
332  *wr2 = *wr1;
333  *scale2 = *scale1;
334  }
335  } else {
336  *scale1 = ascale * bsize;
337  *scale2 = *scale1;
338  }
339 
340 /* Scale second eigenvalue (if real) */
341 
342  if (*wi == 0.) {
343 /* Computing MAX
344  Computing MIN
345  Computing MAX */
346  d__5 = absMACRO(*wr2);
347  d__3 = c4, d__4 = maxMACRO(d__5,c5) * .5;
348  d__1 = maxMACRO(*safmin,c1), d__2 = (absMACRO(*wr2) * c2 + c3) *
349  1.0000100000000001, d__1 = maxMACRO(d__1,d__2), d__2 = minMACRO(d__3,
350  d__4);
351  wsize = maxMACRO(d__1,d__2);
352  if (wsize != 1.) {
353  wscale = 1. / wsize;
354  if (wsize > 1.) {
355  *scale2 = maxMACRO(ascale,bsize) * wscale * minMACRO(ascale,bsize);
356  } else {
357  *scale2 = minMACRO(ascale,bsize) * wscale * maxMACRO(ascale,bsize);
358  }
359  *wr2 *= wscale;
360  } else {
361  *scale2 = ascale * bsize;
362  }
363  }
364 
365 /* End of DLAG2 */
366 
367  return 0;
368 } /* dlag2_ */
369 
370 #undef b_ref
371 #undef a_ref
372 
373 
374 #endif