ergo
template_lapack_larre.h
Go to the documentation of this file.
1 /* Ergo, version 3.4, a program for linear scaling electronic structure
2  * calculations.
3  * Copyright (C) 2014 Elias Rudberg, Emanuel H. Rubensson, and Pawel Salek.
4  *
5  * This program is free software: you can redistribute it and/or modify
6  * it under the terms of the GNU General Public License as published by
7  * the Free Software Foundation, either version 3 of the License, or
8  * (at your option) any later version.
9  *
10  * This program is distributed in the hope that it will be useful,
11  * but WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13  * GNU General Public License for more details.
14  *
15  * You should have received a copy of the GNU General Public License
16  * along with this program. If not, see <http://www.gnu.org/licenses/>.
17  *
18  * Primary academic reference:
19  * Kohn−Sham Density Functional Theory Electronic Structure Calculations
20  * with Linearly Scaling Computational Time and Memory Usage,
21  * Elias Rudberg, Emanuel H. Rubensson, and Pawel Salek,
22  * J. Chem. Theory Comput. 7, 340 (2011),
23  * <http://dx.doi.org/10.1021/ct100611z>
24  *
25  * For further information about Ergo, see <http://www.ergoscf.org>.
26  */
27 
28  /* This file belongs to the template_lapack part of the Ergo source
29  * code. The source files in the template_lapack directory are modified
30  * versions of files originally distributed as CLAPACK, see the
31  * Copyright/license notice in the file template_lapack/COPYING.
32  */
33 
34 
35 #ifndef TEMPLATE_LAPACK_LARRE_HEADER
36 #define TEMPLATE_LAPACK_LARRE_HEADER
37 
38 
39 #include "template_lapack_larrk.h"
40 #include "template_lapack_lasq2.h"
41 
42 
43 template<class Treal>
44 int template_lapack_larre(const char *range, const integer *n, Treal *vl,
45  Treal *vu, integer *il, integer *iu, Treal *d__, Treal
46  *e, Treal *e2, Treal *rtol1, Treal *rtol2, Treal *
47  spltol, integer *nsplit, integer *isplit, integer *m, Treal *w,
48  Treal *werr, Treal *wgap, integer *iblock, integer *indexw,
49  Treal *gers, Treal *pivmin, Treal *work, integer *
50  iwork, integer *info)
51 {
52  /* System generated locals */
53  integer i__1, i__2;
54  Treal d__1, d__2, d__3;
55 
56 
57  /* Local variables */
58  integer i__, j;
59  Treal s1, s2;
60  integer mb = 0; // EMANUEL COMMENT: initialize to get rid of compiler warning
61  Treal gl;
62  integer in, mm;
63  Treal gu;
64  integer cnt;
65  Treal eps, tau, tmp, rtl;
66  integer cnt1, cnt2;
67  Treal tmp1, eabs;
68  integer iend, jblk;
69  Treal eold;
70  integer indl;
71  Treal dmax__, emax;
72  integer wend = 0; // EMANUEL COMMENT: initialize to get rid of compiler warning
73  integer idum, indu;
74  Treal rtol;
75  integer iseed[4];
76  Treal avgap, sigma;
77  integer iinfo;
78  logical norep;
79  integer ibegin;
80  logical forceb;
81  integer irange = 0; // EMANUEL COMMENT: initialize to get rid of compiler warning
82  Treal sgndef;
83  integer wbegin;
84  Treal safmin, spdiam;
85  logical usedqd;
86  Treal clwdth, isleft;
87  Treal isrght, bsrtol, dpivot;
88 
89 
90 /* -- LAPACK auxiliary routine (version 3.2) -- */
91 /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */
92 /* November 2006 */
93 
94 /* .. Scalar Arguments .. */
95 /* .. */
96 /* .. Array Arguments .. */
97 /* .. */
98 
99 /* Purpose */
100 /* ======= */
101 
102 /* To find the desired eigenvalues of a given real symmetric */
103 /* tridiagonal matrix T, DLARRE sets any "small" off-diagonal */
104 /* elements to zero, and for each unreduced block T_i, it finds */
105 /* (a) a suitable shift at one end of the block's spectrum, */
106 /* (b) the base representation, T_i - sigma_i I = L_i D_i L_i^T, and */
107 /* (c) eigenvalues of each L_i D_i L_i^T. */
108 /* The representations and eigenvalues found are then used by */
109 /* DSTEMR to compute the eigenvectors of T. */
110 /* The accuracy varies depending on whether bisection is used to */
111 /* find a few eigenvalues or the dqds algorithm (subroutine DLASQ2) to */
112 /* conpute all and then discard any unwanted one. */
113 /* As an added benefit, DLARRE also outputs the n */
114 /* Gerschgorin intervals for the matrices L_i D_i L_i^T. */
115 
116 /* Arguments */
117 /* ========= */
118 
119 /* RANGE (input) CHARACTER */
120 /* = 'A': ("All") all eigenvalues will be found. */
121 /* = 'V': ("Value") all eigenvalues in the half-open interval */
122 /* (VL, VU] will be found. */
123 /* = 'I': ("Index") the IL-th through IU-th eigenvalues (of the */
124 /* entire matrix) will be found. */
125 
126 /* N (input) INTEGER */
127 /* The order of the matrix. N > 0. */
128 
129 /* VL (input/output) DOUBLE PRECISION */
130 /* VU (input/output) DOUBLE PRECISION */
131 /* If RANGE='V', the lower and upper bounds for the eigenvalues. */
132 /* Eigenvalues less than or equal to VL, or greater than VU, */
133 /* will not be returned. VL < VU. */
134 /* If RANGE='I' or ='A', DLARRE computes bounds on the desired */
135 /* part of the spectrum. */
136 
137 /* IL (input) INTEGER */
138 /* IU (input) INTEGER */
139 /* If RANGE='I', the indices (in ascending order) of the */
140 /* smallest and largest eigenvalues to be returned. */
141 /* 1 <= IL <= IU <= N. */
142 
143 /* D (input/output) DOUBLE PRECISION array, dimension (N) */
144 /* On entry, the N diagonal elements of the tridiagonal */
145 /* matrix T. */
146 /* On exit, the N diagonal elements of the diagonal */
147 /* matrices D_i. */
148 
149 /* E (input/output) DOUBLE PRECISION array, dimension (N) */
150 /* On entry, the first (N-1) entries contain the subdiagonal */
151 /* elements of the tridiagonal matrix T; E(N) need not be set. */
152 /* On exit, E contains the subdiagonal elements of the unit */
153 /* bidiagonal matrices L_i. The entries E( ISPLIT( I ) ), */
154 /* 1 <= I <= NSPLIT, contain the base points sigma_i on output. */
155 
156 /* E2 (input/output) DOUBLE PRECISION array, dimension (N) */
157 /* On entry, the first (N-1) entries contain the SQUARES of the */
158 /* subdiagonal elements of the tridiagonal matrix T; */
159 /* E2(N) need not be set. */
160 /* On exit, the entries E2( ISPLIT( I ) ), */
161 /* 1 <= I <= NSPLIT, have been set to zero */
162 
163 /* RTOL1 (input) DOUBLE PRECISION */
164 /* RTOL2 (input) DOUBLE PRECISION */
165 /* Parameters for bisection. */
166 /* An interval [LEFT,RIGHT] has converged if */
167 /* RIGHT-LEFT.LT.MAX( RTOL1*GAP, RTOL2*MAX(|LEFT|,|RIGHT|) ) */
168 
169 /* SPLTOL (input) DOUBLE PRECISION */
170 /* The threshold for splitting. */
171 
172 /* NSPLIT (output) INTEGER */
173 /* The number of blocks T splits into. 1 <= NSPLIT <= N. */
174 
175 /* ISPLIT (output) INTEGER array, dimension (N) */
176 /* The splitting points, at which T breaks up into blocks. */
177 /* The first block consists of rows/columns 1 to ISPLIT(1), */
178 /* the second of rows/columns ISPLIT(1)+1 through ISPLIT(2), */
179 /* etc., and the NSPLIT-th consists of rows/columns */
180 /* ISPLIT(NSPLIT-1)+1 through ISPLIT(NSPLIT)=N. */
181 
182 /* M (output) INTEGER */
183 /* The total number of eigenvalues (of all L_i D_i L_i^T) */
184 /* found. */
185 
186 /* W (output) DOUBLE PRECISION array, dimension (N) */
187 /* The first M elements contain the eigenvalues. The */
188 /* eigenvalues of each of the blocks, L_i D_i L_i^T, are */
189 /* sorted in ascending order ( DLARRE may use the */
190 /* remaining N-M elements as workspace). */
191 
192 /* WERR (output) DOUBLE PRECISION array, dimension (N) */
193 /* The error bound on the corresponding eigenvalue in W. */
194 
195 /* WGAP (output) DOUBLE PRECISION array, dimension (N) */
196 /* The separation from the right neighbor eigenvalue in W. */
197 /* The gap is only with respect to the eigenvalues of the same block */
198 /* as each block has its own representation tree. */
199 /* Exception: at the right end of a block we store the left gap */
200 
201 /* IBLOCK (output) INTEGER array, dimension (N) */
202 /* The indices of the blocks (submatrices) associated with the */
203 /* corresponding eigenvalues in W; IBLOCK(i)=1 if eigenvalue */
204 /* W(i) belongs to the first block from the top, =2 if W(i) */
205 /* belongs to the second block, etc. */
206 
207 /* INDEXW (output) INTEGER array, dimension (N) */
208 /* The indices of the eigenvalues within each block (submatrix); */
209 /* for example, INDEXW(i)= 10 and IBLOCK(i)=2 imply that the */
210 /* i-th eigenvalue W(i) is the 10-th eigenvalue in block 2 */
211 
212 /* GERS (output) DOUBLE PRECISION array, dimension (2*N) */
213 /* The N Gerschgorin intervals (the i-th Gerschgorin interval */
214 /* is (GERS(2*i-1), GERS(2*i)). */
215 
216 /* PIVMIN (output) DOUBLE PRECISION */
217 /* The minimum pivot in the Sturm sequence for T. */
218 
219 /* WORK (workspace) DOUBLE PRECISION array, dimension (6*N) */
220 /* Workspace. */
221 
222 /* IWORK (workspace) INTEGER array, dimension (5*N) */
223 /* Workspace. */
224 
225 /* INFO (output) INTEGER */
226 /* = 0: successful exit */
227 /* > 0: A problem occured in DLARRE. */
228 /* < 0: One of the called subroutines signaled an internal problem. */
229 /* Needs inspection of the corresponding parameter IINFO */
230 /* for further information. */
231 
232 /* =-1: Problem in DLARRD. */
233 /* = 2: No base representation could be found in MAXTRY iterations. */
234 /* Increasing MAXTRY and recompilation might be a remedy. */
235 /* =-3: Problem in DLARRB when computing the refined root */
236 /* representation for DLASQ2. */
237 /* =-4: Problem in DLARRB when preforming bisection on the */
238 /* desired part of the spectrum. */
239 /* =-5: Problem in DLASQ2. */
240 /* =-6: Problem in DLASQ2. */
241 
242 /* Further Details */
243 /* The base representations are required to suffer very little */
244 /* element growth and consequently define all their eigenvalues to */
245 /* high relative accuracy. */
246 /* =============== */
247 
248 /* Based on contributions by */
249 /* Beresford Parlett, University of California, Berkeley, USA */
250 /* Jim Demmel, University of California, Berkeley, USA */
251 /* Inderjit Dhillon, University of Texas, Austin, USA */
252 /* Osni Marques, LBNL/NERSC, USA */
253 /* Christof Voemel, University of California, Berkeley, USA */
254 
255 /* ===================================================================== */
256 
257 /* .. Parameters .. */
258 /* .. */
259 /* .. Local Scalars .. */
260 /* .. */
261 /* .. Local Arrays .. */
262 /* .. */
263 /* .. External Functions .. */
264 /* .. */
265 /* .. External Subroutines .. */
266 /* .. */
267 /* .. Intrinsic Functions .. */
268 /* .. */
269 /* .. Executable Statements .. */
270 
271  /* Parameter adjustments */
272 
273 
274  /* Table of constant values */
275 
276  integer c__1 = 1;
277  integer c__2 = 2;
278 
279 
280  --iwork;
281  --work;
282  --gers;
283  --indexw;
284  --iblock;
285  --wgap;
286  --werr;
287  --w;
288  --isplit;
289  --e2;
290  --e;
291  --d__;
292 
293  /* Initialization added by Elias to get rid of compiler warnings. */
294  mm = 0;
295  /* Function Body */
296  *info = 0;
297 
298 /* Decode RANGE */
299 
300  if (template_blas_lsame(range, "A")) {
301  irange = 1;
302  } else if (template_blas_lsame(range, "V")) {
303  irange = 3;
304  } else if (template_blas_lsame(range, "I")) {
305  irange = 2;
306  }
307  *m = 0;
308 /* Get machine constants */
309  safmin = template_lapack_lamch("S",(Treal)0);
310  eps = template_lapack_lamch("P",(Treal)0);
311 /* Set parameters */
312  rtl = template_blas_sqrt(eps);
313  bsrtol = template_blas_sqrt(eps);
314 /* Treat case of 1x1 matrix for quick return */
315  if (*n == 1) {
316  if (irange == 1 || ( irange == 3 && d__[1] > *vl && d__[1] <= *vu ) ||
317  ( irange == 2 && *il == 1 && *iu == 1 ) ) {
318  *m = 1;
319  w[1] = d__[1];
320 /* The computation error of the eigenvalue is zero */
321  werr[1] = 0.;
322  wgap[1] = 0.;
323  iblock[1] = 1;
324  indexw[1] = 1;
325  gers[1] = d__[1];
326  gers[2] = d__[1];
327  }
328 /* store the shift for the initial RRR, which is zero in this case */
329  e[1] = 0.;
330  return 0;
331  }
332 /* General case: tridiagonal matrix of order > 1 */
333 
334 /* Init WERR, WGAP. Compute Gerschgorin intervals and spectral diameter. */
335 /* Compute maximum off-diagonal entry and pivmin. */
336  gl = d__[1];
337  gu = d__[1];
338  eold = 0.;
339  emax = 0.;
340  e[*n] = 0.;
341  i__1 = *n;
342  for (i__ = 1; i__ <= i__1; ++i__) {
343  werr[i__] = 0.;
344  wgap[i__] = 0.;
345  eabs = (d__1 = e[i__], absMACRO(d__1));
346  if (eabs >= emax) {
347  emax = eabs;
348  }
349  tmp1 = eabs + eold;
350  gers[(i__ << 1) - 1] = d__[i__] - tmp1;
351 /* Computing MIN */
352  d__1 = gl, d__2 = gers[(i__ << 1) - 1];
353  gl = minMACRO(d__1,d__2);
354  gers[i__ * 2] = d__[i__] + tmp1;
355 /* Computing MAX */
356  d__1 = gu, d__2 = gers[i__ * 2];
357  gu = maxMACRO(d__1,d__2);
358  eold = eabs;
359 /* L5: */
360  }
361 /* The minimum pivot allowed in the Sturm sequence for T */
362 /* Computing MAX */
363 /* Computing 2nd power */
364  d__3 = emax;
365  d__1 = 1., d__2 = d__3 * d__3;
366  *pivmin = safmin * maxMACRO(d__1,d__2);
367 /* Compute spectral diameter. The Gerschgorin bounds give an */
368 /* estimate that is wrong by at most a factor of SQRT(2) */
369  spdiam = gu - gl;
370 /* Compute splitting points */
371  template_lapack_larra(n, &d__[1], &e[1], &e2[1], spltol, &spdiam, nsplit, &isplit[1], &
372  iinfo);
373 /* Can force use of bisection instead of faster DQDS. */
374 /* Option left in the code for future multisection work. */
375  forceb = FALSE_;
376 /* Initialize USEDQD, DQDS should be used for ALLRNG unless someone */
377 /* explicitly wants bisection. */
378  usedqd = irange == 1 && ! forceb;
379  if (irange == 1 && ! forceb) {
380 /* Set interval [VL,VU] that contains all eigenvalues */
381  *vl = gl;
382  *vu = gu;
383  } else {
384 /* We call DLARRD to find crude approximations to the eigenvalues */
385 /* in the desired range. In case IRANGE = INDRNG, we also obtain the */
386 /* interval (VL,VU] that contains all the wanted eigenvalues. */
387 /* An interval [LEFT,RIGHT] has converged if */
388 /* RIGHT-LEFT.LT.RTOL*MAX(ABS(LEFT),ABS(RIGHT)) */
389 /* DLARRD needs a WORK of size 4*N, IWORK of size 3*N */
390  template_lapack_larrd(range, "B", n, vl, vu, il, iu, &gers[1], &bsrtol, &d__[1], &e[
391  1], &e2[1], pivmin, nsplit, &isplit[1], &mm, &w[1], &werr[1],
392  vl, vu, &iblock[1], &indexw[1], &work[1], &iwork[1], &iinfo);
393  if (iinfo != 0) {
394  *info = -1;
395  return 0;
396  }
397 /* Make sure that the entries M+1 to N in W, WERR, IBLOCK, INDEXW are 0 */
398  i__1 = *n;
399  for (i__ = mm + 1; i__ <= i__1; ++i__) {
400  w[i__] = 0.;
401  werr[i__] = 0.;
402  iblock[i__] = 0;
403  indexw[i__] = 0;
404 /* L14: */
405  }
406  }
407 /* ** */
408 /* Loop over unreduced blocks */
409  ibegin = 1;
410  wbegin = 1;
411  i__1 = *nsplit;
412  for (jblk = 1; jblk <= i__1; ++jblk) {
413  iend = isplit[jblk];
414  in = iend - ibegin + 1;
415 /* 1 X 1 block */
416  if (in == 1) {
417  if (irange == 1 || ( irange == 3 && d__[ibegin] > *vl && d__[ibegin]
418  <= *vu ) || ( irange == 2 && iblock[wbegin] == jblk ) ) {
419  ++(*m);
420  w[*m] = d__[ibegin];
421  werr[*m] = 0.;
422 /* The gap for a single block doesn't matter for the later */
423 /* algorithm and is assigned an arbitrary large value */
424  wgap[*m] = 0.;
425  iblock[*m] = jblk;
426  indexw[*m] = 1;
427  ++wbegin;
428  }
429 /* E( IEND ) holds the shift for the initial RRR */
430  e[iend] = 0.;
431  ibegin = iend + 1;
432  goto L170;
433  }
434 
435 /* Blocks of size larger than 1x1 */
436 
437 /* E( IEND ) will hold the shift for the initial RRR, for now set it =0 */
438  e[iend] = 0.;
439 
440 /* Find local outer bounds GL,GU for the block */
441  gl = d__[ibegin];
442  gu = d__[ibegin];
443  i__2 = iend;
444  for (i__ = ibegin; i__ <= i__2; ++i__) {
445 /* Computing MIN */
446  d__1 = gers[(i__ << 1) - 1];
447  gl = minMACRO(d__1,gl);
448 /* Computing MAX */
449  d__1 = gers[i__ * 2];
450  gu = maxMACRO(d__1,gu);
451 /* L15: */
452  }
453  spdiam = gu - gl;
454  if (! (irange == 1 && ! forceb)) {
455 /* Count the number of eigenvalues in the current block. */
456  mb = 0;
457  i__2 = mm;
458  for (i__ = wbegin; i__ <= i__2; ++i__) {
459  if (iblock[i__] == jblk) {
460  ++mb;
461  } else {
462  goto L21;
463  }
464 /* L20: */
465  }
466 L21:
467  if (mb == 0) {
468 /* No eigenvalue in the current block lies in the desired range */
469 /* E( IEND ) holds the shift for the initial RRR */
470  e[iend] = 0.;
471  ibegin = iend + 1;
472  goto L170;
473  } else {
474 /* Decide whether dqds or bisection is more efficient */
475  usedqd = (Treal) mb > in * .5 && ! forceb;
476  wend = wbegin + mb - 1;
477 /* Calculate gaps for the current block */
478 /* In later stages, when representations for individual */
479 /* eigenvalues are different, we use SIGMA = E( IEND ). */
480  sigma = 0.;
481  i__2 = wend - 1;
482  for (i__ = wbegin; i__ <= i__2; ++i__) {
483 /* Computing MAX */
484  d__1 = 0., d__2 = w[i__ + 1] - werr[i__ + 1] - (w[i__] +
485  werr[i__]);
486  wgap[i__] = maxMACRO(d__1,d__2);
487 /* L30: */
488  }
489 /* Computing MAX */
490  d__1 = 0., d__2 = *vu - sigma - (w[wend] + werr[wend]);
491  wgap[wend] = maxMACRO(d__1,d__2);
492 /* Find local index of the first and last desired evalue. */
493  indl = indexw[wbegin];
494  indu = indexw[wend];
495  }
496  }
497  if ( ( irange == 1 && ! forceb ) || usedqd) {
498 /* Case of DQDS */
499 /* Find approximations to the extremal eigenvalues of the block */
500  template_lapack_larrk(&in, &c__1, &gl, &gu, &d__[ibegin], &e2[ibegin], pivmin, &
501  rtl, &tmp, &tmp1, &iinfo);
502  if (iinfo != 0) {
503  *info = -1;
504  return 0;
505  }
506 /* Computing MAX */
507  d__2 = gl, d__3 = tmp - tmp1 - eps * 100. * (d__1 = tmp - tmp1,
508  absMACRO(d__1));
509  isleft = maxMACRO(d__2,d__3);
510  template_lapack_larrk(&in, &in, &gl, &gu, &d__[ibegin], &e2[ibegin], pivmin, &
511  rtl, &tmp, &tmp1, &iinfo);
512  if (iinfo != 0) {
513  *info = -1;
514  return 0;
515  }
516 /* Computing MIN */
517  d__2 = gu, d__3 = tmp + tmp1 + eps * 100. * (d__1 = tmp + tmp1,
518  absMACRO(d__1));
519  isrght = minMACRO(d__2,d__3);
520 /* Improve the estimate of the spectral diameter */
521  spdiam = isrght - isleft;
522  } else {
523 /* Case of bisection */
524 /* Find approximations to the wanted extremal eigenvalues */
525 /* Computing MAX */
526  d__2 = gl, d__3 = w[wbegin] - werr[wbegin] - eps * 100. * (d__1 =
527  w[wbegin] - werr[wbegin], absMACRO(d__1));
528  isleft = maxMACRO(d__2,d__3);
529 /* Computing MIN */
530  d__2 = gu, d__3 = w[wend] + werr[wend] + eps * 100. * (d__1 = w[
531  wend] + werr[wend], absMACRO(d__1));
532  isrght = minMACRO(d__2,d__3);
533  }
534 /* Decide whether the base representation for the current block */
535 /* L_JBLK D_JBLK L_JBLK^T = T_JBLK - sigma_JBLK I */
536 /* should be on the left or the right end of the current block. */
537 /* The strategy is to shift to the end which is "more populated" */
538 /* Furthermore, decide whether to use DQDS for the computation of */
539 /* the eigenvalue approximations at the end of DLARRE or bisection. */
540 /* dqds is chosen if all eigenvalues are desired or the number of */
541 /* eigenvalues to be computed is large compared to the blocksize. */
542  if (irange == 1 && ! forceb) {
543 /* If all the eigenvalues have to be computed, we use dqd */
544  usedqd = TRUE_;
545 /* INDL is the local index of the first eigenvalue to compute */
546  indl = 1;
547  indu = in;
548 /* MB = number of eigenvalues to compute */
549  mb = in;
550  wend = wbegin + mb - 1;
551 /* Define 1/4 and 3/4 points of the spectrum */
552  s1 = isleft + spdiam * .25;
553  s2 = isrght - spdiam * .25;
554  } else {
555 /* DLARRD has computed IBLOCK and INDEXW for each eigenvalue */
556 /* approximation. */
557 /* choose sigma */
558  if (usedqd) {
559  s1 = isleft + spdiam * .25;
560  s2 = isrght - spdiam * .25;
561  } else {
562  tmp = minMACRO(isrght,*vu) - maxMACRO(isleft,*vl);
563  s1 = maxMACRO(isleft,*vl) + tmp * .25;
564  s2 = minMACRO(isrght,*vu) - tmp * .25;
565  }
566  }
567 /* Compute the negcount at the 1/4 and 3/4 points */
568  if (mb > 1) {
569  template_lapack_larrc("T", &in, &s1, &s2, &d__[ibegin], &e[ibegin], pivmin, &
570  cnt, &cnt1, &cnt2, &iinfo);
571  }
572  if (mb == 1) {
573  sigma = gl;
574  sgndef = 1.;
575  } else if (cnt1 - indl >= indu - cnt2) {
576  if (irange == 1 && ! forceb) {
577  sigma = maxMACRO(isleft,gl);
578  } else if (usedqd) {
579 /* use Gerschgorin bound as shift to get pos def matrix */
580 /* for dqds */
581  sigma = isleft;
582  } else {
583 /* use approximation of the first desired eigenvalue of the */
584 /* block as shift */
585  sigma = maxMACRO(isleft,*vl);
586  }
587  sgndef = 1.;
588  } else {
589  if (irange == 1 && ! forceb) {
590  sigma = minMACRO(isrght,gu);
591  } else if (usedqd) {
592 /* use Gerschgorin bound as shift to get neg def matrix */
593 /* for dqds */
594  sigma = isrght;
595  } else {
596 /* use approximation of the first desired eigenvalue of the */
597 /* block as shift */
598  sigma = minMACRO(isrght,*vu);
599  }
600  sgndef = -1.;
601  }
602 /* An initial SIGMA has been chosen that will be used for computing */
603 /* T - SIGMA I = L D L^T */
604 /* Define the increment TAU of the shift in case the initial shift */
605 /* needs to be refined to obtain a factorization with not too much */
606 /* element growth. */
607  if (usedqd) {
608 /* The initial SIGMA was to the outer end of the spectrum */
609 /* the matrix is definite and we need not retreat. */
610  tau = spdiam * eps * *n + *pivmin * 2.;
611  } else {
612  if (mb > 1) {
613  clwdth = w[wend] + werr[wend] - w[wbegin] - werr[wbegin];
614  avgap = (d__1 = clwdth / (Treal) (wend - wbegin), absMACRO(
615  d__1));
616  if (sgndef == 1.) {
617 /* Computing MAX */
618  d__1 = wgap[wbegin];
619  tau = maxMACRO(d__1,avgap) * .5;
620 /* Computing MAX */
621  d__1 = tau, d__2 = werr[wbegin];
622  tau = maxMACRO(d__1,d__2);
623  } else {
624 /* Computing MAX */
625  d__1 = wgap[wend - 1];
626  tau = maxMACRO(d__1,avgap) * .5;
627 /* Computing MAX */
628  d__1 = tau, d__2 = werr[wend];
629  tau = maxMACRO(d__1,d__2);
630  }
631  } else {
632  tau = werr[wbegin];
633  }
634  }
635 
636  for (idum = 1; idum <= 6; ++idum) {
637 /* Compute L D L^T factorization of tridiagonal matrix T - sigma I. */
638 /* Store D in WORK(1:IN), L in WORK(IN+1:2*IN), and reciprocals of */
639 /* pivots in WORK(2*IN+1:3*IN) */
640  dpivot = d__[ibegin] - sigma;
641  work[1] = dpivot;
642  dmax__ = absMACRO(work[1]);
643  j = ibegin;
644  i__2 = in - 1;
645  for (i__ = 1; i__ <= i__2; ++i__) {
646  work[(in << 1) + i__] = 1. / work[i__];
647  tmp = e[j] * work[(in << 1) + i__];
648  work[in + i__] = tmp;
649  dpivot = d__[j + 1] - sigma - tmp * e[j];
650  work[i__ + 1] = dpivot;
651 /* Computing MAX */
652  d__1 = dmax__, d__2 = absMACRO(dpivot);
653  dmax__ = maxMACRO(d__1,d__2);
654  ++j;
655 /* L70: */
656  }
657 /* check for element growth */
658  if (dmax__ > spdiam * 64.) {
659  norep = TRUE_;
660  } else {
661  norep = FALSE_;
662  }
663  if (usedqd && ! norep) {
664 /* Ensure the definiteness of the representation */
665 /* All entries of D (of L D L^T) must have the same sign */
666  i__2 = in;
667  for (i__ = 1; i__ <= i__2; ++i__) {
668  tmp = sgndef * work[i__];
669  if (tmp < 0.) {
670  norep = TRUE_;
671  }
672 /* L71: */
673  }
674  }
675  if (norep) {
676 /* Note that in the case of IRANGE=ALLRNG, we use the Gerschgorin */
677 /* shift which makes the matrix definite. So we should end up */
678 /* here really only in the case of IRANGE = VALRNG or INDRNG. */
679  if (idum == 5) {
680  if (sgndef == 1.) {
681 /* The fudged Gerschgorin shift should succeed */
682  sigma = gl - spdiam * 2. * eps * *n - *pivmin * 4.;
683  } else {
684  sigma = gu + spdiam * 2. * eps * *n + *pivmin * 4.;
685  }
686  } else {
687  sigma -= sgndef * tau;
688  tau *= 2.;
689  }
690  } else {
691 /* an initial RRR is found */
692  goto L83;
693  }
694 /* L80: */
695  }
696 /* if the program reaches this point, no base representation could be */
697 /* found in MAXTRY iterations. */
698  *info = 2;
699  return 0;
700 L83:
701 /* At this point, we have found an initial base representation */
702 /* T - SIGMA I = L D L^T with not too much element growth. */
703 /* Store the shift. */
704  e[iend] = sigma;
705 /* Store D and L. */
706  template_blas_copy(&in, &work[1], &c__1, &d__[ibegin], &c__1);
707  i__2 = in - 1;
708  template_blas_copy(&i__2, &work[in + 1], &c__1, &e[ibegin], &c__1);
709  if (mb > 1) {
710 
711 /* Perturb each entry of the base representation by a small */
712 /* (but random) relative amount to overcome difficulties with */
713 /* glued matrices. */
714 
715  for (i__ = 1; i__ <= 4; ++i__) {
716  iseed[i__ - 1] = 1;
717 /* L122: */
718  }
719  i__2 = (in << 1) - 1;
720  template_lapack_larnv(&c__2, iseed, &i__2, &work[1]);
721  i__2 = in - 1;
722  for (i__ = 1; i__ <= i__2; ++i__) {
723  d__[ibegin + i__ - 1] *= eps * 8. * work[i__] + 1.;
724  e[ibegin + i__ - 1] *= eps * 8. * work[in + i__] + 1.;
725 /* L125: */
726  }
727  d__[iend] *= eps * 4. * work[in] + 1.;
728 
729  }
730 
731 /* Don't update the Gerschgorin intervals because keeping track */
732 /* of the updates would be too much work in DLARRV. */
733 /* We update W instead and use it to locate the proper Gerschgorin */
734 /* intervals. */
735 /* Compute the required eigenvalues of L D L' by bisection or dqds */
736  if (! usedqd) {
737 /* If DLARRD has been used, shift the eigenvalue approximations */
738 /* according to their representation. This is necessary for */
739 /* a uniform DLARRV since dqds computes eigenvalues of the */
740 /* shifted representation. In DLARRV, W will always hold the */
741 /* UNshifted eigenvalue approximation. */
742  i__2 = wend;
743  for (j = wbegin; j <= i__2; ++j) {
744  w[j] -= sigma;
745  werr[j] += (d__1 = w[j], absMACRO(d__1)) * eps;
746 /* L134: */
747  }
748 /* call DLARRB to reduce eigenvalue error of the approximations */
749 /* from DLARRD */
750  i__2 = iend - 1;
751  for (i__ = ibegin; i__ <= i__2; ++i__) {
752 /* Computing 2nd power */
753  d__1 = e[i__];
754  work[i__] = d__[i__] * (d__1 * d__1);
755 /* L135: */
756  }
757 /* use bisection to find EV from INDL to INDU */
758  i__2 = indl - 1;
759  template_lapack_larrb(&in, &d__[ibegin], &work[ibegin], &indl, &indu, rtol1,
760  rtol2, &i__2, &w[wbegin], &wgap[wbegin], &werr[wbegin], &
761  work[(*n << 1) + 1], &iwork[1], pivmin, &spdiam, &in, &
762  iinfo);
763  if (iinfo != 0) {
764  *info = -4;
765  return 0;
766  }
767 /* DLARRB computes all gaps correctly except for the last one */
768 /* Record distance to VU/GU */
769 /* Computing MAX */
770  d__1 = 0., d__2 = *vu - sigma - (w[wend] + werr[wend]);
771  wgap[wend] = maxMACRO(d__1,d__2);
772  i__2 = indu;
773  for (i__ = indl; i__ <= i__2; ++i__) {
774  ++(*m);
775  iblock[*m] = jblk;
776  indexw[*m] = i__;
777 /* L138: */
778  }
779  } else {
780 /* Call dqds to get all eigs (and then possibly delete unwanted */
781 /* eigenvalues). */
782 /* Note that dqds finds the eigenvalues of the L D L^T representation */
783 /* of T to high relative accuracy. High relative accuracy */
784 /* might be lost when the shift of the RRR is subtracted to obtain */
785 /* the eigenvalues of T. However, T is not guaranteed to define its */
786 /* eigenvalues to high relative accuracy anyway. */
787 /* Set RTOL to the order of the tolerance used in DLASQ2 */
788 /* This is an ESTIMATED error, the worst case bound is 4*N*EPS */
789 /* which is usually too large and requires unnecessary work to be */
790 /* done by bisection when computing the eigenvectors */
791  rtol = template_blas_log((Treal) in) * 4. * eps;
792  j = ibegin;
793  i__2 = in - 1;
794  for (i__ = 1; i__ <= i__2; ++i__) {
795  work[(i__ << 1) - 1] = (d__1 = d__[j], absMACRO(d__1));
796  work[i__ * 2] = e[j] * e[j] * work[(i__ << 1) - 1];
797  ++j;
798 /* L140: */
799  }
800  work[(in << 1) - 1] = (d__1 = d__[iend], absMACRO(d__1));
801  work[in * 2] = 0.;
802  template_lapack_lasq2(&in, &work[1], &iinfo);
803  if (iinfo != 0) {
804 /* If IINFO = -5 then an index is part of a tight cluster */
805 /* and should be changed. The index is in IWORK(1) and the */
806 /* gap is in WORK(N+1) */
807  *info = -5;
808  return 0;
809  } else {
810 /* Test that all eigenvalues are positive as expected */
811  i__2 = in;
812  for (i__ = 1; i__ <= i__2; ++i__) {
813  if (work[i__] < 0.) {
814  *info = -6;
815  return 0;
816  }
817 /* L149: */
818  }
819  }
820  if (sgndef > 0.) {
821  i__2 = indu;
822  for (i__ = indl; i__ <= i__2; ++i__) {
823  ++(*m);
824  w[*m] = work[in - i__ + 1];
825  iblock[*m] = jblk;
826  indexw[*m] = i__;
827 /* L150: */
828  }
829  } else {
830  i__2 = indu;
831  for (i__ = indl; i__ <= i__2; ++i__) {
832  ++(*m);
833  w[*m] = -work[i__];
834  iblock[*m] = jblk;
835  indexw[*m] = i__;
836 /* L160: */
837  }
838  }
839  i__2 = *m;
840  for (i__ = *m - mb + 1; i__ <= i__2; ++i__) {
841 /* the value of RTOL below should be the tolerance in DLASQ2 */
842  werr[i__] = rtol * (d__1 = w[i__], absMACRO(d__1));
843 /* L165: */
844  }
845  i__2 = *m - 1;
846  for (i__ = *m - mb + 1; i__ <= i__2; ++i__) {
847 /* compute the right gap between the intervals */
848 /* Computing MAX */
849  d__1 = 0., d__2 = w[i__ + 1] - werr[i__ + 1] - (w[i__] + werr[
850  i__]);
851  wgap[i__] = maxMACRO(d__1,d__2);
852 /* L166: */
853  }
854 /* Computing MAX */
855  d__1 = 0., d__2 = *vu - sigma - (w[*m] + werr[*m]);
856  wgap[*m] = maxMACRO(d__1,d__2);
857  }
858 /* proceed with next block */
859  ibegin = iend + 1;
860  wbegin = wend + 1;
861 L170:
862  ;
863  }
864 
865  return 0;
866 
867 /* end of DLARRE */
868 
869 } /* dlarre_ */
870 
871 #endif
static const real gu
Definition: fun-pz81.c:71
int template_lapack_lasq2(integer *n, Treal *z__, integer *info)
Definition: template_lapack_lasq2.h:43
#define absMACRO(x)
Definition: template_blas_common.h:45
int template_lapack_larrb(integer *n, Treal *d__, Treal *lld, integer *ifirst, integer *ilast, Treal *rtol1, Treal *rtol2, integer *offset, Treal *w, Treal *wgap, Treal *werr, Treal *work, integer *iwork, Treal *pivmin, Treal *spdiam, integer *twist, integer *info)
Definition: template_lapack_larrb.h:43
int template_lapack_larnv(const integer *idist, integer *iseed, const integer *n, Treal *x)
Definition: template_lapack_larnv.h:40
int template_lapack_larrk(integer *n, integer *iw, Treal *gl, Treal *gu, Treal *d__, Treal *e2, Treal *pivmin, Treal *reltol, Treal *w, Treal *werr, integer *info)
Definition: template_lapack_larrk.h:39
int integer
Definition: template_blas_common.h:38
int template_lapack_larre(const char *range, const integer *n, Treal *vl, Treal *vu, integer *il, integer *iu, Treal *d__, Treal *e, Treal *e2, Treal *rtol1, Treal *rtol2, Treal *spltol, integer *nsplit, integer *isplit, integer *m, Treal *w, Treal *werr, Treal *wgap, integer *iblock, integer *indexw, Treal *gers, Treal *pivmin, Treal *work, integer *iwork, integer *info)
Definition: template_lapack_larre.h:44
int template_lapack_larrc(const char *jobt, const integer *n, const Treal *vl, const Treal *vu, Treal *d__, Treal *e, Treal *pivmin, integer *eigcnt, integer *lcnt, integer *rcnt, integer *info)
Definition: template_lapack_larrc.h:39
#define maxMACRO(a, b)
Definition: template_blas_common.h:43
#define minMACRO(a, b)
Definition: template_blas_common.h:44
Treal template_blas_log(Treal x)
int template_lapack_larrd(const char *range, const char *order, const integer *n, Treal *vl, Treal *vu, integer *il, integer *iu, Treal *gers, Treal *reltol, Treal *d__, Treal *e, Treal *e2, Treal *pivmin, integer *nsplit, integer *isplit, integer *m, Treal *w, Treal *werr, Treal *wl, Treal *wu, integer *iblock, integer *indexw, Treal *work, integer *iwork, integer *info)
Definition: template_lapack_larrd.h:39
Treal template_lapack_lamch(const char *cmach, Treal dummyReal)
Definition: template_lapack_lamch.h:199
bool logical
Definition: template_blas_common.h:39
#define TRUE_
Definition: template_lapack_common.h:40
int template_blas_copy(const integer *n, const Treal *dx, const integer *incx, Treal *dy, const integer *incy)
Definition: template_blas_copy.h:40
int template_lapack_larra(const integer *n, Treal *d__, Treal *e, Treal *e2, Treal *spltol, Treal *tnrm, integer *nsplit, integer *isplit, integer *info)
Definition: template_lapack_larra.h:39
#define FALSE_
Definition: template_lapack_common.h:41
Treal template_blas_sqrt(Treal x)
logical template_blas_lsame(const char *ca, const char *cb)
Definition: template_blas_common.cc:44