ergo
template_lapack_lasq5.h
Go to the documentation of this file.
1 /* Ergo, version 3.2, a program for linear scaling electronic structure
2  * calculations.
3  * Copyright (C) 2012 Elias Rudberg, Emanuel H. Rubensson, and Pawel Salek.
4  *
5  * This program is free software: you can redistribute it and/or modify
6  * it under the terms of the GNU General Public License as published by
7  * the Free Software Foundation, either version 3 of the License, or
8  * (at your option) any later version.
9  *
10  * This program is distributed in the hope that it will be useful,
11  * but WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13  * GNU General Public License for more details.
14  *
15  * You should have received a copy of the GNU General Public License
16  * along with this program. If not, see <http://www.gnu.org/licenses/>.
17  *
18  * Primary academic reference:
19  * Kohn-Sham Density Functional Theory Electronic Structure Calculations
20  * with Linearly Scaling Computational Time and Memory Usage,
21  * Elias Rudberg, Emanuel H. Rubensson, and Pawel Salek,
22  * J. Chem. Theory Comput. 7, 340 (2011),
23  * <http://dx.doi.org/10.1021/ct100611z>
24  *
25  * For further information about Ergo, see <http://www.ergoscf.org>.
26  */
27 
28  /* This file belongs to the template_lapack part of the Ergo source
29  * code. The source files in the template_lapack directory are modified
30  * versions of files originally distributed as CLAPACK, see the
31  * Copyright/license notice in the file template_lapack/COPYING.
32  */
33 
34 
35 #ifndef TEMPLATE_LAPACK_LASQ5_HEADER
36 #define TEMPLATE_LAPACK_LASQ5_HEADER
37 
38 template<class Treal>
39 int template_lapack_lasq5(integer *i0, integer *n0, Treal *z__,
40  integer *pp, Treal *tau, Treal *dmin__, Treal *dmin1,
41  Treal *dmin2, Treal *dn, Treal *dnm1, Treal *dnm2,
42  logical *ieee)
43 {
44  /* System generated locals */
45  integer i__1;
46  Treal d__1, d__2;
47 
48  /* Local variables */
49  Treal d__;
50  integer j4, j4p2;
51  Treal emin, temp;
52 
53 
54 /* -- LAPACK routine (version 3.2) -- */
55 
56 /* -- Contributed by Osni Marques of the Lawrence Berkeley National -- */
57 /* -- Laboratory and Beresford Parlett of the Univ. of California at -- */
58 /* -- Berkeley -- */
59 /* -- November 2008 -- */
60 
61 /* -- LAPACK is a software package provided by Univ. of Tennessee, -- */
62 /* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */
63 
64 /* .. Scalar Arguments .. */
65 /* .. */
66 /* .. Array Arguments .. */
67 /* .. */
68 
69 /* Purpose */
70 /* ======= */
71 
72 /* DLASQ5 computes one dqds transform in ping-pong form, one */
73 /* version for IEEE machines another for non IEEE machines. */
74 
75 /* Arguments */
76 /* ========= */
77 
78 /* I0 (input) INTEGER */
79 /* First index. */
80 
81 /* N0 (input) INTEGER */
82 /* Last index. */
83 
84 /* Z (input) DOUBLE PRECISION array, dimension ( 4*N ) */
85 /* Z holds the qd array. EMIN is stored in Z(4*N0) to avoid */
86 /* an extra argument. */
87 
88 /* PP (input) INTEGER */
89 /* PP=0 for ping, PP=1 for pong. */
90 
91 /* TAU (input) DOUBLE PRECISION */
92 /* This is the shift. */
93 
94 /* DMIN (output) DOUBLE PRECISION */
95 /* Minimum value of d. */
96 
97 /* DMIN1 (output) DOUBLE PRECISION */
98 /* Minimum value of d, excluding D( N0 ). */
99 
100 /* DMIN2 (output) DOUBLE PRECISION */
101 /* Minimum value of d, excluding D( N0 ) and D( N0-1 ). */
102 
103 /* DN (output) DOUBLE PRECISION */
104 /* d(N0), the last value of d. */
105 
106 /* DNM1 (output) DOUBLE PRECISION */
107 /* d(N0-1). */
108 
109 /* DNM2 (output) DOUBLE PRECISION */
110 /* d(N0-2). */
111 
112 /* IEEE (input) LOGICAL */
113 /* Flag for IEEE or non IEEE arithmetic. */
114 
115 /* ===================================================================== */
116 
117 /* .. Parameter .. */
118 /* .. */
119 /* .. Local Scalars .. */
120 /* .. */
121 /* .. Intrinsic Functions .. */
122 /* .. */
123 /* .. Executable Statements .. */
124 
125  /* Parameter adjustments */
126  --z__;
127 
128  /* Function Body */
129  if (*n0 - *i0 - 1 <= 0) {
130  return 0;
131  }
132 
133  j4 = (*i0 << 2) + *pp - 3;
134  emin = z__[j4 + 4];
135  d__ = z__[j4] - *tau;
136  *dmin__ = d__;
137  *dmin1 = -z__[j4];
138 
139  if (*ieee) {
140 
141 /* Code for IEEE arithmetic. */
142 
143  if (*pp == 0) {
144  i__1 = ( *n0 - 3 ) << 2;
145  for (j4 = *i0 << 2; j4 <= i__1; j4 += 4) {
146  z__[j4 - 2] = d__ + z__[j4 - 1];
147  temp = z__[j4 + 1] / z__[j4 - 2];
148  d__ = d__ * temp - *tau;
149  *dmin__ = minMACRO(*dmin__,d__);
150  z__[j4] = z__[j4 - 1] * temp;
151 /* Computing MIN */
152  d__1 = z__[j4];
153  emin = minMACRO(d__1,emin);
154 /* L10: */
155  }
156  } else {
157  i__1 = ( *n0 - 3 ) << 2;
158  for (j4 = *i0 << 2; j4 <= i__1; j4 += 4) {
159  z__[j4 - 3] = d__ + z__[j4];
160  temp = z__[j4 + 2] / z__[j4 - 3];
161  d__ = d__ * temp - *tau;
162  *dmin__ = minMACRO(*dmin__,d__);
163  z__[j4 - 1] = z__[j4] * temp;
164 /* Computing MIN */
165  d__1 = z__[j4 - 1];
166  emin = minMACRO(d__1,emin);
167 /* L20: */
168  }
169  }
170 
171 /* Unroll last two steps. */
172 
173  *dnm2 = d__;
174  *dmin2 = *dmin__;
175  j4 = ( ( *n0 - 2 ) << 2) - *pp;
176  j4p2 = j4 + (*pp << 1) - 1;
177  z__[j4 - 2] = *dnm2 + z__[j4p2];
178  z__[j4] = z__[j4p2 + 2] * (z__[j4p2] / z__[j4 - 2]);
179  *dnm1 = z__[j4p2 + 2] * (*dnm2 / z__[j4 - 2]) - *tau;
180  *dmin__ = minMACRO(*dmin__,*dnm1);
181 
182  *dmin1 = *dmin__;
183  j4 += 4;
184  j4p2 = j4 + (*pp << 1) - 1;
185  z__[j4 - 2] = *dnm1 + z__[j4p2];
186  z__[j4] = z__[j4p2 + 2] * (z__[j4p2] / z__[j4 - 2]);
187  *dn = z__[j4p2 + 2] * (*dnm1 / z__[j4 - 2]) - *tau;
188  *dmin__ = minMACRO(*dmin__,*dn);
189 
190  } else {
191 
192 /* Code for non IEEE arithmetic. */
193 
194  if (*pp == 0) {
195  i__1 = ( *n0 - 3 ) << 2;
196  for (j4 = *i0 << 2; j4 <= i__1; j4 += 4) {
197  z__[j4 - 2] = d__ + z__[j4 - 1];
198  if (d__ < 0.) {
199  return 0;
200  } else {
201  z__[j4] = z__[j4 + 1] * (z__[j4 - 1] / z__[j4 - 2]);
202  d__ = z__[j4 + 1] * (d__ / z__[j4 - 2]) - *tau;
203  }
204  *dmin__ = minMACRO(*dmin__,d__);
205 /* Computing MIN */
206  d__1 = emin, d__2 = z__[j4];
207  emin = minMACRO(d__1,d__2);
208 /* L30: */
209  }
210  } else {
211  i__1 = ( *n0 - 3 ) << 2;
212  for (j4 = *i0 << 2; j4 <= i__1; j4 += 4) {
213  z__[j4 - 3] = d__ + z__[j4];
214  if (d__ < 0.) {
215  return 0;
216  } else {
217  z__[j4 - 1] = z__[j4 + 2] * (z__[j4] / z__[j4 - 3]);
218  d__ = z__[j4 + 2] * (d__ / z__[j4 - 3]) - *tau;
219  }
220  *dmin__ = minMACRO(*dmin__,d__);
221 /* Computing MIN */
222  d__1 = emin, d__2 = z__[j4 - 1];
223  emin = minMACRO(d__1,d__2);
224 /* L40: */
225  }
226  }
227 
228 /* Unroll last two steps. */
229 
230  *dnm2 = d__;
231  *dmin2 = *dmin__;
232  j4 = ( ( *n0 - 2 ) << 2) - *pp;
233  j4p2 = j4 + (*pp << 1) - 1;
234  z__[j4 - 2] = *dnm2 + z__[j4p2];
235  if (*dnm2 < 0.) {
236  return 0;
237  } else {
238  z__[j4] = z__[j4p2 + 2] * (z__[j4p2] / z__[j4 - 2]);
239  *dnm1 = z__[j4p2 + 2] * (*dnm2 / z__[j4 - 2]) - *tau;
240  }
241  *dmin__ = minMACRO(*dmin__,*dnm1);
242 
243  *dmin1 = *dmin__;
244  j4 += 4;
245  j4p2 = j4 + (*pp << 1) - 1;
246  z__[j4 - 2] = *dnm1 + z__[j4p2];
247  if (*dnm1 < 0.) {
248  return 0;
249  } else {
250  z__[j4] = z__[j4p2 + 2] * (z__[j4p2] / z__[j4 - 2]);
251  *dn = z__[j4p2 + 2] * (*dnm1 / z__[j4 - 2]) - *tau;
252  }
253  *dmin__ = minMACRO(*dmin__,*dn);
254 
255  }
256 
257  z__[j4 + 2] = *dn;
258  z__[(*n0 << 2) - *pp] = emin;
259  return 0;
260 
261 /* End of DLASQ5 */
262 
263 } /* dlasq5_ */
264 
265 #endif