ergo
template_lapack_spgst.h
Go to the documentation of this file.
1 /* Ergo, version 3.2, a program for linear scaling electronic structure
2  * calculations.
3  * Copyright (C) 2012 Elias Rudberg, Emanuel H. Rubensson, and Pawel Salek.
4  *
5  * This program is free software: you can redistribute it and/or modify
6  * it under the terms of the GNU General Public License as published by
7  * the Free Software Foundation, either version 3 of the License, or
8  * (at your option) any later version.
9  *
10  * This program is distributed in the hope that it will be useful,
11  * but WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13  * GNU General Public License for more details.
14  *
15  * You should have received a copy of the GNU General Public License
16  * along with this program. If not, see <http://www.gnu.org/licenses/>.
17  *
18  * Primary academic reference:
19  * Kohn−Sham Density Functional Theory Electronic Structure Calculations
20  * with Linearly Scaling Computational Time and Memory Usage,
21  * Elias Rudberg, Emanuel H. Rubensson, and Pawel Salek,
22  * J. Chem. Theory Comput. 7, 340 (2011),
23  * <http://dx.doi.org/10.1021/ct100611z>
24  *
25  * For further information about Ergo, see <http://www.ergoscf.org>.
26  */
27 
28  /* This file belongs to the template_lapack part of the Ergo source
29  * code. The source files in the template_lapack directory are modified
30  * versions of files originally distributed as CLAPACK, see the
31  * Copyright/license notice in the file template_lapack/COPYING.
32  */
33 
34 
35 #ifndef TEMPLATE_LAPACK_SPGST_HEADER
36 #define TEMPLATE_LAPACK_SPGST_HEADER
37 
38 #include "template_lapack_common.h"
39 
40 template<class Treal>
41 int template_lapack_spgst(const integer *itype, const char *uplo, const integer *n,
42  Treal *ap, const Treal *bp, integer *info)
43 {
44 /* -- LAPACK routine (version 3.0) --
45  Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
46  Courant Institute, Argonne National Lab, and Rice University
47  March 31, 1993
48 
49 
50  Purpose
51  =======
52 
53  DSPGST reduces a real symmetric-definite generalized eigenproblem
54  to standard form, using packed storage.
55 
56  If ITYPE = 1, the problem is A*x = lambda*B*x,
57  and A is overwritten by inv(U**T)*A*inv(U) or inv(L)*A*inv(L**T)
58 
59  If ITYPE = 2 or 3, the problem is A*B*x = lambda*x or
60  B*A*x = lambda*x, and A is overwritten by U*A*U**T or L**T*A*L.
61 
62  B must have been previously factorized as U**T*U or L*L**T by DPPTRF.
63 
64  Arguments
65  =========
66 
67  ITYPE (input) INTEGER
68  = 1: compute inv(U**T)*A*inv(U) or inv(L)*A*inv(L**T);
69  = 2 or 3: compute U*A*U**T or L**T*A*L.
70 
71  UPLO (input) CHARACTER
72  = 'U': Upper triangle of A is stored and B is factored as
73  U**T*U;
74  = 'L': Lower triangle of A is stored and B is factored as
75  L*L**T.
76 
77  N (input) INTEGER
78  The order of the matrices A and B. N >= 0.
79 
80  AP (input/output) DOUBLE PRECISION array, dimension (N*(N+1)/2)
81  On entry, the upper or lower triangle of the symmetric matrix
82  A, packed columnwise in a linear array. The j-th column of A
83  is stored in the array AP as follows:
84  if UPLO = 'U', AP(i + (j-1)*j/2) = A(i,j) for 1<=i<=j;
85  if UPLO = 'L', AP(i + (j-1)*(2n-j)/2) = A(i,j) for j<=i<=n.
86 
87  On exit, if INFO = 0, the transformed matrix, stored in the
88  same format as A.
89 
90  BP (input) DOUBLE PRECISION array, dimension (N*(N+1)/2)
91  The triangular factor from the Cholesky factorization of B,
92  stored in the same format as A, as returned by DPPTRF.
93 
94  INFO (output) INTEGER
95  = 0: successful exit
96  < 0: if INFO = -i, the i-th argument had an illegal value
97 
98  =====================================================================
99 
100 
101  Test the input parameters.
102 
103  Parameter adjustments */
104  /* Table of constant values */
105  integer c__1 = 1;
106  Treal c_b9 = -1.;
107  Treal c_b11 = 1.;
108 
109  /* System generated locals */
110  integer i__1, i__2;
111  Treal d__1;
112  /* Local variables */
113  integer j, k;
114  logical upper;
115  integer j1, k1;
116  integer jj, kk;
117  Treal ct;
118  Treal ajj;
119  integer j1j1;
120  Treal akk;
121  integer k1k1;
122  Treal bjj, bkk;
123 
124 
125  --bp;
126  --ap;
127 
128  /* Function Body */
129  *info = 0;
130  upper = template_blas_lsame(uplo, "U");
131  if (*itype < 1 || *itype > 3) {
132  *info = -1;
133  } else if (! upper && ! template_blas_lsame(uplo, "L")) {
134  *info = -2;
135  } else if (*n < 0) {
136  *info = -3;
137  }
138  if (*info != 0) {
139  i__1 = -(*info);
140  template_blas_erbla("SPGST ", &i__1);
141  return 0;
142  }
143 
144  if (*itype == 1) {
145  if (upper) {
146 
147 /* Compute inv(U')*A*inv(U)
148 
149  J1 and JJ are the indices of A(1,j) and A(j,j) */
150 
151  jj = 0;
152  i__1 = *n;
153  for (j = 1; j <= i__1; ++j) {
154  j1 = jj + 1;
155  jj += j;
156 
157 /* Compute the j-th column of the upper triangle of A */
158 
159  bjj = bp[jj];
160  template_blas_tpsv(uplo, "Transpose", "Nonunit", &j, &bp[1], &ap[j1], &
161  c__1);
162  i__2 = j - 1;
163  template_blas_spmv(uplo, &i__2, &c_b9, &ap[1], &bp[j1], &c__1, &c_b11, &
164  ap[j1], &c__1);
165  i__2 = j - 1;
166  d__1 = 1. / bjj;
167  template_blas_scal(&i__2, &d__1, &ap[j1], &c__1);
168  i__2 = j - 1;
169  ap[jj] = (ap[jj] - template_blas_dot(&i__2, &ap[j1], &c__1, &bp[j1], &
170  c__1)) / bjj;
171 /* L10: */
172  }
173  } else {
174 
175 /* Compute inv(L)*A*inv(L')
176 
177  KK and K1K1 are the indices of A(k,k) and A(k+1,k+1) */
178 
179  kk = 1;
180  i__1 = *n;
181  for (k = 1; k <= i__1; ++k) {
182  k1k1 = kk + *n - k + 1;
183 
184 /* Update the lower triangle of A(k:n,k:n) */
185 
186  akk = ap[kk];
187  bkk = bp[kk];
188 /* Computing 2nd power */
189  d__1 = bkk;
190  akk /= d__1 * d__1;
191  ap[kk] = akk;
192  if (k < *n) {
193  i__2 = *n - k;
194  d__1 = 1. / bkk;
195  template_blas_scal(&i__2, &d__1, &ap[kk + 1], &c__1);
196  ct = akk * -.5;
197  i__2 = *n - k;
198  template_blas_axpy(&i__2, &ct, &bp[kk + 1], &c__1, &ap[kk + 1], &c__1)
199  ;
200  i__2 = *n - k;
201  template_blas_spr2(uplo, &i__2, &c_b9, &ap[kk + 1], &c__1, &bp[kk + 1]
202  , &c__1, &ap[k1k1]);
203  i__2 = *n - k;
204  template_blas_axpy(&i__2, &ct, &bp[kk + 1], &c__1, &ap[kk + 1], &c__1)
205  ;
206  i__2 = *n - k;
207  template_blas_tpsv(uplo, "No transpose", "Non-unit", &i__2, &bp[k1k1],
208  &ap[kk + 1], &c__1);
209  }
210  kk = k1k1;
211 /* L20: */
212  }
213  }
214  } else {
215  if (upper) {
216 
217 /* Compute U*A*U'
218 
219  K1 and KK are the indices of A(1,k) and A(k,k) */
220 
221  kk = 0;
222  i__1 = *n;
223  for (k = 1; k <= i__1; ++k) {
224  k1 = kk + 1;
225  kk += k;
226 
227 /* Update the upper triangle of A(1:k,1:k) */
228 
229  akk = ap[kk];
230  bkk = bp[kk];
231  i__2 = k - 1;
232  template_blas_tpmv(uplo, "No transpose", "Non-unit", &i__2, &bp[1], &ap[
233  k1], &c__1);
234  ct = akk * .5;
235  i__2 = k - 1;
236  template_blas_axpy(&i__2, &ct, &bp[k1], &c__1, &ap[k1], &c__1);
237  i__2 = k - 1;
238  template_blas_spr2(uplo, &i__2, &c_b11, &ap[k1], &c__1, &bp[k1], &c__1, &
239  ap[1]);
240  i__2 = k - 1;
241  template_blas_axpy(&i__2, &ct, &bp[k1], &c__1, &ap[k1], &c__1);
242  i__2 = k - 1;
243  template_blas_scal(&i__2, &bkk, &ap[k1], &c__1);
244 /* Computing 2nd power */
245  d__1 = bkk;
246  ap[kk] = akk * (d__1 * d__1);
247 /* L30: */
248  }
249  } else {
250 
251 /* Compute L'*A*L
252 
253  JJ and J1J1 are the indices of A(j,j) and A(j+1,j+1) */
254 
255  jj = 1;
256  i__1 = *n;
257  for (j = 1; j <= i__1; ++j) {
258  j1j1 = jj + *n - j + 1;
259 
260 /* Compute the j-th column of the lower triangle of A */
261 
262  ajj = ap[jj];
263  bjj = bp[jj];
264  i__2 = *n - j;
265  ap[jj] = ajj * bjj + template_blas_dot(&i__2, &ap[jj + 1], &c__1, &bp[jj
266  + 1], &c__1);
267  i__2 = *n - j;
268  template_blas_scal(&i__2, &bjj, &ap[jj + 1], &c__1);
269  i__2 = *n - j;
270  template_blas_spmv(uplo, &i__2, &c_b11, &ap[j1j1], &bp[jj + 1], &c__1, &
271  c_b11, &ap[jj + 1], &c__1);
272  i__2 = *n - j + 1;
273  template_blas_tpmv(uplo, "Transpose", "Non-unit", &i__2, &bp[jj], &ap[jj],
274  &c__1);
275  jj = j1j1;
276 /* L40: */
277  }
278  }
279  }
280  return 0;
281 
282 /* End of DSPGST */
283 
284 } /* dspgst_ */
285 
286 #endif