ergo
template_lapack_sygst.h
Go to the documentation of this file.
1 /* Ergo, version 3.2, a program for linear scaling electronic structure
2  * calculations.
3  * Copyright (C) 2012 Elias Rudberg, Emanuel H. Rubensson, and Pawel Salek.
4  *
5  * This program is free software: you can redistribute it and/or modify
6  * it under the terms of the GNU General Public License as published by
7  * the Free Software Foundation, either version 3 of the License, or
8  * (at your option) any later version.
9  *
10  * This program is distributed in the hope that it will be useful,
11  * but WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13  * GNU General Public License for more details.
14  *
15  * You should have received a copy of the GNU General Public License
16  * along with this program. If not, see <http://www.gnu.org/licenses/>.
17  *
18  * Primary academic reference:
19  * Kohn−Sham Density Functional Theory Electronic Structure Calculations
20  * with Linearly Scaling Computational Time and Memory Usage,
21  * Elias Rudberg, Emanuel H. Rubensson, and Pawel Salek,
22  * J. Chem. Theory Comput. 7, 340 (2011),
23  * <http://dx.doi.org/10.1021/ct100611z>
24  *
25  * For further information about Ergo, see <http://www.ergoscf.org>.
26  */
27 
28  /* This file belongs to the template_lapack part of the Ergo source
29  * code. The source files in the template_lapack directory are modified
30  * versions of files originally distributed as CLAPACK, see the
31  * Copyright/license notice in the file template_lapack/COPYING.
32  */
33 
34 
35 #ifndef TEMPLATE_LAPACK_SYGST_HEADER
36 #define TEMPLATE_LAPACK_SYGST_HEADER
37 
38 #include "template_lapack_sygs2.h"
39 
40 template<class Treal>
41 int template_lapack_sygst(const integer *itype, const char *uplo, const integer *n,
42  Treal *a, const integer *lda, Treal *b, const integer *ldb, integer *
43  info)
44 {
45 /* -- LAPACK routine (version 3.0) --
46  Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
47  Courant Institute, Argonne National Lab, and Rice University
48  September 30, 1994
49 
50 
51  Purpose
52  =======
53 
54  DSYGST reduces a real symmetric-definite generalized eigenproblem
55  to standard form.
56 
57  If ITYPE = 1, the problem is A*x = lambda*B*x,
58  and A is overwritten by inv(U**T)*A*inv(U) or inv(L)*A*inv(L**T)
59 
60  If ITYPE = 2 or 3, the problem is A*B*x = lambda*x or
61  B*A*x = lambda*x, and A is overwritten by U*A*U**T or L**T*A*L.
62 
63  B must have been previously factorized as U**T*U or L*L**T by DPOTRF.
64 
65  Arguments
66  =========
67 
68  ITYPE (input) INTEGER
69  = 1: compute inv(U**T)*A*inv(U) or inv(L)*A*inv(L**T);
70  = 2 or 3: compute U*A*U**T or L**T*A*L.
71 
72  UPLO (input) CHARACTER
73  = 'U': Upper triangle of A is stored and B is factored as
74  U**T*U;
75  = 'L': Lower triangle of A is stored and B is factored as
76  L*L**T.
77 
78  N (input) INTEGER
79  The order of the matrices A and B. N >= 0.
80 
81  A (input/output) DOUBLE PRECISION array, dimension (LDA,N)
82  On entry, the symmetric matrix A. If UPLO = 'U', the leading
83  N-by-N upper triangular part of A contains the upper
84  triangular part of the matrix A, and the strictly lower
85  triangular part of A is not referenced. If UPLO = 'L', the
86  leading N-by-N lower triangular part of A contains the lower
87  triangular part of the matrix A, and the strictly upper
88  triangular part of A is not referenced.
89 
90  On exit, if INFO = 0, the transformed matrix, stored in the
91  same format as A.
92 
93  LDA (input) INTEGER
94  The leading dimension of the array A. LDA >= max(1,N).
95 
96  B (input) DOUBLE PRECISION array, dimension (LDB,N)
97  The triangular factor from the Cholesky factorization of B,
98  as returned by DPOTRF.
99 
100  LDB (input) INTEGER
101  The leading dimension of the array B. LDB >= max(1,N).
102 
103  INFO (output) INTEGER
104  = 0: successful exit
105  < 0: if INFO = -i, the i-th argument had an illegal value
106 
107  =====================================================================
108 
109 
110  Test the input parameters.
111 
112  Parameter adjustments */
113  /* Table of constant values */
114  integer c__1 = 1;
115  integer c_n1 = -1;
116  Treal c_b14 = 1.;
117  Treal c_b16 = -.5;
118  Treal c_b19 = -1.;
119  Treal c_b52 = .5;
120 
121  /* System generated locals */
122  integer a_dim1, a_offset, b_dim1, b_offset, i__1, i__2, i__3;
123  /* Local variables */
124  integer k;
125  logical upper;
126  integer kb;
127  integer nb;
128 #define a_ref(a_1,a_2) a[(a_2)*a_dim1 + a_1]
129 #define b_ref(a_1,a_2) b[(a_2)*b_dim1 + a_1]
130 
131 
132  a_dim1 = *lda;
133  a_offset = 1 + a_dim1 * 1;
134  a -= a_offset;
135  b_dim1 = *ldb;
136  b_offset = 1 + b_dim1 * 1;
137  b -= b_offset;
138 
139  /* Function Body */
140  *info = 0;
141  upper = template_blas_lsame(uplo, "U");
142  if (*itype < 1 || *itype > 3) {
143  *info = -1;
144  } else if (! upper && ! template_blas_lsame(uplo, "L")) {
145  *info = -2;
146  } else if (*n < 0) {
147  *info = -3;
148  } else if (*lda < maxMACRO(1,*n)) {
149  *info = -5;
150  } else if (*ldb < maxMACRO(1,*n)) {
151  *info = -7;
152  }
153  if (*info != 0) {
154  i__1 = -(*info);
155  template_blas_erbla("SYGST ", &i__1);
156  return 0;
157  }
158 
159 /* Quick return if possible */
160 
161  if (*n == 0) {
162  return 0;
163  }
164 
165 /* Determine the block size for this environment. */
166 
167  nb = template_lapack_ilaenv(&c__1, "DSYGST", uplo, n, &c_n1, &c_n1, &c_n1, (ftnlen)6, (
168  ftnlen)1);
169 
170  if (nb <= 1 || nb >= *n) {
171 
172 /* Use unblocked code */
173 
174  template_lapack_sygs2(itype, uplo, n, &a[a_offset], lda, &b[b_offset], ldb, info);
175  } else {
176 
177 /* Use blocked code */
178 
179  if (*itype == 1) {
180  if (upper) {
181 
182 /* Compute inv(U')*A*inv(U) */
183 
184  i__1 = *n;
185  i__2 = nb;
186  for (k = 1; i__2 < 0 ? k >= i__1 : k <= i__1; k += i__2) {
187 /* Computing MIN */
188  i__3 = *n - k + 1;
189  kb = minMACRO(i__3,nb);
190 
191 /* Update the upper triangle of A(k:n,k:n) */
192 
193  template_lapack_sygs2(itype, uplo, &kb, &a_ref(k, k), lda, &b_ref(k, k),
194  ldb, info);
195  if (k + kb <= *n) {
196  i__3 = *n - k - kb + 1;
197  template_blas_trsm("Left", uplo, "Transpose", "Non-unit", &kb, &
198  i__3, &c_b14, &b_ref(k, k), ldb, &a_ref(k, k
199  + kb), lda);
200  i__3 = *n - k - kb + 1;
201  template_blas_symm("Left", uplo, &kb, &i__3, &c_b16, &a_ref(k, k),
202  lda, &b_ref(k, k + kb), ldb, &c_b14, &a_ref(
203  k, k + kb), lda);
204  i__3 = *n - k - kb + 1;
205  template_blas_syr2k(uplo, "Transpose", &i__3, &kb, &c_b19, &a_ref(
206  k, k + kb), lda, &b_ref(k, k + kb), ldb, &
207  c_b14, &a_ref(k + kb, k + kb), lda);
208  i__3 = *n - k - kb + 1;
209  template_blas_symm("Left", uplo, &kb, &i__3, &c_b16, &a_ref(k, k),
210  lda, &b_ref(k, k + kb), ldb, &c_b14, &a_ref(
211  k, k + kb), lda);
212  i__3 = *n - k - kb + 1;
213  template_blas_trsm("Right", uplo, "No transpose", "Non-unit", &kb,
214  &i__3, &c_b14, &b_ref(k + kb, k + kb), ldb, &
215  a_ref(k, k + kb), lda);
216  }
217 /* L10: */
218  }
219  } else {
220 
221 /* Compute inv(L)*A*inv(L') */
222 
223  i__2 = *n;
224  i__1 = nb;
225  for (k = 1; i__1 < 0 ? k >= i__2 : k <= i__2; k += i__1) {
226 /* Computing MIN */
227  i__3 = *n - k + 1;
228  kb = minMACRO(i__3,nb);
229 
230 /* Update the lower triangle of A(k:n,k:n) */
231 
232  template_lapack_sygs2(itype, uplo, &kb, &a_ref(k, k), lda, &b_ref(k, k),
233  ldb, info);
234  if (k + kb <= *n) {
235  i__3 = *n - k - kb + 1;
236  template_blas_trsm("Right", uplo, "Transpose", "Non-unit", &i__3,
237  &kb, &c_b14, &b_ref(k, k), ldb, &a_ref(k + kb,
238  k), lda);
239  i__3 = *n - k - kb + 1;
240  template_blas_symm("Right", uplo, &i__3, &kb, &c_b16, &a_ref(k, k)
241  , lda, &b_ref(k + kb, k), ldb, &c_b14, &a_ref(
242  k + kb, k), lda);
243  i__3 = *n - k - kb + 1;
244  template_blas_syr2k(uplo, "No transpose", &i__3, &kb, &c_b19, &
245  a_ref(k + kb, k), lda, &b_ref(k + kb, k), ldb,
246  &c_b14, &a_ref(k + kb, k + kb), lda);
247  i__3 = *n - k - kb + 1;
248  template_blas_symm("Right", uplo, &i__3, &kb, &c_b16, &a_ref(k, k)
249  , lda, &b_ref(k + kb, k), ldb, &c_b14, &a_ref(
250  k + kb, k), lda);
251  i__3 = *n - k - kb + 1;
252  template_blas_trsm("Left", uplo, "No transpose", "Non-unit", &
253  i__3, &kb, &c_b14, &b_ref(k + kb, k + kb),
254  ldb, &a_ref(k + kb, k), lda);
255  }
256 /* L20: */
257  }
258  }
259  } else {
260  if (upper) {
261 
262 /* Compute U*A*U' */
263 
264  i__1 = *n;
265  i__2 = nb;
266  for (k = 1; i__2 < 0 ? k >= i__1 : k <= i__1; k += i__2) {
267 /* Computing MIN */
268  i__3 = *n - k + 1;
269  kb = minMACRO(i__3,nb);
270 
271 /* Update the upper triangle of A(1:k+kb-1,1:k+kb-1) */
272 
273  i__3 = k - 1;
274  template_blas_trmm("Left", uplo, "No transpose", "Non-unit", &i__3, &
275  kb, &c_b14, &b[b_offset], ldb, &a_ref(1, k), lda);
276  i__3 = k - 1;
277  template_blas_symm("Right", uplo, &i__3, &kb, &c_b52, &a_ref(k, k),
278  lda, &b_ref(1, k), ldb, &c_b14, &a_ref(1, k), lda);
279  i__3 = k - 1;
280  template_blas_syr2k(uplo, "No transpose", &i__3, &kb, &c_b14, &a_ref(
281  1, k), lda, &b_ref(1, k), ldb, &c_b14, &a[
282  a_offset], lda);
283  i__3 = k - 1;
284  template_blas_symm("Right", uplo, &i__3, &kb, &c_b52, &a_ref(k, k),
285  lda, &b_ref(1, k), ldb, &c_b14, &a_ref(1, k), lda);
286  i__3 = k - 1;
287  template_blas_trmm("Right", uplo, "Transpose", "Non-unit", &i__3, &kb,
288  &c_b14, &b_ref(k, k), ldb, &a_ref(1, k), lda);
289  template_lapack_sygs2(itype, uplo, &kb, &a_ref(k, k), lda, &b_ref(k, k),
290  ldb, info);
291 /* L30: */
292  }
293  } else {
294 
295 /* Compute L'*A*L */
296 
297  i__2 = *n;
298  i__1 = nb;
299  for (k = 1; i__1 < 0 ? k >= i__2 : k <= i__2; k += i__1) {
300 /* Computing MIN */
301  i__3 = *n - k + 1;
302  kb = minMACRO(i__3,nb);
303 
304 /* Update the lower triangle of A(1:k+kb-1,1:k+kb-1) */
305 
306  i__3 = k - 1;
307  template_blas_trmm("Right", uplo, "No transpose", "Non-unit", &kb, &
308  i__3, &c_b14, &b[b_offset], ldb, &a_ref(k, 1),
309  lda);
310  i__3 = k - 1;
311  template_blas_symm("Left", uplo, &kb, &i__3, &c_b52, &a_ref(k, k),
312  lda, &b_ref(k, 1), ldb, &c_b14, &a_ref(k, 1), lda);
313  i__3 = k - 1;
314  template_blas_syr2k(uplo, "Transpose", &i__3, &kb, &c_b14, &a_ref(k,
315  1), lda, &b_ref(k, 1), ldb, &c_b14, &a[a_offset],
316  lda);
317  i__3 = k - 1;
318  template_blas_symm("Left", uplo, &kb, &i__3, &c_b52, &a_ref(k, k),
319  lda, &b_ref(k, 1), ldb, &c_b14, &a_ref(k, 1), lda);
320  i__3 = k - 1;
321  template_blas_trmm("Left", uplo, "Transpose", "Non-unit", &kb, &i__3,
322  &c_b14, &b_ref(k, k), ldb, &a_ref(k, 1), lda);
323  template_lapack_sygs2(itype, uplo, &kb, &a_ref(k, k), lda, &b_ref(k, k),
324  ldb, info);
325 /* L40: */
326  }
327  }
328  }
329  }
330  return 0;
331 
332 /* End of DSYGST */
333 
334 } /* dsygst_ */
335 
336 #undef b_ref
337 #undef a_ref
338 
339 
340 #endif