ergo
template_lapack_orgql.h
Go to the documentation of this file.
1 /* Ergo, version 3.2, a program for linear scaling electronic structure
2  * calculations.
3  * Copyright (C) 2012 Elias Rudberg, Emanuel H. Rubensson, and Pawel Salek.
4  *
5  * This program is free software: you can redistribute it and/or modify
6  * it under the terms of the GNU General Public License as published by
7  * the Free Software Foundation, either version 3 of the License, or
8  * (at your option) any later version.
9  *
10  * This program is distributed in the hope that it will be useful,
11  * but WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13  * GNU General Public License for more details.
14  *
15  * You should have received a copy of the GNU General Public License
16  * along with this program. If not, see <http://www.gnu.org/licenses/>.
17  *
18  * Primary academic reference:
19  * Kohn−Sham Density Functional Theory Electronic Structure Calculations
20  * with Linearly Scaling Computational Time and Memory Usage,
21  * Elias Rudberg, Emanuel H. Rubensson, and Pawel Salek,
22  * J. Chem. Theory Comput. 7, 340 (2011),
23  * <http://dx.doi.org/10.1021/ct100611z>
24  *
25  * For further information about Ergo, see <http://www.ergoscf.org>.
26  */
27 
28  /* This file belongs to the template_lapack part of the Ergo source
29  * code. The source files in the template_lapack directory are modified
30  * versions of files originally distributed as CLAPACK, see the
31  * Copyright/license notice in the file template_lapack/COPYING.
32  */
33 
34 
35 #ifndef TEMPLATE_LAPACK_ORGQL_HEADER
36 #define TEMPLATE_LAPACK_ORGQL_HEADER
37 
38 
39 template<class Treal>
40 int template_lapack_orgql(const integer *m, const integer *n, const integer *k, Treal *
41  a, const integer *lda, const Treal *tau, Treal *work, const integer *lwork,
42  integer *info)
43 {
44 /* -- LAPACK routine (version 3.0) --
45  Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
46  Courant Institute, Argonne National Lab, and Rice University
47  June 30, 1999
48 
49 
50  Purpose
51  =======
52 
53  DORGQL generates an M-by-N real matrix Q with orthonormal columns,
54  which is defined as the last N columns of a product of K elementary
55  reflectors of order M
56 
57  Q = H(k) . . . H(2) H(1)
58 
59  as returned by DGEQLF.
60 
61  Arguments
62  =========
63 
64  M (input) INTEGER
65  The number of rows of the matrix Q. M >= 0.
66 
67  N (input) INTEGER
68  The number of columns of the matrix Q. M >= N >= 0.
69 
70  K (input) INTEGER
71  The number of elementary reflectors whose product defines the
72  matrix Q. N >= K >= 0.
73 
74  A (input/output) DOUBLE PRECISION array, dimension (LDA,N)
75  On entry, the (n-k+i)-th column must contain the vector which
76  defines the elementary reflector H(i), for i = 1,2,...,k, as
77  returned by DGEQLF in the last k columns of its array
78  argument A.
79  On exit, the M-by-N matrix Q.
80 
81  LDA (input) INTEGER
82  The first dimension of the array A. LDA >= max(1,M).
83 
84  TAU (input) DOUBLE PRECISION array, dimension (K)
85  TAU(i) must contain the scalar factor of the elementary
86  reflector H(i), as returned by DGEQLF.
87 
88  WORK (workspace/output) DOUBLE PRECISION array, dimension (LWORK)
89  On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
90 
91  LWORK (input) INTEGER
92  The dimension of the array WORK. LWORK >= max(1,N).
93  For optimum performance LWORK >= N*NB, where NB is the
94  optimal blocksize.
95 
96  If LWORK = -1, then a workspace query is assumed; the routine
97  only calculates the optimal size of the WORK array, returns
98  this value as the first entry of the WORK array, and no error
99  message related to LWORK is issued by XERBLA.
100 
101  INFO (output) INTEGER
102  = 0: successful exit
103  < 0: if INFO = -i, the i-th argument has an illegal value
104 
105  =====================================================================
106 
107 
108  Test the input arguments
109 
110  Parameter adjustments */
111  /* Table of constant values */
112  integer c__1 = 1;
113  integer c_n1 = -1;
114  integer c__3 = 3;
115  integer c__2 = 2;
116 
117  /* System generated locals */
118  integer a_dim1, a_offset, i__1, i__2, i__3, i__4;
119  /* Local variables */
120  integer i__, j, l, nbmin, iinfo;
121  integer ib, nb, kk;
122  integer nx;
123  integer ldwork, lwkopt;
124  logical lquery;
125  integer iws;
126 #define a_ref(a_1,a_2) a[(a_2)*a_dim1 + a_1]
127 
128 
129  a_dim1 = *lda;
130  a_offset = 1 + a_dim1 * 1;
131  a -= a_offset;
132  --tau;
133  --work;
134 
135  /* Function Body */
136  *info = 0;
137  nb = template_lapack_ilaenv(&c__1, "DORGQL", " ", m, n, k, &c_n1, (ftnlen)6, (ftnlen)1);
138  lwkopt = maxMACRO(1,*n) * nb;
139  work[1] = (Treal) lwkopt;
140  lquery = *lwork == -1;
141  if (*m < 0) {
142  *info = -1;
143  } else if (*n < 0 || *n > *m) {
144  *info = -2;
145  } else if (*k < 0 || *k > *n) {
146  *info = -3;
147  } else if (*lda < maxMACRO(1,*m)) {
148  *info = -5;
149  } else if (*lwork < maxMACRO(1,*n) && ! lquery) {
150  *info = -8;
151  }
152  if (*info != 0) {
153  i__1 = -(*info);
154  template_blas_erbla("ORGQL ", &i__1);
155  return 0;
156  } else if (lquery) {
157  return 0;
158  }
159 
160 /* Quick return if possible */
161 
162  if (*n <= 0) {
163  work[1] = 1.;
164  return 0;
165  }
166 
167  nbmin = 2;
168  nx = 0;
169  iws = *n;
170  if (nb > 1 && nb < *k) {
171 
172 /* Determine when to cross over from blocked to unblocked code.
173 
174  Computing MAX */
175  i__1 = 0, i__2 = template_lapack_ilaenv(&c__3, "DORGQL", " ", m, n, k, &c_n1, (
176  ftnlen)6, (ftnlen)1);
177  nx = maxMACRO(i__1,i__2);
178  if (nx < *k) {
179 
180 /* Determine if workspace is large enough for blocked code. */
181 
182  ldwork = *n;
183  iws = ldwork * nb;
184  if (*lwork < iws) {
185 
186 /* Not enough workspace to use optimal NB: reduce NB and
187  determine the minimum value of NB. */
188 
189  nb = *lwork / ldwork;
190 /* Computing MAX */
191  i__1 = 2, i__2 = template_lapack_ilaenv(&c__2, "DORGQL", " ", m, n, k, &c_n1,
192  (ftnlen)6, (ftnlen)1);
193  nbmin = maxMACRO(i__1,i__2);
194  }
195  }
196  }
197 
198  if (nb >= nbmin && nb < *k && nx < *k) {
199 
200 /* Use blocked code after the first block.
201  The last kk columns are handled by the block method.
202 
203  Computing MIN */
204  i__1 = *k, i__2 = (*k - nx + nb - 1) / nb * nb;
205  kk = minMACRO(i__1,i__2);
206 
207 /* Set A(m-kk+1:m,1:n-kk) to zero. */
208 
209  i__1 = *n - kk;
210  for (j = 1; j <= i__1; ++j) {
211  i__2 = *m;
212  for (i__ = *m - kk + 1; i__ <= i__2; ++i__) {
213  a_ref(i__, j) = 0.;
214 /* L10: */
215  }
216 /* L20: */
217  }
218  } else {
219  kk = 0;
220  }
221 
222 /* Use unblocked code for the first or only block. */
223 
224  i__1 = *m - kk;
225  i__2 = *n - kk;
226  i__3 = *k - kk;
227  template_lapack_org2l(&i__1, &i__2, &i__3, &a[a_offset], lda, &tau[1], &work[1], &iinfo)
228  ;
229 
230  if (kk > 0) {
231 
232 /* Use blocked code */
233 
234  i__1 = *k;
235  i__2 = nb;
236  for (i__ = *k - kk + 1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ +=
237  i__2) {
238 /* Computing MIN */
239  i__3 = nb, i__4 = *k - i__ + 1;
240  ib = minMACRO(i__3,i__4);
241  if (*n - *k + i__ > 1) {
242 
243 /* Form the triangular factor of the block reflector
244  H = H(i+ib-1) . . . H(i+1) H(i) */
245 
246  i__3 = *m - *k + i__ + ib - 1;
247  template_lapack_larft("Backward", "Columnwise", &i__3, &ib, &a_ref(1, *n - *
248  k + i__), lda, &tau[i__], &work[1], &ldwork);
249 
250 /* Apply H to A(1:m-k+i+ib-1,1:n-k+i-1) from the left */
251 
252  i__3 = *m - *k + i__ + ib - 1;
253  i__4 = *n - *k + i__ - 1;
254  template_lapack_larfb("Left", "No transpose", "Backward", "Columnwise", &
255  i__3, &i__4, &ib, &a_ref(1, *n - *k + i__), lda, &
256  work[1], &ldwork, &a[a_offset], lda, &work[ib + 1], &
257  ldwork);
258  }
259 
260 /* Apply H to rows 1:m-k+i+ib-1 of current block */
261 
262  i__3 = *m - *k + i__ + ib - 1;
263  template_lapack_org2l(&i__3, &ib, &ib, &a_ref(1, *n - *k + i__), lda, &tau[i__],
264  &work[1], &iinfo);
265 
266 /* Set rows m-k+i+ib:m of current block to zero */
267 
268  i__3 = *n - *k + i__ + ib - 1;
269  for (j = *n - *k + i__; j <= i__3; ++j) {
270  i__4 = *m;
271  for (l = *m - *k + i__ + ib; l <= i__4; ++l) {
272  a_ref(l, j) = 0.;
273 /* L30: */
274  }
275 /* L40: */
276  }
277 /* L50: */
278  }
279  }
280 
281  work[1] = (Treal) iws;
282  return 0;
283 
284 /* End of DORGQL */
285 
286 } /* dorgql_ */
287 
288 #undef a_ref
289 
290 
291 #endif