ergo
template_lapack_ormqr.h
Go to the documentation of this file.
1 /* Ergo, version 3.2, a program for linear scaling electronic structure
2  * calculations.
3  * Copyright (C) 2012 Elias Rudberg, Emanuel H. Rubensson, and Pawel Salek.
4  *
5  * This program is free software: you can redistribute it and/or modify
6  * it under the terms of the GNU General Public License as published by
7  * the Free Software Foundation, either version 3 of the License, or
8  * (at your option) any later version.
9  *
10  * This program is distributed in the hope that it will be useful,
11  * but WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13  * GNU General Public License for more details.
14  *
15  * You should have received a copy of the GNU General Public License
16  * along with this program. If not, see <http://www.gnu.org/licenses/>.
17  *
18  * Primary academic reference:
19  * Kohn−Sham Density Functional Theory Electronic Structure Calculations
20  * with Linearly Scaling Computational Time and Memory Usage,
21  * Elias Rudberg, Emanuel H. Rubensson, and Pawel Salek,
22  * J. Chem. Theory Comput. 7, 340 (2011),
23  * <http://dx.doi.org/10.1021/ct100611z>
24  *
25  * For further information about Ergo, see <http://www.ergoscf.org>.
26  */
27 
28  /* This file belongs to the template_lapack part of the Ergo source
29  * code. The source files in the template_lapack directory are modified
30  * versions of files originally distributed as CLAPACK, see the
31  * Copyright/license notice in the file template_lapack/COPYING.
32  */
33 
34 
35 #ifndef TEMPLATE_LAPACK_ORMQR_HEADER
36 #define TEMPLATE_LAPACK_ORMQR_HEADER
37 
38 
39 template<class Treal>
40 int template_lapack_ormqr(char *side, char *trans, const integer *m, const integer *n,
41  const integer *k, Treal *a, const integer *lda, const Treal *tau, Treal *
42  c__, const integer *ldc, Treal *work, const integer *lwork, integer *info)
43 {
44 /* -- LAPACK routine (version 3.0) --
45  Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
46  Courant Institute, Argonne National Lab, and Rice University
47  June 30, 1999
48 
49 
50  Purpose
51  =======
52 
53  DORMQR overwrites the general real M-by-N matrix C with
54 
55  SIDE = 'L' SIDE = 'R'
56  TRANS = 'N': Q * C C * Q
57  TRANS = 'T': Q**T * C C * Q**T
58 
59  where Q is a real orthogonal matrix defined as the product of k
60  elementary reflectors
61 
62  Q = H(1) H(2) . . . H(k)
63 
64  as returned by DGEQRF. Q is of order M if SIDE = 'L' and of order N
65  if SIDE = 'R'.
66 
67  Arguments
68  =========
69 
70  SIDE (input) CHARACTER*1
71  = 'L': apply Q or Q**T from the Left;
72  = 'R': apply Q or Q**T from the Right.
73 
74  TRANS (input) CHARACTER*1
75  = 'N': No transpose, apply Q;
76  = 'T': Transpose, apply Q**T.
77 
78  M (input) INTEGER
79  The number of rows of the matrix C. M >= 0.
80 
81  N (input) INTEGER
82  The number of columns of the matrix C. N >= 0.
83 
84  K (input) INTEGER
85  The number of elementary reflectors whose product defines
86  the matrix Q.
87  If SIDE = 'L', M >= K >= 0;
88  if SIDE = 'R', N >= K >= 0.
89 
90  A (input) DOUBLE PRECISION array, dimension (LDA,K)
91  The i-th column must contain the vector which defines the
92  elementary reflector H(i), for i = 1,2,...,k, as returned by
93  DGEQRF in the first k columns of its array argument A.
94  A is modified by the routine but restored on exit.
95 
96  LDA (input) INTEGER
97  The leading dimension of the array A.
98  If SIDE = 'L', LDA >= max(1,M);
99  if SIDE = 'R', LDA >= max(1,N).
100 
101  TAU (input) DOUBLE PRECISION array, dimension (K)
102  TAU(i) must contain the scalar factor of the elementary
103  reflector H(i), as returned by DGEQRF.
104 
105  C (input/output) DOUBLE PRECISION array, dimension (LDC,N)
106  On entry, the M-by-N matrix C.
107  On exit, C is overwritten by Q*C or Q**T*C or C*Q**T or C*Q.
108 
109  LDC (input) INTEGER
110  The leading dimension of the array C. LDC >= max(1,M).
111 
112  WORK (workspace/output) DOUBLE PRECISION array, dimension (LWORK)
113  On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
114 
115  LWORK (input) INTEGER
116  The dimension of the array WORK.
117  If SIDE = 'L', LWORK >= max(1,N);
118  if SIDE = 'R', LWORK >= max(1,M).
119  For optimum performance LWORK >= N*NB if SIDE = 'L', and
120  LWORK >= M*NB if SIDE = 'R', where NB is the optimal
121  blocksize.
122 
123  If LWORK = -1, then a workspace query is assumed; the routine
124  only calculates the optimal size of the WORK array, returns
125  this value as the first entry of the WORK array, and no error
126  message related to LWORK is issued by XERBLA.
127 
128  INFO (output) INTEGER
129  = 0: successful exit
130  < 0: if INFO = -i, the i-th argument had an illegal value
131 
132  =====================================================================
133 
134 
135  Test the input arguments
136 
137  Parameter adjustments */
138  /* Table of constant values */
139  integer c__1 = 1;
140  integer c_n1 = -1;
141  integer c__2 = 2;
142  integer c__65 = 65;
143 
144  /* System generated locals */
145  address a__1[2];
146  integer a_dim1, a_offset, c_dim1, c_offset, i__1, i__2, i__3[2], i__4,
147  i__5;
148  char ch__1[2];
149  /* Local variables */
150  logical left;
151  integer i__;
152  Treal t[4160] /* was [65][64] */;
153  integer nbmin, iinfo, i1, i2, i3;
154  integer ib, ic, jc, nb, mi, ni;
155  integer nq, nw;
156  logical notran;
157  integer ldwork, lwkopt;
158  logical lquery;
159  integer iws;
160 #define a_ref(a_1,a_2) a[(a_2)*a_dim1 + a_1]
161 #define c___ref(a_1,a_2) c__[(a_2)*c_dim1 + a_1]
162 
163 
164  a_dim1 = *lda;
165  a_offset = 1 + a_dim1 * 1;
166  a -= a_offset;
167  --tau;
168  c_dim1 = *ldc;
169  c_offset = 1 + c_dim1 * 1;
170  c__ -= c_offset;
171  --work;
172 
173  /* Initialization added by Elias to get rid of compiler warnings. */
174  lwkopt = 0;
175  nb = 0;
176  /* Function Body */
177  *info = 0;
178  left = template_blas_lsame(side, "L");
179  notran = template_blas_lsame(trans, "N");
180  lquery = *lwork == -1;
181 
182 /* NQ is the order of Q and NW is the minimum dimension of WORK */
183 
184  if (left) {
185  nq = *m;
186  nw = *n;
187  } else {
188  nq = *n;
189  nw = *m;
190  }
191  if (! left && ! template_blas_lsame(side, "R")) {
192  *info = -1;
193  } else if (! notran && ! template_blas_lsame(trans, "T")) {
194  *info = -2;
195  } else if (*m < 0) {
196  *info = -3;
197  } else if (*n < 0) {
198  *info = -4;
199  } else if (*k < 0 || *k > nq) {
200  *info = -5;
201  } else if (*lda < maxMACRO(1,nq)) {
202  *info = -7;
203  } else if (*ldc < maxMACRO(1,*m)) {
204  *info = -10;
205  } else if (*lwork < maxMACRO(1,nw) && ! lquery) {
206  *info = -12;
207  }
208 
209  if (*info == 0) {
210 
211 /* Determine the block size. NB may be at most NBMAX, where NBMAX
212  is used to define the local array T.
213 
214  Computing MIN
215  Writing concatenation */
216  i__3[0] = 1, a__1[0] = side;
217  i__3[1] = 1, a__1[1] = trans;
218  template_blas_s_cat(ch__1, a__1, i__3, &c__2, (ftnlen)2);
219  i__1 = 64, i__2 = template_lapack_ilaenv(&c__1, "DORMQR", ch__1, m, n, k, &c_n1, (
220  ftnlen)6, (ftnlen)2);
221  nb = minMACRO(i__1,i__2);
222  lwkopt = maxMACRO(1,nw) * nb;
223  work[1] = (Treal) lwkopt;
224  }
225 
226  if (*info != 0) {
227  i__1 = -(*info);
228  template_blas_erbla("ORMQR ", &i__1);
229  return 0;
230  } else if (lquery) {
231  return 0;
232  }
233 
234 /* Quick return if possible */
235 
236  if (*m == 0 || *n == 0 || *k == 0) {
237  work[1] = 1.;
238  return 0;
239  }
240 
241  nbmin = 2;
242  ldwork = nw;
243  if (nb > 1 && nb < *k) {
244  iws = nw * nb;
245  if (*lwork < iws) {
246  nb = *lwork / ldwork;
247 /* Computing MAX
248  Writing concatenation */
249  i__3[0] = 1, a__1[0] = side;
250  i__3[1] = 1, a__1[1] = trans;
251  template_blas_s_cat(ch__1, a__1, i__3, &c__2, (ftnlen)2);
252  i__1 = 2, i__2 = template_lapack_ilaenv(&c__2, "DORMQR", ch__1, m, n, k, &c_n1, (
253  ftnlen)6, (ftnlen)2);
254  nbmin = maxMACRO(i__1,i__2);
255  }
256  } else {
257  iws = nw;
258  }
259 
260  if (nb < nbmin || nb >= *k) {
261 
262 /* Use unblocked code */
263 
264  template_lapack_orm2r(side, trans, m, n, k, &a[a_offset], lda, &tau[1], &c__[
265  c_offset], ldc, &work[1], &iinfo);
266  } else {
267 
268 /* Use blocked code */
269 
270  if ( ( left && ! notran ) || ( ! left && notran ) ) {
271  i1 = 1;
272  i2 = *k;
273  i3 = nb;
274  } else {
275  i1 = (*k - 1) / nb * nb + 1;
276  i2 = 1;
277  i3 = -nb;
278  }
279 
280  if (left) {
281  ni = *n;
282  jc = 1;
283  } else {
284  mi = *m;
285  ic = 1;
286  }
287 
288  i__1 = i2;
289  i__2 = i3;
290  for (i__ = i1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__2) {
291 /* Computing MIN */
292  i__4 = nb, i__5 = *k - i__ + 1;
293  ib = minMACRO(i__4,i__5);
294 
295 /* Form the triangular factor of the block reflector
296  H = H(i) H(i+1) . . . H(i+ib-1) */
297 
298  i__4 = nq - i__ + 1;
299  template_lapack_larft("Forward", "Columnwise", &i__4, &ib, &a_ref(i__, i__),
300  lda, &tau[i__], t, &c__65);
301  if (left) {
302 
303 /* H or H' is applied to C(i:m,1:n) */
304 
305  mi = *m - i__ + 1;
306  ic = i__;
307  } else {
308 
309 /* H or H' is applied to C(1:m,i:n) */
310 
311  ni = *n - i__ + 1;
312  jc = i__;
313  }
314 
315 /* Apply H or H' */
316 
317  template_lapack_larfb(side, trans, "Forward", "Columnwise", &mi, &ni, &ib, &
318  a_ref(i__, i__), lda, t, &c__65, &c___ref(ic, jc), ldc, &
319  work[1], &ldwork);
320 /* L10: */
321  }
322  }
323  work[1] = (Treal) lwkopt;
324  return 0;
325 
326 /* End of DORMQR */
327 
328 } /* dormqr_ */
329 
330 #undef c___ref
331 #undef a_ref
332 
333 
334 #endif