ergo
template_blas_trsm.h
Go to the documentation of this file.
1 /* Ergo, version 3.2, a program for linear scaling electronic structure
2  * calculations.
3  * Copyright (C) 2012 Elias Rudberg, Emanuel H. Rubensson, and Pawel Salek.
4  *
5  * This program is free software: you can redistribute it and/or modify
6  * it under the terms of the GNU General Public License as published by
7  * the Free Software Foundation, either version 3 of the License, or
8  * (at your option) any later version.
9  *
10  * This program is distributed in the hope that it will be useful,
11  * but WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13  * GNU General Public License for more details.
14  *
15  * You should have received a copy of the GNU General Public License
16  * along with this program. If not, see <http://www.gnu.org/licenses/>.
17  *
18  * Primary academic reference:
19  * Kohn−Sham Density Functional Theory Electronic Structure Calculations
20  * with Linearly Scaling Computational Time and Memory Usage,
21  * Elias Rudberg, Emanuel H. Rubensson, and Pawel Salek,
22  * J. Chem. Theory Comput. 7, 340 (2011),
23  * <http://dx.doi.org/10.1021/ct100611z>
24  *
25  * For further information about Ergo, see <http://www.ergoscf.org>.
26  */
27 
28  /* This file belongs to the template_lapack part of the Ergo source
29  * code. The source files in the template_lapack directory are modified
30  * versions of files originally distributed as CLAPACK, see the
31  * Copyright/license notice in the file template_lapack/COPYING.
32  */
33 
34 
35 #ifndef TEMPLATE_BLAS_TRSM_HEADER
36 #define TEMPLATE_BLAS_TRSM_HEADER
37 
38 #include "template_blas_common.h"
39 
40 template<class Treal>
41 int template_blas_trsm(const char *side, const char *uplo, const char *transa, const char *diag,
42  const integer *m, const integer *n, const Treal *alpha, const Treal *a, const integer *
43  lda, Treal *b, const integer *ldb)
44 {
45  /* System generated locals */
46  integer a_dim1, a_offset, b_dim1, b_offset, i__1, i__2, i__3;
47  /* Local variables */
48  integer info;
49  Treal temp;
50  integer i__, j, k;
51  logical lside;
52  integer nrowa;
53  logical upper;
54  logical nounit;
55 #define a_ref(a_1,a_2) a[(a_2)*a_dim1 + a_1]
56 #define b_ref(a_1,a_2) b[(a_2)*b_dim1 + a_1]
57 /* Purpose
58  =======
59  DTRSM solves one of the matrix equations
60  op( A )*X = alpha*B, or X*op( A ) = alpha*B,
61  where alpha is a scalar, X and B are m by n matrices, A is a unit, or
62  non-unit, upper or lower triangular matrix and op( A ) is one of
63  op( A ) = A or op( A ) = A'.
64  The matrix X is overwritten on B.
65  Parameters
66  ==========
67  SIDE - CHARACTER*1.
68  On entry, SIDE specifies whether op( A ) appears on the left
69  or right of X as follows:
70  SIDE = 'L' or 'l' op( A )*X = alpha*B.
71  SIDE = 'R' or 'r' X*op( A ) = alpha*B.
72  Unchanged on exit.
73  UPLO - CHARACTER*1.
74  On entry, UPLO specifies whether the matrix A is an upper or
75  lower triangular matrix as follows:
76  UPLO = 'U' or 'u' A is an upper triangular matrix.
77  UPLO = 'L' or 'l' A is a lower triangular matrix.
78  Unchanged on exit.
79  TRANSA - CHARACTER*1.
80  On entry, TRANSA specifies the form of op( A ) to be used in
81  the matrix multiplication as follows:
82  TRANSA = 'N' or 'n' op( A ) = A.
83  TRANSA = 'T' or 't' op( A ) = A'.
84  TRANSA = 'C' or 'c' op( A ) = A'.
85  Unchanged on exit.
86  DIAG - CHARACTER*1.
87  On entry, DIAG specifies whether or not A is unit triangular
88  as follows:
89  DIAG = 'U' or 'u' A is assumed to be unit triangular.
90  DIAG = 'N' or 'n' A is not assumed to be unit
91  triangular.
92  Unchanged on exit.
93  M - INTEGER.
94  On entry, M specifies the number of rows of B. M must be at
95  least zero.
96  Unchanged on exit.
97  N - INTEGER.
98  On entry, N specifies the number of columns of B. N must be
99  at least zero.
100  Unchanged on exit.
101  ALPHA - DOUBLE PRECISION.
102  On entry, ALPHA specifies the scalar alpha. When alpha is
103  zero then A is not referenced and B need not be set before
104  entry.
105  Unchanged on exit.
106  A - DOUBLE PRECISION array of DIMENSION ( LDA, k ), where k is m
107  when SIDE = 'L' or 'l' and is n when SIDE = 'R' or 'r'.
108  Before entry with UPLO = 'U' or 'u', the leading k by k
109  upper triangular part of the array A must contain the upper
110  triangular matrix and the strictly lower triangular part of
111  A is not referenced.
112  Before entry with UPLO = 'L' or 'l', the leading k by k
113  lower triangular part of the array A must contain the lower
114  triangular matrix and the strictly upper triangular part of
115  A is not referenced.
116  Note that when DIAG = 'U' or 'u', the diagonal elements of
117  A are not referenced either, but are assumed to be unity.
118  Unchanged on exit.
119  LDA - INTEGER.
120  On entry, LDA specifies the first dimension of A as declared
121  in the calling (sub) program. When SIDE = 'L' or 'l' then
122  LDA must be at least max( 1, m ), when SIDE = 'R' or 'r'
123  then LDA must be at least max( 1, n ).
124  Unchanged on exit.
125  B - DOUBLE PRECISION array of DIMENSION ( LDB, n ).
126  Before entry, the leading m by n part of the array B must
127  contain the right-hand side matrix B, and on exit is
128  overwritten by the solution matrix X.
129  LDB - INTEGER.
130  On entry, LDB specifies the first dimension of B as declared
131  in the calling (sub) program. LDB must be at least
132  max( 1, m ).
133  Unchanged on exit.
134  Level 3 Blas routine.
135  -- Written on 8-February-1989.
136  Jack Dongarra, Argonne National Laboratory.
137  Iain Duff, AERE Harwell.
138  Jeremy Du Croz, Numerical Algorithms Group Ltd.
139  Sven Hammarling, Numerical Algorithms Group Ltd.
140  Test the input parameters.
141  Parameter adjustments */
142  a_dim1 = *lda;
143  a_offset = 1 + a_dim1 * 1;
144  a -= a_offset;
145  b_dim1 = *ldb;
146  b_offset = 1 + b_dim1 * 1;
147  b -= b_offset;
148  /* Function Body */
149  lside = template_blas_lsame(side, "L");
150  if (lside) {
151  nrowa = *m;
152  } else {
153  nrowa = *n;
154  }
155  nounit = template_blas_lsame(diag, "N");
156  upper = template_blas_lsame(uplo, "U");
157  info = 0;
158  if (! lside && ! template_blas_lsame(side, "R")) {
159  info = 1;
160  } else if (! upper && ! template_blas_lsame(uplo, "L")) {
161  info = 2;
162  } else if (! template_blas_lsame(transa, "N") && ! template_blas_lsame(transa,
163  "T") && ! template_blas_lsame(transa, "C")) {
164  info = 3;
165  } else if (! template_blas_lsame(diag, "U") && ! template_blas_lsame(diag,
166  "N")) {
167  info = 4;
168  } else if (*m < 0) {
169  info = 5;
170  } else if (*n < 0) {
171  info = 6;
172  } else if (*lda < maxMACRO(1,nrowa)) {
173  info = 9;
174  } else if (*ldb < maxMACRO(1,*m)) {
175  info = 11;
176  }
177  if (info != 0) {
178  template_blas_erbla("TRSM ", &info);
179  return 0;
180  }
181 /* Quick return if possible. */
182  if (*n == 0) {
183  return 0;
184  }
185 /* And when alpha.eq.zero. */
186  if (*alpha == 0.) {
187  i__1 = *n;
188  for (j = 1; j <= i__1; ++j) {
189  i__2 = *m;
190  for (i__ = 1; i__ <= i__2; ++i__) {
191  b_ref(i__, j) = 0.;
192 /* L10: */
193  }
194 /* L20: */
195  }
196  return 0;
197  }
198 /* Start the operations. */
199  if (lside) {
200  if (template_blas_lsame(transa, "N")) {
201 /* Form B := alpha*inv( A )*B. */
202  if (upper) {
203  i__1 = *n;
204  for (j = 1; j <= i__1; ++j) {
205  if (*alpha != 1.) {
206  i__2 = *m;
207  for (i__ = 1; i__ <= i__2; ++i__) {
208  b_ref(i__, j) = *alpha * b_ref(i__, j);
209 /* L30: */
210  }
211  }
212  for (k = *m; k >= 1; --k) {
213  if (b_ref(k, j) != 0.) {
214  if (nounit) {
215  b_ref(k, j) = b_ref(k, j) / a_ref(k, k);
216  }
217  i__2 = k - 1;
218  for (i__ = 1; i__ <= i__2; ++i__) {
219  b_ref(i__, j) = b_ref(i__, j) - b_ref(k, j) *
220  a_ref(i__, k);
221 /* L40: */
222  }
223  }
224 /* L50: */
225  }
226 /* L60: */
227  }
228  } else {
229  i__1 = *n;
230  for (j = 1; j <= i__1; ++j) {
231  if (*alpha != 1.) {
232  i__2 = *m;
233  for (i__ = 1; i__ <= i__2; ++i__) {
234  b_ref(i__, j) = *alpha * b_ref(i__, j);
235 /* L70: */
236  }
237  }
238  i__2 = *m;
239  for (k = 1; k <= i__2; ++k) {
240  if (b_ref(k, j) != 0.) {
241  if (nounit) {
242  b_ref(k, j) = b_ref(k, j) / a_ref(k, k);
243  }
244  i__3 = *m;
245  for (i__ = k + 1; i__ <= i__3; ++i__) {
246  b_ref(i__, j) = b_ref(i__, j) - b_ref(k, j) *
247  a_ref(i__, k);
248 /* L80: */
249  }
250  }
251 /* L90: */
252  }
253 /* L100: */
254  }
255  }
256  } else {
257 /* Form B := alpha*inv( A' )*B. */
258  if (upper) {
259  i__1 = *n;
260  for (j = 1; j <= i__1; ++j) {
261  i__2 = *m;
262  for (i__ = 1; i__ <= i__2; ++i__) {
263  temp = *alpha * b_ref(i__, j);
264  i__3 = i__ - 1;
265  for (k = 1; k <= i__3; ++k) {
266  temp -= a_ref(k, i__) * b_ref(k, j);
267 /* L110: */
268  }
269  if (nounit) {
270  temp /= a_ref(i__, i__);
271  }
272  b_ref(i__, j) = temp;
273 /* L120: */
274  }
275 /* L130: */
276  }
277  } else {
278  i__1 = *n;
279  for (j = 1; j <= i__1; ++j) {
280  for (i__ = *m; i__ >= 1; --i__) {
281  temp = *alpha * b_ref(i__, j);
282  i__2 = *m;
283  for (k = i__ + 1; k <= i__2; ++k) {
284  temp -= a_ref(k, i__) * b_ref(k, j);
285 /* L140: */
286  }
287  if (nounit) {
288  temp /= a_ref(i__, i__);
289  }
290  b_ref(i__, j) = temp;
291 /* L150: */
292  }
293 /* L160: */
294  }
295  }
296  }
297  } else {
298  if (template_blas_lsame(transa, "N")) {
299 /* Form B := alpha*B*inv( A ). */
300  if (upper) {
301  i__1 = *n;
302  for (j = 1; j <= i__1; ++j) {
303  if (*alpha != 1.) {
304  i__2 = *m;
305  for (i__ = 1; i__ <= i__2; ++i__) {
306  b_ref(i__, j) = *alpha * b_ref(i__, j);
307 /* L170: */
308  }
309  }
310  i__2 = j - 1;
311  for (k = 1; k <= i__2; ++k) {
312  if (a_ref(k, j) != 0.) {
313  i__3 = *m;
314  for (i__ = 1; i__ <= i__3; ++i__) {
315  b_ref(i__, j) = b_ref(i__, j) - a_ref(k, j) *
316  b_ref(i__, k);
317 /* L180: */
318  }
319  }
320 /* L190: */
321  }
322  if (nounit) {
323  temp = 1. / a_ref(j, j);
324  i__2 = *m;
325  for (i__ = 1; i__ <= i__2; ++i__) {
326  b_ref(i__, j) = temp * b_ref(i__, j);
327 /* L200: */
328  }
329  }
330 /* L210: */
331  }
332  } else {
333  for (j = *n; j >= 1; --j) {
334  if (*alpha != 1.) {
335  i__1 = *m;
336  for (i__ = 1; i__ <= i__1; ++i__) {
337  b_ref(i__, j) = *alpha * b_ref(i__, j);
338 /* L220: */
339  }
340  }
341  i__1 = *n;
342  for (k = j + 1; k <= i__1; ++k) {
343  if (a_ref(k, j) != 0.) {
344  i__2 = *m;
345  for (i__ = 1; i__ <= i__2; ++i__) {
346  b_ref(i__, j) = b_ref(i__, j) - a_ref(k, j) *
347  b_ref(i__, k);
348 /* L230: */
349  }
350  }
351 /* L240: */
352  }
353  if (nounit) {
354  temp = 1. / a_ref(j, j);
355  i__1 = *m;
356  for (i__ = 1; i__ <= i__1; ++i__) {
357  b_ref(i__, j) = temp * b_ref(i__, j);
358 /* L250: */
359  }
360  }
361 /* L260: */
362  }
363  }
364  } else {
365 /* Form B := alpha*B*inv( A' ). */
366  if (upper) {
367  for (k = *n; k >= 1; --k) {
368  if (nounit) {
369  temp = 1. / a_ref(k, k);
370  i__1 = *m;
371  for (i__ = 1; i__ <= i__1; ++i__) {
372  b_ref(i__, k) = temp * b_ref(i__, k);
373 /* L270: */
374  }
375  }
376  i__1 = k - 1;
377  for (j = 1; j <= i__1; ++j) {
378  if (a_ref(j, k) != 0.) {
379  temp = a_ref(j, k);
380  i__2 = *m;
381  for (i__ = 1; i__ <= i__2; ++i__) {
382  b_ref(i__, j) = b_ref(i__, j) - temp * b_ref(
383  i__, k);
384 /* L280: */
385  }
386  }
387 /* L290: */
388  }
389  if (*alpha != 1.) {
390  i__1 = *m;
391  for (i__ = 1; i__ <= i__1; ++i__) {
392  b_ref(i__, k) = *alpha * b_ref(i__, k);
393 /* L300: */
394  }
395  }
396 /* L310: */
397  }
398  } else {
399  i__1 = *n;
400  for (k = 1; k <= i__1; ++k) {
401  if (nounit) {
402  temp = 1. / a_ref(k, k);
403  i__2 = *m;
404  for (i__ = 1; i__ <= i__2; ++i__) {
405  b_ref(i__, k) = temp * b_ref(i__, k);
406 /* L320: */
407  }
408  }
409  i__2 = *n;
410  for (j = k + 1; j <= i__2; ++j) {
411  if (a_ref(j, k) != 0.) {
412  temp = a_ref(j, k);
413  i__3 = *m;
414  for (i__ = 1; i__ <= i__3; ++i__) {
415  b_ref(i__, j) = b_ref(i__, j) - temp * b_ref(
416  i__, k);
417 /* L330: */
418  }
419  }
420 /* L340: */
421  }
422  if (*alpha != 1.) {
423  i__2 = *m;
424  for (i__ = 1; i__ <= i__2; ++i__) {
425  b_ref(i__, k) = *alpha * b_ref(i__, k);
426 /* L350: */
427  }
428  }
429 /* L360: */
430  }
431  }
432  }
433  }
434  return 0;
435 /* End of DTRSM . */
436 } /* dtrsm_ */
437 #undef b_ref
438 #undef a_ref
439 
440 #endif