ergo
template_blas_syr2k.h
Go to the documentation of this file.
1 /* Ergo, version 3.2, a program for linear scaling electronic structure
2  * calculations.
3  * Copyright (C) 2012 Elias Rudberg, Emanuel H. Rubensson, and Pawel Salek.
4  *
5  * This program is free software: you can redistribute it and/or modify
6  * it under the terms of the GNU General Public License as published by
7  * the Free Software Foundation, either version 3 of the License, or
8  * (at your option) any later version.
9  *
10  * This program is distributed in the hope that it will be useful,
11  * but WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13  * GNU General Public License for more details.
14  *
15  * You should have received a copy of the GNU General Public License
16  * along with this program. If not, see <http://www.gnu.org/licenses/>.
17  *
18  * Primary academic reference:
19  * Kohn−Sham Density Functional Theory Electronic Structure Calculations
20  * with Linearly Scaling Computational Time and Memory Usage,
21  * Elias Rudberg, Emanuel H. Rubensson, and Pawel Salek,
22  * J. Chem. Theory Comput. 7, 340 (2011),
23  * <http://dx.doi.org/10.1021/ct100611z>
24  *
25  * For further information about Ergo, see <http://www.ergoscf.org>.
26  */
27 
28  /* This file belongs to the template_lapack part of the Ergo source
29  * code. The source files in the template_lapack directory are modified
30  * versions of files originally distributed as CLAPACK, see the
31  * Copyright/license notice in the file template_lapack/COPYING.
32  */
33 
34 
35 #ifndef TEMPLATE_BLAS_SYR2K_HEADER
36 #define TEMPLATE_BLAS_SYR2K_HEADER
37 
38 
39 template<class Treal>
40 int template_blas_syr2k(const char *uplo, const char *trans, const integer *n,
41  const integer *k, const Treal *alpha, const Treal *a,
42  const integer *lda, const Treal *b, const integer *ldb,
43  const Treal *beta, Treal *c__, const integer *ldc)
44 {
45  /* System generated locals */
46  integer a_dim1, a_offset, b_dim1, b_offset, c_dim1, c_offset, i__1, i__2,
47  i__3;
48  /* Local variables */
49  integer info;
50  Treal temp1, temp2;
51  integer i__, j, l;
52  integer nrowa;
53  logical upper;
54 #define a_ref(a_1,a_2) a[(a_2)*a_dim1 + a_1]
55 #define b_ref(a_1,a_2) b[(a_2)*b_dim1 + a_1]
56 #define c___ref(a_1,a_2) c__[(a_2)*c_dim1 + a_1]
57 /* Purpose
58  =======
59  DSYR2K performs one of the symmetric rank 2k operations
60  C := alpha*A*B' + alpha*B*A' + beta*C,
61  or
62  C := alpha*A'*B + alpha*B'*A + beta*C,
63  where alpha and beta are scalars, C is an n by n symmetric matrix
64  and A and B are n by k matrices in the first case and k by n
65  matrices in the second case.
66  Parameters
67  ==========
68  UPLO - CHARACTER*1.
69  On entry, UPLO specifies whether the upper or lower
70  triangular part of the array C is to be referenced as
71  follows:
72  UPLO = 'U' or 'u' Only the upper triangular part of C
73  is to be referenced.
74  UPLO = 'L' or 'l' Only the lower triangular part of C
75  is to be referenced.
76  Unchanged on exit.
77  TRANS - CHARACTER*1.
78  On entry, TRANS specifies the operation to be performed as
79  follows:
80  TRANS = 'N' or 'n' C := alpha*A*B' + alpha*B*A' +
81  beta*C.
82  TRANS = 'T' or 't' C := alpha*A'*B + alpha*B'*A +
83  beta*C.
84  TRANS = 'C' or 'c' C := alpha*A'*B + alpha*B'*A +
85  beta*C.
86  Unchanged on exit.
87  N - INTEGER.
88  On entry, N specifies the order of the matrix C. N must be
89  at least zero.
90  Unchanged on exit.
91  K - INTEGER.
92  On entry with TRANS = 'N' or 'n', K specifies the number
93  of columns of the matrices A and B, and on entry with
94  TRANS = 'T' or 't' or 'C' or 'c', K specifies the number
95  of rows of the matrices A and B. K must be at least zero.
96  Unchanged on exit.
97  ALPHA - DOUBLE PRECISION.
98  On entry, ALPHA specifies the scalar alpha.
99  Unchanged on exit.
100  A - DOUBLE PRECISION array of DIMENSION ( LDA, ka ), where ka is
101  k when TRANS = 'N' or 'n', and is n otherwise.
102  Before entry with TRANS = 'N' or 'n', the leading n by k
103  part of the array A must contain the matrix A, otherwise
104  the leading k by n part of the array A must contain the
105  matrix A.
106  Unchanged on exit.
107  LDA - INTEGER.
108  On entry, LDA specifies the first dimension of A as declared
109  in the calling (sub) program. When TRANS = 'N' or 'n'
110  then LDA must be at least max( 1, n ), otherwise LDA must
111  be at least max( 1, k ).
112  Unchanged on exit.
113  B - DOUBLE PRECISION array of DIMENSION ( LDB, kb ), where kb is
114  k when TRANS = 'N' or 'n', and is n otherwise.
115  Before entry with TRANS = 'N' or 'n', the leading n by k
116  part of the array B must contain the matrix B, otherwise
117  the leading k by n part of the array B must contain the
118  matrix B.
119  Unchanged on exit.
120  LDB - INTEGER.
121  On entry, LDB specifies the first dimension of B as declared
122  in the calling (sub) program. When TRANS = 'N' or 'n'
123  then LDB must be at least max( 1, n ), otherwise LDB must
124  be at least max( 1, k ).
125  Unchanged on exit.
126  BETA - DOUBLE PRECISION.
127  On entry, BETA specifies the scalar beta.
128  Unchanged on exit.
129  C - DOUBLE PRECISION array of DIMENSION ( LDC, n ).
130  Before entry with UPLO = 'U' or 'u', the leading n by n
131  upper triangular part of the array C must contain the upper
132  triangular part of the symmetric matrix and the strictly
133  lower triangular part of C is not referenced. On exit, the
134  upper triangular part of the array C is overwritten by the
135  upper triangular part of the updated matrix.
136  Before entry with UPLO = 'L' or 'l', the leading n by n
137  lower triangular part of the array C must contain the lower
138  triangular part of the symmetric matrix and the strictly
139  upper triangular part of C is not referenced. On exit, the
140  lower triangular part of the array C is overwritten by the
141  lower triangular part of the updated matrix.
142  LDC - INTEGER.
143  On entry, LDC specifies the first dimension of C as declared
144  in the calling (sub) program. LDC must be at least
145  max( 1, n ).
146  Unchanged on exit.
147  Level 3 Blas routine.
148  -- Written on 8-February-1989.
149  Jack Dongarra, Argonne National Laboratory.
150  Iain Duff, AERE Harwell.
151  Jeremy Du Croz, Numerical Algorithms Group Ltd.
152  Sven Hammarling, Numerical Algorithms Group Ltd.
153  Test the input parameters.
154  Parameter adjustments */
155  a_dim1 = *lda;
156  a_offset = 1 + a_dim1 * 1;
157  a -= a_offset;
158  b_dim1 = *ldb;
159  b_offset = 1 + b_dim1 * 1;
160  b -= b_offset;
161  c_dim1 = *ldc;
162  c_offset = 1 + c_dim1 * 1;
163  c__ -= c_offset;
164  /* Function Body */
165  if (template_blas_lsame(trans, "N")) {
166  nrowa = *n;
167  } else {
168  nrowa = *k;
169  }
170  upper = template_blas_lsame(uplo, "U");
171  info = 0;
172  if (! upper && ! template_blas_lsame(uplo, "L")) {
173  info = 1;
174  } else if (! template_blas_lsame(trans, "N") && ! template_blas_lsame(trans,
175  "T") && ! template_blas_lsame(trans, "C")) {
176  info = 2;
177  } else if (*n < 0) {
178  info = 3;
179  } else if (*k < 0) {
180  info = 4;
181  } else if (*lda < maxMACRO(1,nrowa)) {
182  info = 7;
183  } else if (*ldb < maxMACRO(1,nrowa)) {
184  info = 9;
185  } else if (*ldc < maxMACRO(1,*n)) {
186  info = 12;
187  }
188  if (info != 0) {
189  template_blas_erbla("SYR2K ", &info);
190  return 0;
191  }
192 /* Quick return if possible. */
193  if (*n == 0 || ( (*alpha == 0. || *k == 0) && *beta == 1. ) ) {
194  return 0;
195  }
196 /* And when alpha.eq.zero. */
197  if (*alpha == 0.) {
198  if (upper) {
199  if (*beta == 0.) {
200  i__1 = *n;
201  for (j = 1; j <= i__1; ++j) {
202  i__2 = j;
203  for (i__ = 1; i__ <= i__2; ++i__) {
204  c___ref(i__, j) = 0.;
205 /* L10: */
206  }
207 /* L20: */
208  }
209  } else {
210  i__1 = *n;
211  for (j = 1; j <= i__1; ++j) {
212  i__2 = j;
213  for (i__ = 1; i__ <= i__2; ++i__) {
214  c___ref(i__, j) = *beta * c___ref(i__, j);
215 /* L30: */
216  }
217 /* L40: */
218  }
219  }
220  } else {
221  if (*beta == 0.) {
222  i__1 = *n;
223  for (j = 1; j <= i__1; ++j) {
224  i__2 = *n;
225  for (i__ = j; i__ <= i__2; ++i__) {
226  c___ref(i__, j) = 0.;
227 /* L50: */
228  }
229 /* L60: */
230  }
231  } else {
232  i__1 = *n;
233  for (j = 1; j <= i__1; ++j) {
234  i__2 = *n;
235  for (i__ = j; i__ <= i__2; ++i__) {
236  c___ref(i__, j) = *beta * c___ref(i__, j);
237 /* L70: */
238  }
239 /* L80: */
240  }
241  }
242  }
243  return 0;
244  }
245 /* Start the operations. */
246  if (template_blas_lsame(trans, "N")) {
247 /* Form C := alpha*A*B' + alpha*B*A' + C. */
248  if (upper) {
249  i__1 = *n;
250  for (j = 1; j <= i__1; ++j) {
251  if (*beta == 0.) {
252  i__2 = j;
253  for (i__ = 1; i__ <= i__2; ++i__) {
254  c___ref(i__, j) = 0.;
255 /* L90: */
256  }
257  } else if (*beta != 1.) {
258  i__2 = j;
259  for (i__ = 1; i__ <= i__2; ++i__) {
260  c___ref(i__, j) = *beta * c___ref(i__, j);
261 /* L100: */
262  }
263  }
264  i__2 = *k;
265  for (l = 1; l <= i__2; ++l) {
266  if (a_ref(j, l) != 0. || b_ref(j, l) != 0.) {
267  temp1 = *alpha * b_ref(j, l);
268  temp2 = *alpha * a_ref(j, l);
269  i__3 = j;
270  for (i__ = 1; i__ <= i__3; ++i__) {
271  c___ref(i__, j) = c___ref(i__, j) + a_ref(i__, l)
272  * temp1 + b_ref(i__, l) * temp2;
273 /* L110: */
274  }
275  }
276 /* L120: */
277  }
278 /* L130: */
279  }
280  } else {
281  i__1 = *n;
282  for (j = 1; j <= i__1; ++j) {
283  if (*beta == 0.) {
284  i__2 = *n;
285  for (i__ = j; i__ <= i__2; ++i__) {
286  c___ref(i__, j) = 0.;
287 /* L140: */
288  }
289  } else if (*beta != 1.) {
290  i__2 = *n;
291  for (i__ = j; i__ <= i__2; ++i__) {
292  c___ref(i__, j) = *beta * c___ref(i__, j);
293 /* L150: */
294  }
295  }
296  i__2 = *k;
297  for (l = 1; l <= i__2; ++l) {
298  if (a_ref(j, l) != 0. || b_ref(j, l) != 0.) {
299  temp1 = *alpha * b_ref(j, l);
300  temp2 = *alpha * a_ref(j, l);
301  i__3 = *n;
302  for (i__ = j; i__ <= i__3; ++i__) {
303  c___ref(i__, j) = c___ref(i__, j) + a_ref(i__, l)
304  * temp1 + b_ref(i__, l) * temp2;
305 /* L160: */
306  }
307  }
308 /* L170: */
309  }
310 /* L180: */
311  }
312  }
313  } else {
314 /* Form C := alpha*A'*B + alpha*B'*A + C. */
315  if (upper) {
316  i__1 = *n;
317  for (j = 1; j <= i__1; ++j) {
318  i__2 = j;
319  for (i__ = 1; i__ <= i__2; ++i__) {
320  temp1 = 0.;
321  temp2 = 0.;
322  i__3 = *k;
323  for (l = 1; l <= i__3; ++l) {
324  temp1 += a_ref(l, i__) * b_ref(l, j);
325  temp2 += b_ref(l, i__) * a_ref(l, j);
326 /* L190: */
327  }
328  if (*beta == 0.) {
329  c___ref(i__, j) = *alpha * temp1 + *alpha * temp2;
330  } else {
331  c___ref(i__, j) = *beta * c___ref(i__, j) + *alpha *
332  temp1 + *alpha * temp2;
333  }
334 /* L200: */
335  }
336 /* L210: */
337  }
338  } else {
339  i__1 = *n;
340  for (j = 1; j <= i__1; ++j) {
341  i__2 = *n;
342  for (i__ = j; i__ <= i__2; ++i__) {
343  temp1 = 0.;
344  temp2 = 0.;
345  i__3 = *k;
346  for (l = 1; l <= i__3; ++l) {
347  temp1 += a_ref(l, i__) * b_ref(l, j);
348  temp2 += b_ref(l, i__) * a_ref(l, j);
349 /* L220: */
350  }
351  if (*beta == 0.) {
352  c___ref(i__, j) = *alpha * temp1 + *alpha * temp2;
353  } else {
354  c___ref(i__, j) = *beta * c___ref(i__, j) + *alpha *
355  temp1 + *alpha * temp2;
356  }
357 /* L230: */
358  }
359 /* L240: */
360  }
361  }
362  }
363  return 0;
364 /* End of DSYR2K. */
365 } /* dsyr2k_ */
366 #undef c___ref
367 #undef b_ref
368 #undef a_ref
369 
370 #endif