ergo
template_lapack_getrf.h
Go to the documentation of this file.
1 /* Ergo, version 3.2, a program for linear scaling electronic structure
2  * calculations.
3  * Copyright (C) 2012 Elias Rudberg, Emanuel H. Rubensson, and Pawel Salek.
4  *
5  * This program is free software: you can redistribute it and/or modify
6  * it under the terms of the GNU General Public License as published by
7  * the Free Software Foundation, either version 3 of the License, or
8  * (at your option) any later version.
9  *
10  * This program is distributed in the hope that it will be useful,
11  * but WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13  * GNU General Public License for more details.
14  *
15  * You should have received a copy of the GNU General Public License
16  * along with this program. If not, see <http://www.gnu.org/licenses/>.
17  *
18  * Primary academic reference:
19  * Kohn−Sham Density Functional Theory Electronic Structure Calculations
20  * with Linearly Scaling Computational Time and Memory Usage,
21  * Elias Rudberg, Emanuel H. Rubensson, and Pawel Salek,
22  * J. Chem. Theory Comput. 7, 340 (2011),
23  * <http://dx.doi.org/10.1021/ct100611z>
24  *
25  * For further information about Ergo, see <http://www.ergoscf.org>.
26  */
27 
28  /* This file belongs to the template_lapack part of the Ergo source
29  * code. The source files in the template_lapack directory are modified
30  * versions of files originally distributed as CLAPACK, see the
31  * Copyright/license notice in the file template_lapack/COPYING.
32  */
33 
34 
35 #ifndef TEMPLATE_LAPACK_GETRF_HEADER
36 #define TEMPLATE_LAPACK_GETRF_HEADER
37 
38 
39 template<class Treal>
40 int template_lapack_getrf(const integer *m, const integer *n, Treal *a, const integer *
41  lda, integer *ipiv, integer *info)
42 {
43 /* -- LAPACK routine (version 3.0) --
44  Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
45  Courant Institute, Argonne National Lab, and Rice University
46  March 31, 1993
47 
48 
49  Purpose
50  =======
51 
52  DGETRF computes an LU factorization of a general M-by-N matrix A
53  using partial pivoting with row interchanges.
54 
55  The factorization has the form
56  A = P * L * U
57  where P is a permutation matrix, L is lower triangular with unit
58  diagonal elements (lower trapezoidal if m > n), and U is upper
59  triangular (upper trapezoidal if m < n).
60 
61  This is the right-looking Level 3 BLAS version of the algorithm.
62 
63  Arguments
64  =========
65 
66  M (input) INTEGER
67  The number of rows of the matrix A. M >= 0.
68 
69  N (input) INTEGER
70  The number of columns of the matrix A. N >= 0.
71 
72  A (input/output) DOUBLE PRECISION array, dimension (LDA,N)
73  On entry, the M-by-N matrix to be factored.
74  On exit, the factors L and U from the factorization
75  A = P*L*U; the unit diagonal elements of L are not stored.
76 
77  LDA (input) INTEGER
78  The leading dimension of the array A. LDA >= max(1,M).
79 
80  IPIV (output) INTEGER array, dimension (min(M,N))
81  The pivot indices; for 1 <= i <= min(M,N), row i of the
82  matrix was interchanged with row IPIV(i).
83 
84  INFO (output) INTEGER
85  = 0: successful exit
86  < 0: if INFO = -i, the i-th argument had an illegal value
87  > 0: if INFO = i, U(i,i) is exactly zero. The factorization
88  has been completed, but the factor U is exactly
89  singular, and division by zero will occur if it is used
90  to solve a system of equations.
91 
92  =====================================================================
93 
94 
95  Test the input parameters.
96 
97  Parameter adjustments */
98  /* Table of constant values */
99  integer c__1 = 1;
100  integer c_n1 = -1;
101  Treal c_b16 = 1.;
102  Treal c_b19 = -1.;
103 
104  /* System generated locals */
105  integer a_dim1, a_offset, i__1, i__2, i__3, i__4, i__5;
106  /* Local variables */
107  integer i__, j;
108  integer iinfo;
109  integer jb, nb;
110 #define a_ref(a_1,a_2) a[(a_2)*a_dim1 + a_1]
111 
112 
113  a_dim1 = *lda;
114  a_offset = 1 + a_dim1 * 1;
115  a -= a_offset;
116  --ipiv;
117 
118  /* Function Body */
119  *info = 0;
120  if (*m < 0) {
121  *info = -1;
122  } else if (*n < 0) {
123  *info = -2;
124  } else if (*lda < maxMACRO(1,*m)) {
125  *info = -4;
126  }
127  if (*info != 0) {
128  i__1 = -(*info);
129  template_blas_erbla("GETRF ", &i__1);
130  return 0;
131  }
132 
133 /* Quick return if possible */
134 
135  if (*m == 0 || *n == 0) {
136  return 0;
137  }
138 
139 /* Determine the block size for this environment. */
140 
141  nb = template_lapack_ilaenv(&c__1, "DGETRF", " ", m, n, &c_n1, &c_n1, (ftnlen)6, (ftnlen)
142  1);
143  if (nb <= 1 || nb >= minMACRO(*m,*n)) {
144 
145 /* Use unblocked code. */
146 
147  template_lapack_getf2(m, n, &a[a_offset], lda, &ipiv[1], info);
148  } else {
149 
150 /* Use blocked code. */
151 
152  i__1 = minMACRO(*m,*n);
153  i__2 = nb;
154  for (j = 1; i__2 < 0 ? j >= i__1 : j <= i__1; j += i__2) {
155 /* Computing MIN */
156  i__3 = minMACRO(*m,*n) - j + 1;
157  jb = minMACRO(i__3,nb);
158 
159 /* Factor diagonal and subdiagonal blocks and test for exact
160  singularity. */
161 
162  i__3 = *m - j + 1;
163  template_lapack_getf2(&i__3, &jb, &a_ref(j, j), lda, &ipiv[j], &iinfo);
164 
165 /* Adjust INFO and the pivot indices. */
166 
167  if (*info == 0 && iinfo > 0) {
168  *info = iinfo + j - 1;
169  }
170 /* Computing MIN */
171  i__4 = *m, i__5 = j + jb - 1;
172  i__3 = minMACRO(i__4,i__5);
173  for (i__ = j; i__ <= i__3; ++i__) {
174  ipiv[i__] = j - 1 + ipiv[i__];
175 /* L10: */
176  }
177 
178 /* Apply interchanges to columns 1:J-1. */
179 
180  i__3 = j - 1;
181  i__4 = j + jb - 1;
182  template_lapack_laswp(&i__3, &a[a_offset], lda, &j, &i__4, &ipiv[1], &c__1);
183 
184  if (j + jb <= *n) {
185 
186 /* Apply interchanges to columns J+JB:N. */
187 
188  i__3 = *n - j - jb + 1;
189  i__4 = j + jb - 1;
190  template_lapack_laswp(&i__3, &a_ref(1, j + jb), lda, &j, &i__4, &ipiv[1], &
191  c__1);
192 
193 /* Compute block row of U. */
194 
195  i__3 = *n - j - jb + 1;
196  template_blas_trsm("Left", "Lower", "No transpose", "Unit", &jb, &i__3, &
197  c_b16, &a_ref(j, j), lda, &a_ref(j, j + jb), lda);
198  if (j + jb <= *m) {
199 
200 /* Update trailing submatrix. */
201 
202  i__3 = *m - j - jb + 1;
203  i__4 = *n - j - jb + 1;
204  template_blas_gemm("No transpose", "No transpose", &i__3, &i__4, &jb,
205  &c_b19, &a_ref(j + jb, j), lda, &a_ref(j, j + jb),
206  lda, &c_b16, &a_ref(j + jb, j + jb), lda);
207  }
208  }
209 /* L20: */
210  }
211  }
212  return 0;
213 
214 /* End of DGETRF */
215 
216 } /* dgetrf_ */
217 
218 #undef a_ref
219 
220 
221 #endif