ergo
gblas.h
Go to the documentation of this file.
1 /* Ergo, version 3.2, a program for linear scaling electronic structure
2  * calculations.
3  * Copyright (C) 2012 Elias Rudberg, Emanuel H. Rubensson, and Pawel Salek.
4  *
5  * This program is free software: you can redistribute it and/or modify
6  * it under the terms of the GNU General Public License as published by
7  * the Free Software Foundation, either version 3 of the License, or
8  * (at your option) any later version.
9  *
10  * This program is distributed in the hope that it will be useful,
11  * but WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13  * GNU General Public License for more details.
14  *
15  * You should have received a copy of the GNU General Public License
16  * along with this program. If not, see <http://www.gnu.org/licenses/>.
17  *
18  * Primary academic reference:
19  * Kohn−Sham Density Functional Theory Electronic Structure Calculations
20  * with Linearly Scaling Computational Time and Memory Usage,
21  * Elias Rudberg, Emanuel H. Rubensson, and Pawel Salek,
22  * J. Chem. Theory Comput. 7, 340 (2011),
23  * <http://dx.doi.org/10.1021/ct100611z>
24  *
25  * For further information about Ergo, see <http://www.ergoscf.org>.
26  */
27 
43 #ifndef GBLAS
44 #define GBLAS
45 #include <ctime>
46 #include "Failure.h"
47 
48 // We need to include config.h to get USE_LINALG_TEMPLATES flag
49 #include "config.h"
50 
51 #include "template_lapack_common.h"
52 
53 /* LEVEL 3 */
54 extern "C" void dgemm_(const char *ta,const char *tb,
55  const int *n, const int *k, const int *l,
56  const double *alpha,const double *A,const int *lda,
57  const double *B, const int *ldb,
58  const double *beta, double *C, const int *ldc);
59 extern "C" void dpptrf_(const char *uplo,const int *n, double* ap, int *info);
60 extern "C" void dspgst_(const int *itype, const char *uplo,const int *n,
61  double* ap,const double *bp,int *info);
62 extern "C" void dtptri_(const char *uplo,const char *diag,const int *n,
63  double* ap,int *info);
64 /* unit triangular means that a value of 1.0 is assumed */
65 /* for the diagonal elements (hence diagonal not stored in packed format) */
66 extern "C" void dtrmm_(const char *side,const char *uplo,const char *transa,
67  const char *diag,const int *m,const int *n,
68  const double *alpha,const double *A,const int *lda,
69  double *B,const int *ldb);
70 extern "C" void dsygv_(const int *itype,const char *jobz,
71  const char *uplo,const int *n,
72  double *A,const int *lda,double *B,const int *ldb,
73  double* w,double* work,const int *lwork,int *info);
74 extern "C" void dggev_(const char *jobbl, const char *jobvr, const int *n,
75  double *A, const int *lda, double *B, const int *ldb,
76  double *alphar, double *alphai, double *beta,
77  double *vl, const int *ldvl,
78  double *vr, const int *ldvr,
79  double *work, const int *lwork, int *info);
80 extern "C" void dpotrf_(const char *uplo, const int *n, double *A,
81  const int *lda, int *info);
82 extern "C" void dtrtri_(const char *uplo,const char *diag,const int *n,
83  double *A, const int *lda, int *info);
84 extern "C" void dsyrk_(const char *uplo, const char *trans, const int *n,
85  const int *k, const double *alpha, const double *A,
86  const int *lda, const double *beta,
87  double *C, const int *ldc);
88 extern "C" void dsymm_(const char *side,const char *uplo,
89  const int *m,const int *n,
90  const double *alpha,const double *A,const int *lda,
91  const double *B,const int *ldb, const double* beta,
92  double *C,const int *ldc);
93 extern "C" void dpocon_(const char *uplo, const int *n, const double *A,
94  const int *lda, const double *anorm, double *rcond,
95  double *work, int *iwork, int *info);
96 extern "C" void dstevx_(const char *jobz, const char *range, const int *n,
97  double *d, double *e, const double *vl,
98  const double *vu, const int *il, const int *iu,
99  const double *abstol, int *m, double *w, double *z,
100  const int *ldz, double *work, int *iwork, int *ifail,
101  int *info);
102 extern "C" void dstevr_(const char *jobz, const char *range, const int *n,
103  double *d, double *e, const double *vl,
104  const double *vu, const int *il, const int *iu,
105  const double *abstol, int *m, double *w, double *z,
106  const int *ldz, int* isuppz, double *work, int* lwork,
107  int *iwork, int* liwork, int *info);
108 extern "C" void dsyev_(const char *jobz, const char *uplo, const int *n,
109  double *a, const int *lda, double *w, double *work,
110  const int *lwork, int *info);
111 
112 /* LEVEL 2 */
113 extern "C" void dgemv_(const char *ta, const int *m, const int *n,
114  const double *alpha, const double *A, const int *lda,
115  const double *x, const int *incx, const double *beta,
116  double *y, const int *incy);
117 extern "C" void dsymv_(const char *uplo, const int *n,
118  const double *alpha, const double *A, const int *lda,
119  const double *x, const int *incx, const double *beta,
120  double *y, const int *incy);
121 extern "C" void dtrmv_(const char *uplo, const char *trans, const char *diag,
122  const int *n, const double *A, const int *lda,
123  double *x, const int *incx);
124 /* LEVEL 1 */
125 extern "C" void dscal_(const int* n,const double* da, double* dx,
126  const int* incx);
127 extern "C" double ddot_(const int* n, const double* dx, const int* incx,
128  const double* dy, const int* incy);
129 extern "C" void daxpy_(const int* n, const double* da, const double* dx,
130  const int* incx, double* dy,const int* incy);
131 
132 /* Single precision */
133 /* LEVEL 3 */
134 extern "C" void sgemm_(const char *ta,const char *tb,
135  const int *n, const int *k, const int *l,
136  const float *alpha,const float *A,const int *lda,
137  const float *B, const int *ldb,
138  const float *beta, float *C, const int *ldc);
139 extern "C" void spptrf_(const char *uplo,const int *n, float* ap, int *info);
140 extern "C" void sspgst_(const int *itype, const char *uplo,const int *n,
141  float* ap,const float *bp,int *info);
142 extern "C" void stptri_(const char *uplo,const char *diag,const int *n,
143  float* ap,int *info);
144 /* unit triangular means that a value of 1.0 is assumed */
145 /* for the diagonal elements (hence diagonal not stored in packed format) */
146 extern "C" void strmm_(const char *side,const char *uplo,const char *transa,
147  const char *diag,const int *m,const int *n,
148  const float *alpha,const float *A,const int *lda,
149  float *B,const int *ldb);
150 extern "C" void ssygv_(const int *itype,const char *jobz,
151  const char *uplo,const int *n,
152  float *A,const int *lda,float *B,const int *ldb,
153  float* w,float* work,const int *lwork,int *info);
154 extern "C" void sggev_(const char *jobbl, const char *jobvr, const int *n,
155  float *A, const int *lda, float *B, const int *ldb,
156  float *alphar, float *alphai, float *beta,
157  float *vl, const int *ldvl,
158  float *vr, const int *ldvr,
159  float *work, const int *lwork, int *info);
160 extern "C" void spotrf_(const char *uplo, const int *n, float *A,
161  const int *lda, int *info);
162 extern "C" void strtri_(const char *uplo,const char *diag,const int *n,
163  float *A, const int *lda, int *info);
164 extern "C" void ssyrk_(const char *uplo, const char *trans, const int *n,
165  const int *k, const float *alpha, const float *A,
166  const int *lda, const float *beta,
167  float *C, const int *ldc);
168 extern "C" void ssymm_(const char *side,const char *uplo,
169  const int *m,const int *n,
170  const float *alpha,const float *A,const int *lda,
171  const float *B,const int *ldb, const float* beta,
172  float *C,const int *ldc);
173 extern "C" void spocon_(const char *uplo, const int *n, const float *A,
174  const int *lda, const float *anorm, float *rcond,
175  float *work, int *iwork, int *info);
176 extern "C" void sstevx_(const char *jobz, const char *range, const int *n,
177  float *d, float *e, const float *vl,
178  const float *vu, const int *il, const int *iu,
179  const float *abstol, int *m, float *w, float *z,
180  const int *ldz, float *work, int *iwork, int *ifail,
181  int *info);
182 extern "C" void sstevr_(const char *jobz, const char *range, const int *n,
183  float *d, float *e, const float *vl,
184  const float *vu, const int *il, const int *iu,
185  const float *abstol, int *m, float *w, float *z,
186  const int *ldz, int* isuppz, float *work, int* lwork,
187  int *iwork, int* liwork, int *info);
188 extern "C" void ssyev_(const char *jobz, const char *uplo, const int *n,
189  float *a, const int *lda, float *w, float *work,
190  const int *lwork, int *info);
191 
192 /* LEVEL 2 */
193 extern "C" void sgemv_(const char *ta, const int *m, const int *n,
194  const float *alpha, const float *A, const int *lda,
195  const float *x, const int *incx, const float *beta,
196  float *y, const int *incy);
197 extern "C" void ssymv_(const char *uplo, const int *n,
198  const float *alpha, const float *A, const int *lda,
199  const float *x, const int *incx, const float *beta,
200  float *y, const int *incy);
201 extern "C" void strmv_(const char *uplo, const char *trans, const char *diag,
202  const int *n, const float *A, const int *lda,
203  float *x, const int *incx);
204 /* LEVEL 1 */
205 extern "C" void sscal_(const int* n,const float* da, float* dx,
206  const int* incx);
207 #if 0
208 // sdot_ is unreliable because of varying return type in different
209 // implementations. We therefore always use template dot for single precision
210 extern "C" double sdot_(const int* n, const float* dx, const int* incx,
211  const float* dy, const int* incy);
212 #endif
213 extern "C" void saxpy_(const int* n, const float* da, const float* dx,
214  const int* incx, float* dy,const int* incy);
215 
216 namespace mat
217 {
218  struct Gblas {
219  static float time;
220  static bool timekeeping;
221  };
222 
223  /*************** Default version throws exception */
224  template<class T>
225  inline static void gemm(const char *ta,const char *tb,
226  const int *n, const int *k, const int *l,
227  const T *alpha,const T *A,const int *lda,
228  const T *B, const int *ldb,
229  const T *beta,T *C, const int *ldc) {
230  template_blas_gemm(ta,tb,n,k,l,alpha,A,lda,B,ldb,beta,C,ldc);
231  }
232 
233 
234  /* Computes the Cholesky factorization of a symmetric *
235  * positive definite matrix in packed storage. */
236  template<class T>
237  inline static void pptrf(const char *uplo,const int *n, T* ap, int *info) {
238  template_lapack_pptrf(uplo,n,ap,info);
239  }
240 
241  template<class T>
242  inline static void spgst(const int *itype, const char *uplo,const int *n,
243  T* ap,const T *bp,int *info) {
244  template_lapack_spgst(itype,uplo,n,ap,bp,info);
245  }
246 
247  /* Computes the inverse of a triangular matrix in packed storage. */
248  template<class T>
249  inline static void tptri(const char *uplo,const char *diag,const int *n,
250  T* ap,int *info) {
251  template_lapack_tptri(uplo,diag,n,ap,info);
252  }
253 
254  template<class T>
255  inline static void trmm(const char *side,const char *uplo,
256  const char *transa, const char *diag,
257  const int *m,const int *n,
258  const T *alpha,const T *A,const int *lda,
259  T *B,const int *ldb) {
260  template_blas_trmm(side,uplo,transa,diag,m,n,alpha,A,lda,B,ldb);
261  }
262 
263  /* Computes all eigenvalues and the eigenvectors of a generalized *
264  * symmetric-definite generalized eigenproblem, *
265  * Ax= lambda Bx, ABx= lambda x, or BAx= lambda x. */
266  template<class T>
267  inline static void sygv(const int *itype,const char *jobz,
268  const char *uplo,const int *n,
269  T *A,const int *lda,T *B,const int *ldb,
270  T* w,T* work,const int *lwork,int *info) {
271  template_lapack_sygv(itype,jobz,uplo,n,A,lda,B,ldb,w,work,lwork,info);
272  }
273 
274  template<class T>
275  inline static void ggev(const char *jobbl, const char *jobvr,
276  const int *n, T *A, const int *lda,
277  T *B, const int *ldb, T *alphar,
278  T *alphai, T *beta, T *vl,
279  const int *ldvl, T *vr, const int *ldvr,
280  T *work, const int *lwork, int *info) {
281  template_lapack_ggev(jobbl, jobvr, n, A, lda, B, ldb, alphar, alphai, beta, vl,
282  ldvl, vr, ldvr, work, lwork, info);
283  }
284 
285  /* Computes the Cholesky factorization of a symmetric *
286  * positive definite matrix in packed storage. */
287  template<class T>
288  inline static void potrf(const char *uplo, const int *n, T *A,
289  const int *lda, int *info) {
290  template_lapack_potrf(uplo, n, A, lda, info);
291  }
292 
293  /* Computes the inverse of a triangular matrix. */
294  template<class T>
295  inline static void trtri(const char *uplo,const char *diag,const int *n,
296  T *A, const int *lda, int *info) {
297  // Create copies of strings because they cannot be const inside trtri.
298  char uploCopy[2];
299  char diagCopy[2];
300  uploCopy[0] = uplo[0];
301  uploCopy[1] = '\0';
302  diagCopy[0] = diag[0];
303  diagCopy[1] = '\0';
304  template_lapack_trtri(uploCopy, diagCopy, n, A, lda, info);
305  }
306 
307  template<class T>
308  inline static void syrk(const char *uplo, const char *trans, const int *n,
309  const int *k, const T *alpha, const T *A,
310  const int *lda, const T *beta,
311  T *C, const int *ldc) {
312  template_blas_syrk(uplo, trans, n, k, alpha, A, lda, beta, C, ldc);
313  }
314 
315  template<class T>
316  inline static void symm(const char *side,const char *uplo,
317  const int *m,const int *n,
318  const T *alpha,const T *A,const int *lda,
319  const T *B,const int *ldb, const T* beta,
320  T *C,const int *ldc) {
321  template_blas_symm(side, uplo, m, n, alpha, A, lda, B, ldb, beta, C, ldc);
322  }
323 
324  template<class T>
325  inline static void pocon(const char *uplo, const int *n, const T *A,
326  const int *lda, const T *anorm, T *rcond,
327  T *work, int *iwork, int *info) {
328  template_lapack_pocon(uplo, n, A, lda, anorm, rcond, work, iwork, info);
329  }
330 
331  template<class T>
332  inline static void stevx(const char *jobz, const char *range,
333  const int *n, T *d, T *e, const T *vl,
334  const T *vu, const int *il, const int *iu,
335  const T *abstol, int *m, T *w, T *z,
336  const int *ldz, T *work, int *iwork, int *ifail,
337  int *info) {
338  template_lapack_stevx(jobz, range, n, d, e, vl, vu, il, iu, abstol, m, w, z, ldz,
339  work, iwork, ifail, info);
340  }
341 
342  template<class T>
343  inline static void stevr(const char *jobz, const char *range, const int *n,
344  T *d, T *e, const T *vl,
345  const T *vu, const int *il, const int *iu,
346  const T *abstol, int *m, T *w, T *z,
347  const int *ldz, int* isuppz, T *work, int* lwork,
348  int *iwork, int* liwork, int *info) {
349  template_lapack_stevr(jobz, range, n, d, e, vl, vu, il, iu, abstol,
350  m, w, z, ldz, isuppz,
351  work, lwork, iwork, liwork, info);
352  }
353 
354 
355  template<class T>
356  inline static void syev(const char *jobz, const char *uplo, const int *n,
357  T *a, const int *lda, T *w, T *work,
358  const int *lwork, int *info) {
359  template_lapack_syev(jobz, uplo, n, a, lda, w, work, lwork, info);
360  }
361 
362 
363  /* LEVEL 2 */
364  template<class T>
365  inline static void gemv(const char *ta, const int *m, const int *n,
366  const T *alpha, const T *A,
367  const int *lda,
368  const T *x, const int *incx,
369  const T *beta, T *y, const int *incy) {
370  template_blas_gemv(ta, m, n, alpha, A, lda, x, incx, beta, y, incy);
371  }
372 
373  template<class T>
374  inline static void symv(const char *uplo, const int *n,
375  const T *alpha, const T *A,
376  const int *lda, const T *x,
377  const int *incx, const T *beta,
378  T *y, const int *incy) {
379  template_blas_symv(uplo, n, alpha, A, lda, x, incx, beta, y, incy);
380  }
381 
382  template<class T>
383  inline static void trmv(const char *uplo, const char *trans,
384  const char *diag, const int *n,
385  const T *A, const int *lda,
386  T *x, const int *incx) {
387  template_blas_trmv(uplo, trans, diag, n, A, lda, x, incx);
388  }
389 
390 
391  /* LEVEL 1 */
392  template<class T>
393  inline static void scal(const int* n,const T* da, T* dx,
394  const int* incx) {
395  template_blas_scal(n, da, dx, incx);
396  }
397 
398  template<class T>
399  inline static T dot(const int* n, const T* dx, const int* incx,
400  const T* dy, const int* incy) {
401  return template_blas_dot(n, dx, incx, dy, incy);
402  }
403 
404  template<class T>
405  inline static void axpy(const int* n, const T* da, const T* dx,
406  const int* incx, T* dy,const int* incy) {
407  template_blas_axpy(n, da, dx, incx, dy, incy);
408  }
409 
410 
411 
412 
413  /* Below follows specializations for double, single, etc.
414  These specializations are not needed if template_blas and template_lapack are used,
415  so in that case we skip this entire section. */
416 #ifndef USE_LINALG_TEMPLATES
417 
418 
419  /*************** Double specialization */
420  template<>
421  inline void gemm<double>(const char *ta,const char *tb,
422  const int *n, const int *k, const int *l,
423  const double *alpha,
424  const double *A,const int *lda,
425  const double *B, const int *ldb,
426  const double *beta,
427  double *C, const int *ldc) {
428  if (Gblas::timekeeping) {
429  clock_t start = clock();
430  dgemm_(ta,tb,n,k,l,alpha,A,lda,B,ldb,beta,C,ldc);
431  Gblas::time += ((float)(clock() - start)) / (CLOCKS_PER_SEC);
432  }
433  else {
434  dgemm_(ta,tb,n,k,l,alpha,A,lda,B,ldb,beta,C,ldc);
435  }
436  }
437 
438  template<>
439  inline void pptrf<double>(const char *uplo,const int *n,
440  double* ap, int *info) {
441  if (Gblas::timekeeping) {
442  clock_t start = clock();
443  dpptrf_(uplo,n,ap,info);
444  Gblas::time += ((float)(clock() - start)) / (CLOCKS_PER_SEC);
445  }
446  else {
447  dpptrf_(uplo,n,ap,info);
448  }
449  }
450 
451  template<>
452  inline void spgst<double>(const int *itype, const char *uplo,
453  const int *n,
454  double* ap,const double *bp,int *info) {
455  if (Gblas::timekeeping) {
456  clock_t start = clock();
457  dspgst_(itype,uplo,n,ap,bp,info);
458  Gblas::time += ((float)(clock() - start)) / (CLOCKS_PER_SEC);
459  }
460  else {
461  dspgst_(itype,uplo,n,ap,bp,info);
462  }
463  }
464 
465  template<>
466  inline void tptri<double>(const char *uplo,const char *diag,const int *n,
467  double* ap,int *info) {
468  if (Gblas::timekeeping) {
469  clock_t start = clock();
470  dtptri_(uplo,diag,n,ap,info);
471  Gblas::time += ((float)(clock() - start)) / (CLOCKS_PER_SEC);
472  }
473  else {
474  dtptri_(uplo,diag,n,ap,info);
475  }
476  }
477 
478  template<>
479  inline void trmm<double>(const char *side,const char *uplo,
480  const char *transa,
481  const char *diag,const int *m,const int *n,
482  const double *alpha,
483  const double *A,const int *lda,
484  double *B,const int *ldb) {
485  if (Gblas::timekeeping) {
486  clock_t start = clock();
487  dtrmm_(side,uplo,transa,diag,m,n,alpha,A,lda,B,ldb);
488  Gblas::time += ((float)(clock() - start)) / (CLOCKS_PER_SEC);
489  }
490  else {
491  dtrmm_(side,uplo,transa,diag,m,n,alpha,A,lda,B,ldb);
492  }
493  }
494 
495  template<>
496  inline void sygv<double>(const int *itype,const char *jobz,
497  const char *uplo,const int *n,
498  double *A,const int *lda,
499  double *B,const int *ldb,
500  double* w,double* work,
501  const int *lwork,int *info) {
502  if (Gblas::timekeeping) {
503  clock_t start = clock();
504  dsygv_(itype,jobz,uplo,n,A,lda,B,ldb,w,work,lwork,info);
505  Gblas::time += ((float)(clock() - start)) / (CLOCKS_PER_SEC);
506  }
507  else {
508  dsygv_(itype,jobz,uplo,n,A,lda,B,ldb,w,work,lwork,info);
509  }
510  }
511 
512  template<>
513  inline void ggev<double>(const char *jobbl, const char *jobvr,
514  const int *n, double *A, const int *lda,
515  double *B, const int *ldb, double *alphar,
516  double *alphai, double *beta, double *vl,
517  const int *ldvl, double *vr, const int *ldvr,
518  double *work, const int *lwork, int *info) {
519  if (Gblas::timekeeping) {
520  clock_t start = clock();
521  dggev_(jobbl, jobvr, n, A, lda, B, ldb, alphar, alphai, beta, vl,
522  ldvl, vr, ldvr, work, lwork, info);
523  Gblas::time += ((float)(clock() - start)) / (CLOCKS_PER_SEC);
524  }
525  else {
526  dggev_(jobbl, jobvr, n, A, lda, B, ldb, alphar, alphai, beta, vl,
527  ldvl, vr, ldvr, work, lwork, info);
528  }
529  }
530 
531 
532  template<>
533  inline void potrf<double>(const char *uplo, const int *n, double *A,
534  const int *lda, int *info) {
535  if (Gblas::timekeeping) {
536  clock_t start = clock();
537  dpotrf_(uplo, n, A, lda, info);
538  Gblas::time += ((float)(clock() - start)) / (CLOCKS_PER_SEC);
539  }
540  else {
541  dpotrf_(uplo, n, A, lda, info);
542  }
543  }
544 
545  template<>
546  inline void trtri<double>(const char *uplo,const char *diag,const int *n,
547  double *A, const int *lda, int *info) {
548  if (Gblas::timekeeping) {
549  clock_t start = clock();
550  dtrtri_(uplo, diag, n, A, lda, info);
551  Gblas::time += ((float)(clock() - start)) / (CLOCKS_PER_SEC);
552  }
553  else {
554  dtrtri_(uplo, diag, n, A, lda, info);
555  }
556  }
557 
558  template<>
559  inline void syrk<double>(const char *uplo, const char *trans,
560  const int *n, const int *k, const double *alpha,
561  const double *A, const int *lda,
562  const double *beta, double *C, const int *ldc) {
563  if (Gblas::timekeeping) {
564  clock_t start = clock();
565  dsyrk_(uplo, trans, n, k, alpha, A, lda, beta, C, ldc);
566  Gblas::time += ((float)(clock() - start)) / (CLOCKS_PER_SEC);
567  }
568  else {
569  dsyrk_(uplo, trans, n, k, alpha, A, lda, beta, C, ldc);
570  }
571  }
572 
573  template<>
574  inline void symm<double>(const char *side,const char *uplo,
575  const int *m,const int *n, const double *alpha,
576  const double *A,const int *lda,
577  const double *B,const int *ldb,
578  const double* beta,
579  double *C,const int *ldc) {
580  if (Gblas::timekeeping) {
581  clock_t start = clock();
582  dsymm_(side, uplo, m, n, alpha, A, lda, B, ldb, beta, C, ldc);
583  Gblas::time += ((float)(clock() - start)) / (CLOCKS_PER_SEC);
584  }
585  else {
586  dsymm_(side, uplo, m, n, alpha, A, lda, B, ldb, beta, C, ldc);
587  }
588  }
589 
590  template<>
591  inline void pocon<double>(const char *uplo, const int *n,
592  const double *A, const int *lda,
593  const double *anorm, double *rcond,
594  double *work, int *iwork, int *info) {
595  if (Gblas::timekeeping) {
596  clock_t start = clock();
597  dpocon_(uplo, n, A, lda, anorm, rcond, work, iwork, info);
598  Gblas::time += ((float)(clock() - start)) / (CLOCKS_PER_SEC);
599  }
600  else {
601  dpocon_(uplo, n, A, lda, anorm, rcond, work, iwork, info);
602  }
603  }
604 
605  template<>
606  inline void stevx<double>(const char *jobz, const char *range,
607  const int *n, double *d, double *e,
608  const double *vl,
609  const double *vu, const int *il, const int *iu,
610  const double *abstol, int *m, double *w,
611  double *z,
612  const int *ldz, double *work, int *iwork,
613  int *ifail, int *info) {
614  if (Gblas::timekeeping) {
615  clock_t start = clock();
616  dstevx_(jobz, range, n, d, e, vl, vu, il, iu, abstol, m, w, z, ldz,
617  work, iwork, ifail, info);
618  Gblas::time += ((float)(clock() - start)) / (CLOCKS_PER_SEC);
619  }
620  else {
621  dstevx_(jobz, range, n, d, e, vl, vu, il, iu, abstol, m, w, z, ldz,
622  work, iwork, ifail, info);
623  }
624  }
625 
626  template<>
627  inline void stevr<double>(const char *jobz, const char *range,
628  const int *n, double *d, double *e,
629  const double *vl, const double *vu,
630  const int *il, const int *iu,
631  const double *abstol,
632  int *m, double *w,
633  double *z, const int *ldz, int* isuppz,
634  double *work, int* lwork,
635  int *iwork, int* liwork, int *info) {
636  if (Gblas::timekeeping) {
637  clock_t start = clock();
638  dstevr_(jobz, range, n, d, e, vl, vu, il, iu, abstol,
639  m, w, z, ldz, isuppz, work, lwork, iwork, liwork, info);
640  Gblas::time += ((float)(clock() - start)) / (CLOCKS_PER_SEC);
641  }
642  else {
643  dstevr_(jobz, range, n, d, e, vl, vu, il, iu, abstol,
644  m, w, z, ldz, isuppz, work, lwork, iwork, liwork, info);
645  }
646  }
647 
648 
649 
650  template<>
651  inline void syev<double>(const char *jobz, const char *uplo, const int *n,
652  double *a, const int *lda, double *w,
653  double *work, const int *lwork, int *info) {
654  if (Gblas::timekeeping) {
655  clock_t start = clock();
656  dsyev_(jobz, uplo, n, a, lda, w, work, lwork, info);
657  Gblas::time += ((float)(clock() - start)) / (CLOCKS_PER_SEC);
658  }
659  else {
660  dsyev_(jobz, uplo, n, a, lda, w, work, lwork, info);
661  }
662  }
663 
664 
665  /* LEVEL 2 */
666  template<>
667  inline void gemv<double>(const char *ta, const int *m, const int *n,
668  const double *alpha, const double *A,
669  const int *lda,
670  const double *x, const int *incx,
671  const double *beta, double *y, const int *incy) {
672  if (Gblas::timekeeping) {
673  clock_t start = clock();
674  dgemv_(ta, m, n, alpha, A, lda, x, incx, beta, y, incy);
675  Gblas::time += ((float)(clock() - start)) / (CLOCKS_PER_SEC);
676  }
677  else {
678  dgemv_(ta, m, n, alpha, A, lda, x, incx, beta, y, incy);
679  }
680  }
681 
682  template<>
683  inline void symv<double>(const char *uplo, const int *n,
684  const double *alpha, const double *A,
685  const int *lda, const double *x,
686  const int *incx, const double *beta,
687  double *y, const int *incy) {
688  if (Gblas::timekeeping) {
689  clock_t start = clock();
690  dsymv_(uplo, n, alpha, A, lda, x, incx, beta, y, incy);
691  Gblas::time += ((float)(clock() - start)) / (CLOCKS_PER_SEC);
692  }
693  else {
694  dsymv_(uplo, n, alpha, A, lda, x, incx, beta, y, incy);
695  }
696  }
697 
698  template<>
699  inline void trmv<double>(const char *uplo, const char *trans,
700  const char *diag, const int *n,
701  const double *A, const int *lda,
702  double *x, const int *incx) {
703  if (Gblas::timekeeping) {
704  clock_t start = clock();
705  dtrmv_(uplo, trans, diag, n, A, lda, x, incx);
706  Gblas::time += ((float)(clock() - start)) / (CLOCKS_PER_SEC);
707  }
708  else {
709  dtrmv_(uplo, trans, diag, n, A, lda, x, incx);
710  }
711  }
712 
713 
714  /* LEVEL 1 */
715  template<>
716  inline void scal<double>(const int* n,const double* da, double* dx,
717  const int* incx) {
718  if (Gblas::timekeeping) {
719  clock_t start = clock();
720  dscal_(n, da, dx, incx);
721  Gblas::time += ((float)(clock() - start)) / (CLOCKS_PER_SEC);
722  }
723  else {
724  dscal_(n, da, dx, incx);
725  }
726  }
727 
728  template<>
729  inline double dot<double>(const int* n, const double* dx, const int* incx,
730  const double* dy, const int* incy) {
731  double tmp = 0;
732  if (Gblas::timekeeping) {
733  clock_t start = clock();
734  tmp = ddot_(n, dx, incx, dy, incy);
735  Gblas::time += ((float)(clock() - start)) / (CLOCKS_PER_SEC);
736  }
737  else {
738  tmp = ddot_(n, dx, incx, dy, incy);
739  }
740  return tmp;
741  }
742 
743  template<>
744  inline void axpy<double>(const int* n, const double* da, const double* dx,
745  const int* incx, double* dy,const int* incy) {
746  if (Gblas::timekeeping) {
747  clock_t start = clock();
748  daxpy_(n, da, dx, incx, dy, incy);
749  Gblas::time += ((float)(clock() - start)) / (CLOCKS_PER_SEC);
750  }
751  else {
752  daxpy_(n, da, dx, incx, dy, incy);
753  }
754  }
755 
756 
757  /*************** Single specialization */
758  template<>
759  inline void gemm<float>(const char *ta,const char *tb,
760  const int *n, const int *k, const int *l,
761  const float *alpha,
762  const float *A,const int *lda,
763  const float *B, const int *ldb,
764  const float *beta,
765  float *C, const int *ldc) {
766  if (Gblas::timekeeping) {
767  clock_t start = clock();
768  sgemm_(ta,tb,n,k,l,alpha,A,lda,B,ldb,beta,C,ldc);
769  Gblas::time += ((float)(clock() - start)) / (CLOCKS_PER_SEC);
770  }
771  else {
772  sgemm_(ta,tb,n,k,l,alpha,A,lda,B,ldb,beta,C,ldc);
773  }
774  }
775 
776  template<>
777  inline void pptrf<float>(const char *uplo,const int *n,
778  float* ap, int *info) {
779  if (Gblas::timekeeping) {
780  clock_t start = clock();
781  spptrf_(uplo,n,ap,info);
782  Gblas::time += ((float)(clock() - start)) / (CLOCKS_PER_SEC);
783  }
784  else {
785  spptrf_(uplo,n,ap,info);
786  }
787  }
788 
789  template<>
790  inline void spgst<float>(const int *itype, const char *uplo,
791  const int *n,
792  float* ap,const float *bp,int *info) {
793  if (Gblas::timekeeping) {
794  clock_t start = clock();
795  sspgst_(itype,uplo,n,ap,bp,info);
796  Gblas::time += ((float)(clock() - start)) / (CLOCKS_PER_SEC);
797  }
798  else {
799  sspgst_(itype,uplo,n,ap,bp,info);
800  }
801  }
802 
803  template<>
804  inline void tptri<float>(const char *uplo,const char *diag,
805  const int *n,
806  float* ap,int *info) {
807  if (Gblas::timekeeping) {
808  clock_t start = clock();
809  stptri_(uplo,diag,n,ap,info);
810  Gblas::time += ((float)(clock() - start)) / (CLOCKS_PER_SEC);
811  }
812  else {
813  stptri_(uplo,diag,n,ap,info);
814  }
815  }
816 
817  template<>
818  inline void trmm<float>(const char *side,const char *uplo,
819  const char *transa,
820  const char *diag,const int *m,const int *n,
821  const float *alpha,
822  const float *A,const int *lda,
823  float *B,const int *ldb) {
824  if (Gblas::timekeeping) {
825  clock_t start = clock();
826  strmm_(side,uplo,transa,diag,m,n,alpha,A,lda,B,ldb);
827  Gblas::time += ((float)(clock() - start)) / (CLOCKS_PER_SEC);
828  }
829  else {
830  strmm_(side,uplo,transa,diag,m,n,alpha,A,lda,B,ldb);
831  }
832  }
833 
834  template<>
835  inline void sygv<float>(const int *itype,const char *jobz,
836  const char *uplo,const int *n,
837  float *A,const int *lda,
838  float *B,const int *ldb,
839  float* w,float* work,
840  const int *lwork,int *info) {
841  if (Gblas::timekeeping) {
842  clock_t start = clock();
843  ssygv_(itype,jobz,uplo,n,A,lda,B,ldb,w,work,lwork,info);
844  Gblas::time += ((float)(clock() - start)) / (CLOCKS_PER_SEC);
845  }
846  else {
847  ssygv_(itype,jobz,uplo,n,A,lda,B,ldb,w,work,lwork,info);
848  }
849  }
850 
851  template<>
852  inline void ggev<float>(const char *jobbl, const char *jobvr,
853  const int *n, float *A, const int *lda,
854  float *B, const int *ldb, float *alphar,
855  float *alphai, float *beta, float *vl,
856  const int *ldvl, float *vr, const int *ldvr,
857  float *work, const int *lwork, int *info) {
858  if (Gblas::timekeeping) {
859  clock_t start = clock();
860  sggev_(jobbl, jobvr, n, A, lda, B, ldb, alphar, alphai, beta, vl,
861  ldvl, vr, ldvr, work, lwork, info);
862  Gblas::time += ((float)(clock() - start)) / (CLOCKS_PER_SEC);
863  }
864  else {
865  sggev_(jobbl, jobvr, n, A, lda, B, ldb, alphar, alphai, beta, vl,
866  ldvl, vr, ldvr, work, lwork, info);
867  }
868  }
869 
870 
871  template<>
872  inline void potrf<float>(const char *uplo, const int *n, float *A,
873  const int *lda, int *info) {
874  if (Gblas::timekeeping) {
875  clock_t start = clock();
876  spotrf_(uplo, n, A, lda, info);
877  Gblas::time += ((float)(clock() - start)) / (CLOCKS_PER_SEC);
878  }
879  else {
880  spotrf_(uplo, n, A, lda, info);
881  }
882  }
883 
884  template<>
885  inline void trtri<float>(const char *uplo,const char *diag,const int *n,
886  float *A, const int *lda, int *info) {
887  if (Gblas::timekeeping) {
888  clock_t start = clock();
889  strtri_(uplo, diag, n, A, lda, info);
890  Gblas::time += ((float)(clock() - start)) / (CLOCKS_PER_SEC);
891  }
892  else {
893  strtri_(uplo, diag, n, A, lda, info);
894  }
895  }
896 
897  template<>
898  inline void syrk<float>(const char *uplo, const char *trans,
899  const int *n, const int *k, const float *alpha,
900  const float *A, const int *lda,
901  const float *beta, float *C, const int *ldc) {
902  if (Gblas::timekeeping) {
903  clock_t start = clock();
904  ssyrk_(uplo, trans, n, k, alpha, A, lda, beta, C, ldc);
905  Gblas::time += ((float)(clock() - start)) / (CLOCKS_PER_SEC);
906  }
907  else {
908  ssyrk_(uplo, trans, n, k, alpha, A, lda, beta, C, ldc);
909  }
910  }
911 
912  template<>
913  inline void symm<float>(const char *side,const char *uplo,
914  const int *m,const int *n, const float *alpha,
915  const float *A,const int *lda,
916  const float *B,const int *ldb,
917  const float* beta,
918  float *C,const int *ldc) {
919  if (Gblas::timekeeping) {
920  clock_t start = clock();
921  ssymm_(side, uplo, m, n, alpha, A, lda, B, ldb, beta, C, ldc);
922  Gblas::time += ((float)(clock() - start)) / (CLOCKS_PER_SEC);
923  }
924  else {
925  ssymm_(side, uplo, m, n, alpha, A, lda, B, ldb, beta, C, ldc);
926  }
927  }
928 
929  template<>
930  inline void pocon<float>(const char *uplo, const int *n,
931  const float *A, const int *lda,
932  const float *anorm, float *rcond,
933  float *work, int *iwork, int *info) {
934  if (Gblas::timekeeping) {
935  clock_t start = clock();
936  spocon_(uplo, n, A, lda, anorm, rcond, work, iwork, info);
937  Gblas::time += ((float)(clock() - start)) / (CLOCKS_PER_SEC);
938  }
939  else {
940  spocon_(uplo, n, A, lda, anorm, rcond, work, iwork, info);
941  }
942  }
943 
944  template<>
945  inline void stevx<float>(const char *jobz, const char *range,
946  const int *n, float *d, float *e,
947  const float *vl,
948  const float *vu, const int *il, const int *iu,
949  const float *abstol, int *m, float *w,
950  float *z,
951  const int *ldz, float *work, int *iwork,
952  int *ifail, int *info) {
953  if (Gblas::timekeeping) {
954  clock_t start = clock();
955  sstevx_(jobz, range, n, d, e, vl, vu, il, iu, abstol, m, w, z, ldz,
956  work, iwork, ifail, info);
957  Gblas::time += ((float)(clock() - start)) / (CLOCKS_PER_SEC);
958  }
959  else {
960  sstevx_(jobz, range, n, d, e, vl, vu, il, iu, abstol, m, w, z, ldz,
961  work, iwork, ifail, info);
962  }
963  }
964 
965  template<>
966  inline void stevr<float>(const char *jobz, const char *range,
967  const int *n, float *d, float *e,
968  const float *vl, const float *vu,
969  const int *il, const int *iu,
970  const float *abstol,
971  int *m, float *w,
972  float *z, const int *ldz, int* isuppz,
973  float *work, int* lwork,
974  int *iwork, int* liwork, int *info) {
975  if (Gblas::timekeeping) {
976  clock_t start = clock();
977  sstevr_(jobz, range, n, d, e, vl, vu, il, iu, abstol,
978  m, w, z, ldz, isuppz, work, lwork, iwork, liwork, info);
979  Gblas::time += ((float)(clock() - start)) / (CLOCKS_PER_SEC);
980  }
981  else {
982  sstevr_(jobz, range, n, d, e, vl, vu, il, iu, abstol,
983  m, w, z, ldz, isuppz, work, lwork, iwork, liwork, info);
984  }
985  }
986 
987  template<>
988  inline void syev<float>(const char *jobz, const char *uplo, const int *n,
989  float *a, const int *lda, float *w,
990  float *work, const int *lwork, int *info) {
991  if (Gblas::timekeeping) {
992  clock_t start = clock();
993  ssyev_(jobz, uplo, n, a, lda, w, work, lwork, info);
994  Gblas::time += ((float)(clock() - start)) / (CLOCKS_PER_SEC);
995  }
996  else {
997  ssyev_(jobz, uplo, n, a, lda, w, work, lwork, info);
998  }
999  }
1000 
1001 
1002  /* LEVEL 2 */
1003  template<>
1004  inline void gemv<float>(const char *ta, const int *m, const int *n,
1005  const float *alpha, const float *A,
1006  const int *lda,
1007  const float *x, const int *incx,
1008  const float *beta, float *y, const int *incy) {
1009  if (Gblas::timekeeping) {
1010  clock_t start = clock();
1011  sgemv_(ta, m, n, alpha, A, lda, x, incx, beta, y, incy);
1012  Gblas::time += ((float)(clock() - start)) / (CLOCKS_PER_SEC);
1013  }
1014  else {
1015  sgemv_(ta, m, n, alpha, A, lda, x, incx, beta, y, incy);
1016  }
1017  }
1018 
1019  template<>
1020  inline void symv<float>(const char *uplo, const int *n,
1021  const float *alpha, const float *A,
1022  const int *lda, const float *x,
1023  const int *incx, const float *beta,
1024  float *y, const int *incy) {
1025  if (Gblas::timekeeping) {
1026  clock_t start = clock();
1027  ssymv_(uplo, n, alpha, A, lda, x, incx, beta, y, incy);
1028  Gblas::time += ((float)(clock() - start)) / (CLOCKS_PER_SEC);
1029  }
1030  else {
1031  ssymv_(uplo, n, alpha, A, lda, x, incx, beta, y, incy);
1032  }
1033  }
1034 
1035  template<>
1036  inline void trmv<float>(const char *uplo, const char *trans,
1037  const char *diag, const int *n,
1038  const float *A, const int *lda,
1039  float *x, const int *incx) {
1040  if (Gblas::timekeeping) {
1041  clock_t start = clock();
1042  strmv_(uplo, trans, diag, n, A, lda, x, incx);
1043  Gblas::time += ((float)(clock() - start)) / (CLOCKS_PER_SEC);
1044  }
1045  else {
1046  strmv_(uplo, trans, diag, n, A, lda, x, incx);
1047  }
1048  }
1049 
1050  /* LEVEL 1 */
1051  template<>
1052  inline void scal<float>(const int* n,const float* da, float* dx,
1053  const int* incx) {
1054  if (Gblas::timekeeping) {
1055  clock_t start = clock();
1056  sscal_(n, da, dx, incx);
1057  Gblas::time += ((float)(clock() - start)) / (CLOCKS_PER_SEC);
1058  }
1059  else {
1060  sscal_(n, da, dx, incx);
1061  }
1062  }
1063 #if 0
1064  // Sdot has different return type in different BLAS implementations
1065  // Therefore the specialization for single precision is removed
1066  template<>
1067  inline float dot<float>(const int* n, const float* dx, const int* incx,
1068  const float* dy, const int* incy) {
1069  float tmp;
1070  if (Gblas::timekeeping) {
1071  clock_t start = clock();
1072  sdot_(n, dx, incx, dy, incy);
1073  Gblas::time += ((float)(clock() - start)) / (CLOCKS_PER_SEC);
1074  }
1075  else {
1076  sdot_(n, dx, incx, dy, incy);
1077  }
1078  return tmp;
1079  }
1080 #endif
1081 
1082  template<>
1083  inline void axpy<float>(const int* n, const float* da, const float* dx,
1084  const int* incx, float* dy,const int* incy) {
1085  if (Gblas::timekeeping) {
1086  clock_t start = clock();
1087  saxpy_(n, da, dx, incx, dy, incy);
1088  Gblas::time += ((float)(clock() - start)) / (CLOCKS_PER_SEC);
1089  }
1090  else {
1091  saxpy_(n, da, dx, incx, dy, incy);
1092  }
1093  }
1094 
1095  /* END OF SPECIALIZATIONS */
1096 #endif
1097 
1098 
1099 
1100 
1101 
1102 
1103 
1104 
1105 
1106  /* Other */
1107 
1108  template<class Treal>
1109  static void fulltopacked(const Treal* full, Treal* packed, const int size){
1110  int pind=0;
1111  for (int col=0;col<size;col++)
1112  {
1113  for(int row=0;row<=col;row++)
1114  {
1115  packed[pind]=full[col*size+row];
1116  pind++;
1117  }
1118  }
1119  }
1120 
1121  template<class Treal>
1122  static void packedtofull(const Treal* packed, Treal* full, const int size){
1123  int psize=(size+1)*size/2;
1124  int col=0;
1125  int row=0;
1126  for(int pind=0;pind<psize;pind++)
1127  {
1128  if (col==row)
1129  {
1130  full[col*size+row]=packed[pind];
1131  col++;
1132  row=0;
1133  }
1134  else
1135  {
1136  full[col*size+row]=packed[pind];
1137  full[row*size+col]=packed[pind];
1138  row++;
1139  }
1140  }
1141  }
1142 
1143  template<class Treal>
1144  static void tripackedtofull(const Treal* packed,Treal* full,
1145  const int size) {
1146  int psize=(size+1)*size/2;
1147  int col=0;
1148  int row=0;
1149  for(int pind=0;pind<psize;pind++)
1150  {
1151  if (col==row)
1152  {
1153  full[col*size+row]=packed[pind];
1154  col++;
1155  row=0;
1156  }
1157  else
1158  {
1159  full[col*size+row]=packed[pind];
1160  full[row*size+col]=0;
1161  row++;
1162  }
1163  }
1164  }
1165 
1166  template<class Treal>
1167  static void trifulltofull(Treal* trifull, const int size) {
1168  for(int col = 0; col < size - 1; col++)
1169  for(int row = col + 1; row < size; row++)
1170  trifull[col * size + row] = 0;
1171  }
1172 
1173 
1174 } /* namespace mat */
1175 
1176 #endif /* GBLAS */