NFFT  3.3.0
nfct.c
1 /*
2  * Copyright (c) 2002, 2015 Jens Keiner, Stefan Kunis, Daniel Potts
3  *
4  * This program is free software; you can redistribute it and/or modify it under
5  * the terms of the GNU General Public License as published by the Free Software
6  * Foundation; either version 2 of the License, or (at your option) any later
7  * version.
8  *
9  * This program is distributed in the hope that it will be useful, but WITHOUT
10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
11  * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
12  * details.
13  *
14  * You should have received a copy of the GNU General Public License along with
15  * this program; if not, write to the Free Software Foundation, Inc., 51
16  * Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
17  */
18 
19 /* $Id$ */
20 
21 /* Nonequispaced fast cosine transform */
22 
23 /* Author: Steffen Klatt 2004-2006, Jens Keiner 2010 */
24 
25 /* configure header */
26 #include "config.h"
27 
28 /* complex datatype (maybe) */
29 #ifdef HAVE_COMPLEX_H
30 #include<complex.h>
31 #endif
32 
33 /* NFFT headers */
34 #include "nfft3.h"
35 #include "infft.h"
36 
37 #ifdef _OPENMP
38 #include <omp.h>
39 #endif
40 
41 #ifdef OMP_ASSERT
42 #include <assert.h>
43 #endif
44 
45 #undef X
46 #define X(name) NFCT(name)
47 
49 static inline INT intprod(const INT *vec, const INT a, const INT d)
50 {
51  INT t, p;
52 
53  p = 1;
54  for (t = 0; t < d; t++)
55  p *= vec[t] - a;
56 
57  return p;
58 }
59 
60 /* handy shortcuts */
61 #define BASE(x) COS(x)
62 #define NN(x) (x - 1)
63 #define OFFSET 0
64 #define FOURIER_TRAFO FFTW_REDFT00
65 #define FFTW_DEFAULT_FLAGS FFTW_ESTIMATE | FFTW_DESTROY_INPUT
66 
67 #define NODE(p,r) (ths->x[(p) * ths->d + (r)])
68 
69 #define MACRO_with_FG_PSI fg_psi[t][lj[t]]
70 #define MACRO_with_PRE_PSI ths->psi[(j * ths->d + t) * (2 * ths->m + 2) + lj[t]]
71 #define MACRO_without_PRE_PSI PHI((2 * NN(ths->n[t])), ((ths->x[(j) * ths->d + t]) \
72  - ((R)(lj[t] + u[t])) / (K(2.0) * ((R)NN(ths->n[t])))), t)
73 #define MACRO_compute_PSI PHI((2 * NN(ths->n[t])), (NODE(j,t) - ((R)(lj[t] + u[t])) / (K(2.0) * ((R)NN(ths->n[t])))), t)
74 
90 void X(trafo_direct)(const X(plan) *ths)
91 {
92  R *f_hat = (R*)ths->f_hat, *f = (R*)ths->f;
93 
94  memset(f, 0, (size_t)(ths->M_total) * sizeof(R));
95 
96  if (ths->d == 1)
97  {
98  /* specialize for univariate case, rationale: faster */
99  INT j;
100 #ifdef _OPENMP
101  #pragma omp parallel for default(shared) private(j)
102 #endif
103  for (j = 0; j < ths->M_total; j++)
104  {
105  INT k_L;
106  for (k_L = 0; k_L < ths->N_total; k_L++)
107  {
108  R omega = K2PI * ((R)(k_L + OFFSET)) * ths->x[j];
109  f[j] += f_hat[k_L] * BASE(omega);
110  }
111  }
112  }
113  else
114  {
115  /* multivariate case */
116  INT j;
117 #ifdef _OPENMP
118  #pragma omp parallel for default(shared) private(j)
119 #endif
120  for (j = 0; j < ths->M_total; j++)
121  {
122  R x[ths->d], omega, Omega[ths->d + 1];
123  INT t, t2, k_L, k[ths->d];
124  Omega[0] = K(1.0);
125  for (t = 0; t < ths->d; t++)
126  {
127  k[t] = OFFSET;
128  x[t] = K2PI * ths->x[j * ths->d + t];
129  Omega[t+1] = BASE(((R)(k[t])) * x[t]) * Omega[t];
130  }
131  omega = Omega[ths->d];
132 
133  for (k_L = 0; k_L < ths->N_total; k_L++)
134  {
135  f[j] += f_hat[k_L] * omega;
136  {
137  for (t = ths->d - 1; (t >= 1) && (k[t] == (ths->N[t] - 1)); t--)
138  k[t] = OFFSET;
139 
140  k[t]++;
141 
142  for (t2 = t; t2 < ths->d; t2++)
143  Omega[t2+1] = BASE(((R)(k[t2])) * x[t2]) * Omega[t2];
144 
145  omega = Omega[ths->d];
146  }
147  }
148  }
149  }
150 }
151 
152 void X(adjoint_direct)(const X(plan) *ths)
153 {
154  R *f_hat = (R*)ths->f_hat, *f = (R*)ths->f;
155 
156  memset(f_hat, 0, (size_t)(ths->N_total) * sizeof(R));
157 
158  if (ths->d == 1)
159  {
160  /* specialize for univariate case, rationale: faster */
161 #ifdef _OPENMP
162  INT k_L;
163  #pragma omp parallel for default(shared) private(k_L)
164  for (k_L = 0; k_L < ths->N_total; k_L++)
165  {
166  INT j;
167  for (j = 0; j < ths->M_total; j++)
168  {
169  R omega = K2PI * ((R)(k_L + OFFSET)) * ths->x[j];
170  f_hat[k_L] += f[j] * BASE(omega);
171  }
172  }
173 #else
174  INT j;
175  for (j = 0; j < ths->M_total; j++)
176  {
177  INT k_L;
178  for (k_L = 0; k_L < ths->N_total; k_L++)
179  {
180  R omega = K2PI * ((R)(k_L + OFFSET)) * ths->x[j];
181  f_hat[k_L] += f[j] * BASE(omega);
182  }
183  }
184 #endif
185  }
186  else
187  {
188  /* multivariate case */
189  INT j, k_L;
190 #ifdef _OPENMP
191  #pragma omp parallel for default(shared) private(j, k_L)
192  for (k_L = 0; k_L < ths->N_total; k_L++)
193  {
194  INT k[ths->d], k_temp, t;
195 
196  k_temp = k_L;
197 
198  for (t = ths->d - 1; t >= 0; t--)
199  {
200  k[t] = k_temp % ths->N[t];
201  k_temp /= ths->N[t];
202  }
203 
204  for (j = 0; j < ths->M_total; j++)
205  {
206  R omega = K(1.0);
207  for (t = 0; t < ths->d; t++)
208  omega *= BASE(K2PI * (k[t] + OFFSET) * ths->x[j * ths->d + t]);
209  f_hat[k_L] += f[j] * omega;
210  }
211  }
212 #else
213  for (j = 0; j < ths->M_total; j++)
214  {
215  R x[ths->d], omega, Omega[ths->d+1];
216  INT t, t2, k[ths->d];
217  Omega[0] = K(1.0);
218  for (t = 0; t < ths->d; t++)
219  {
220  k[t] = OFFSET;
221  x[t] = K2PI * ths->x[j * ths->d + t];
222  Omega[t+1] = BASE(((R)(k[t])) * x[t]) * Omega[t];
223  }
224  omega = Omega[ths->d];
225  for (k_L = 0; k_L < ths->N_total; k_L++)
226  {
227  f_hat[k_L] += f[j] * omega;
228 
229  for (t = ths->d-1; (t >= 1) && (k[t] == ths->N[t] - 1); t--)
230  k[t] = OFFSET;
231 
232  k[t]++;
233 
234  for (t2 = t; t2 < ths->d; t2++)
235  Omega[t2+1] = BASE(((R)(k[t2])) * x[t2]) * Omega[t2];
236 
237  omega = Omega[ths->d];
238  }
239  }
240 #endif
241  }
242 }
243 
263 static inline void uo(const X(plan) *ths, const INT j, INT *up, INT *op,
264  const INT act_dim)
265 {
266  const R xj = ths->x[j * ths->d + act_dim];
267  INT c = LRINT(xj * (2 * NN(ths->n[(act_dim)])));
268 
269  (*up) = c - (ths->m);
270  (*op) = c + 1 + (ths->m);
271 }
272 
273 #define MACRO_D_compute_A \
274 { \
275  g_hat[kg_plain[ths->d]] = f_hat[k_L] * c_phi_inv_k[ths->d]; \
276 }
277 
278 #define MACRO_D_compute_T \
279 { \
280  f_hat[k_L] = g_hat[kg_plain[ths->d]] * c_phi_inv_k[ths->d]; \
281 }
282 
283 #define MACRO_D_init_result_A memset(g_hat, 0, (size_t)(ths->n_total) * sizeof(R));
284 
285 #define MACRO_D_init_result_T memset(f_hat, 0, (size_t)(ths->N_total) * sizeof(R));
286 
287 #define MACRO_with_PRE_PHI_HUT ths->c_phi_inv[t][kg[t]]
288 
289 #define MACRO_compute_PHI_HUT_INV (K(1.0) / (PHI_HUT((2 * NN(ths->n[t])), kg[t] + OFFSET, t)))
290 
291 #define MACRO_init_k_ks \
292 { \
293  for (t = 0; t < ths->d; t++) \
294  { \
295  kg[t] = 0; \
296  } \
297  i = 0; \
298 }
299 
300 #define MACRO_update_c_phi_inv_k(what_kind, which_phi) \
301 { \
302  for (t = i; t < ths->d; t++) \
303  { \
304  MACRO_update_c_phi_inv_k_ ## what_kind(which_phi); \
305  kg_plain[t+1] = kg_plain[t] * ths->n[t] + kg[t]; \
306  } \
307 }
308 
309 #define MACRO_update_c_phi_inv_k_A(which_phi) \
310 { \
311  c_phi_inv_k[t+1] = (kg[t] == 0 ? K(1.0) : K(0.5)) * c_phi_inv_k[t] * MACRO_ ## which_phi; \
312 }
313 
314 #define MACRO_update_c_phi_inv_k_T(which_phi) \
315 { \
316  c_phi_inv_k[t+1] = c_phi_inv_k[t] * MACRO_ ## which_phi; \
317 }
318 
319 #define MACRO_count_k_ks \
320 { \
321  kg[ths->d - 1]++; \
322  i = ths->d - 1; \
323 \
324  while ((kg[i] == ths->N[i]) && (i > 0)) \
325  { \
326  kg[i - 1]++; \
327  kg[i] = 0; \
328  i--; \
329  } \
330 }
331 
332 /* sub routines for the fast transforms matrix vector multiplication with D, D^T */
333 #define MACRO_D(which_one) \
334 static inline void D_ ## which_one (X(plan) *ths) \
335 { \
336  R *g_hat, *f_hat; /* local copy */ \
337  R c_phi_inv_k[ths->d+1]; /* postfix product of PHI_HUT */ \
338  INT t; /* index dimensions */ \
339  INT i; \
340  INT k_L; /* plain index */ \
341  INT kg[ths->d]; /* multi index in g_hat */ \
342  INT kg_plain[ths->d+1]; /* postfix plain index */ \
343 \
344  f_hat = (R*)ths->f_hat; g_hat = (R*)ths->g_hat; \
345  MACRO_D_init_result_ ## which_one; \
346 \
347  c_phi_inv_k[0] = K(1.0); \
348  kg_plain[0] = 0; \
349 \
350  MACRO_init_k_ks; \
351 \
352  if (ths->flags & PRE_PHI_HUT) \
353  { \
354  for (k_L = 0; k_L < ths->N_total; k_L++) \
355  { \
356  MACRO_update_c_phi_inv_k(which_one, with_PRE_PHI_HUT); \
357  MACRO_D_compute_ ## which_one; \
358  MACRO_count_k_ks; \
359  } \
360  } \
361  else \
362  { \
363  for (k_L = 0; k_L < ths->N_total; k_L++) \
364  { \
365  MACRO_update_c_phi_inv_k(which_one,compute_PHI_HUT_INV); \
366  MACRO_D_compute_ ## which_one; \
367  MACRO_count_k_ks; \
368  } \
369  } \
370 }
371 
372 MACRO_D(A)
373 MACRO_D(T)
374 
375 /* sub routines for the fast transforms matrix vector multiplication with B, B^T */
376 #define MACRO_B_init_result_A memset(f, 0, (size_t)(ths->M_total) * sizeof(R));
377 #define MACRO_B_init_result_T memset(g, 0, (size_t)(ths->n_total) * sizeof(R));
378 
379 #define MACRO_B_PRE_FULL_PSI_compute_A \
380 { \
381  (*fj) += ths->psi[ix] * g[ths->psi_index_g[ix]]; \
382 }
383 
384 #define MACRO_B_PRE_FULL_PSI_compute_T \
385 { \
386  R factor = K(1.0); \
387  INT d = ths->psi_index_g[ix]; \
388  for (t = ths->d - 1; t >= 0; t--) \
389  { \
390  INT m = d % ths->n[t]; \
391  if (m != 0 && m != ths->n[t] - 1) \
392  factor *= K(0.5); \
393  d = d / ths->n[t]; \
394  } \
395  g[ths->psi_index_g[ix]] += factor * ths->psi[ix] * (*fj); \
396 }
397 
398 #define MACRO_B_compute_A \
399 { \
400  (*fj) += phi_prod[ths->d] * g[ll_plain[ths->d]]; \
401 }
402 
403 #define MACRO_B_compute_T \
404 { \
405  g[ll_plain[ths->d]] += phi_prod[ths->d] * (*fj); \
406 }
407 
408 #define MACRO_init_uo_l_lj_t \
409 { \
410  for (t2 = 0; t2 < ths->d; t2++) \
411  { \
412  uo(ths, j, &u[t2], &o[t2], t2); \
413  \
414  /* determine index in g-array corresponding to u[(t2)] */ \
415  if (u[(t2)] < 0) \
416  lg_offset[(t2)] = \
417  (u[(t2)] % (2 * NN(ths->n[(t2)]))) + (2 * NN(ths->n[(t2)])); \
418  else \
419  lg_offset[(t2)] = u[(t2)] % (2 * NN(ths->n[(t2)])); \
420  if (lg_offset[(t2)] > NN(ths->n[(t2)])) \
421  lg_offset[(t2)] = -(2 * NN(ths->n[(t2)]) - lg_offset[(t2)]); \
422  \
423  if (lg_offset[t2] <= 0) \
424  { \
425  l[t2] = -lg_offset[t2]; \
426  count_lg[t2] = -1; \
427  } \
428  else \
429  { \
430  l[t2] = +lg_offset[t2]; \
431  count_lg[t2] = +1; \
432  } \
433  \
434  lj[t2] = 0; \
435  } \
436  t2 = 0; \
437 }
438 
439 #define FOO_A K(1.0)
440 
441 #define FOO_T ((l[t] == 0 || l[t] == ths->n[t] - 1) ? K(1.0) : K(0.5))
442 
443 #define MACRO_update_phi_prod_ll_plain(which_one,which_psi) \
444 { \
445  for (t = t2; t < ths->d; t++) \
446  { \
447  phi_prod[t+1] = (FOO_ ## which_one) * phi_prod[t] * (MACRO_ ## which_psi); \
448  ll_plain[t+1] = ll_plain[t] * ths->n[t] + l[t]; \
449  } \
450 }
451 
452 #define MACRO_count_uo_l_lj_t \
453 { \
454  /* turn around if we hit one of the boundaries */ \
455  if ((l[(ths->d-1)] == 0) || (l[(ths->d-1)] == NN(ths->n[(ths->d-1)]))) \
456  count_lg[(ths->d-1)] *= -1; \
457  \
458  /* move array index */ \
459  l[(ths->d-1)] += count_lg[(ths->d-1)]; \
460  \
461  lj[ths->d - 1]++; \
462  t2 = ths->d - 1; \
463  \
464  while ((lj[t2] == (2 * ths->m + 2)) && (t2 > 0)) \
465  { \
466  lj[t2 - 1]++; \
467  lj[t2] = 0; \
468  /* ansonsten lg[i-1] verschieben */ \
469  \
470  /* turn around if we hit one of the boundaries */ \
471  if ((l[(t2 - 1)] == 0) || (l[(t2 - 1)] == NN(ths->n[(t2 - 1)]))) \
472  count_lg[(t2 - 1)] *= -1; \
473  /* move array index */ \
474  l[(t2 - 1)] += count_lg[(t2 - 1)]; \
475  \
476  /* lg[i] = anfangswert */ \
477  if (lg_offset[t2] <= 0) \
478  { \
479  l[t2] = -lg_offset[t2]; \
480  count_lg[t2] = -1; \
481  } \
482  else \
483  { \
484  l[t2] = +lg_offset[t2]; \
485  count_lg[t2] = +1; \
486  } \
487  \
488  t2--; \
489  } \
490 }
491 
492 #define MACRO_B(which_one) \
493 static inline void B_ ## which_one (X(plan) *ths) \
494 { \
495  INT lprod; /* 'regular bandwidth' of matrix B */ \
496  INT u[ths->d], o[ths->d]; /* multi band with respect to x_j */ \
497  INT t, t2; /* index dimensions */ \
498  INT j; /* index nodes */ \
499  INT l_L, ix; /* index one row of B */ \
500  INT l[ths->d]; /* multi index u<=l<=o (real index of g in array) */ \
501  INT lj[ths->d]; /* multi index 0<=lc<2m+2 */ \
502  INT ll_plain[ths->d+1]; /* postfix plain index in g */ \
503  R phi_prod[ths->d+1]; /* postfix product of PHI */ \
504  R *f, *g; /* local copy */ \
505  R *fj; /* local copy */ \
506  R y[ths->d]; \
507  R fg_psi[ths->d][2*ths->m+2]; \
508  R fg_exp_l[ths->d][2*ths->m+2]; \
509  INT l_fg,lj_fg; \
510  R tmpEXP1, tmpEXP2, tmpEXP2sq, tmp1, tmp2, tmp3; \
511  R ip_w; \
512  INT ip_u; \
513  INT ip_s = ths->K/(ths->m+2); \
514  INT lg_offset[ths->d]; /* offset in g according to u */ \
515  INT count_lg[ths->d]; /* count summands (2m+2) */ \
516 \
517  f = (R*)ths->f; g = (R*)ths->g; \
518 \
519  MACRO_B_init_result_ ## which_one \
520 \
521  if (ths->flags & PRE_FULL_PSI) \
522  { \
523  for (ix = 0, j = 0, fj = f; j < ths->M_total; j++, fj++) \
524  { \
525  for (l_L = 0; l_L < ths->psi_index_f[j]; l_L++, ix++) \
526  { \
527  MACRO_B_PRE_FULL_PSI_compute_ ## which_one; \
528  } \
529  } \
530  return; \
531  } \
532 \
533  phi_prod[0] = K(1.0); \
534  ll_plain[0] = 0; \
535 \
536  for (t = 0, lprod = 1; t < ths->d; t++) \
537  lprod *= (2 * ths->m + 2); \
538 \
539  if (ths->flags & PRE_PSI) \
540  { \
541  for (j = 0, fj = f; j < ths->M_total; j++, fj++) \
542  { \
543  MACRO_init_uo_l_lj_t; \
544  \
545  for (l_L = 0; l_L < lprod; l_L++) \
546  { \
547  MACRO_update_phi_prod_ll_plain(which_one, with_PRE_PSI); \
548  \
549  MACRO_B_compute_ ## which_one; \
550  \
551  MACRO_count_uo_l_lj_t; \
552  } /* for(l_L) */ \
553  } /* for(j) */ \
554  return; \
555  } /* if(PRE_PSI) */ \
556  \
557  if (ths->flags & PRE_FG_PSI) \
558  { \
559  for (t = 0; t < ths->d; t++) \
560  { \
561  tmpEXP2 = EXP(K(-1.0) / ths->b[t]); \
562  tmpEXP2sq = tmpEXP2 * tmpEXP2; \
563  tmp2 = K(1.0); \
564  tmp3 = K(1.0); \
565  fg_exp_l[t][0] = K(1.0); \
566  \
567  for (lj_fg = 1; lj_fg <= (2 * ths->m + 2); lj_fg++) \
568  { \
569  tmp3 = tmp2 * tmpEXP2; \
570  tmp2 *= tmpEXP2sq; \
571  fg_exp_l[t][lj_fg] = fg_exp_l[t][lj_fg-1] * tmp3; \
572  } \
573  } \
574  \
575  for (j = 0, fj = f; j < ths->M_total; j++, fj++) \
576  { \
577  MACRO_init_uo_l_lj_t; \
578  \
579  for (t = 0; t < ths->d; t++) \
580  { \
581  fg_psi[t][0] = ths->psi[2 * (j * ths->d + t)]; \
582  tmpEXP1 = ths->psi[2 * (j * ths->d + t) + 1]; \
583  tmp1 = K(1.0); \
584  \
585  for (l_fg = u[t] + 1, lj_fg = 1; l_fg <= o[t]; l_fg++, lj_fg++) \
586  { \
587  tmp1 *= tmpEXP1; \
588  fg_psi[t][lj_fg] = fg_psi[t][0] * tmp1 * fg_exp_l[t][lj_fg]; \
589  } \
590  } \
591  \
592  for (l_L= 0; l_L < lprod; l_L++) \
593  { \
594  MACRO_update_phi_prod_ll_plain(which_one, with_FG_PSI); \
595  \
596  MACRO_B_compute_ ## which_one; \
597  \
598  MACRO_count_uo_l_lj_t; \
599  } \
600  } \
601  return; \
602  } \
603  \
604  if (ths->flags & FG_PSI) \
605  { \
606  for (t = 0; t < ths->d; t++) \
607  { \
608  tmpEXP2 = EXP(K(-1.0) / ths->b[t]); \
609  tmpEXP2sq = tmpEXP2 * tmpEXP2; \
610  tmp2 = K(1.0); \
611  tmp3 = K(1.0); \
612  fg_exp_l[t][0] = K(1.0); \
613  for (lj_fg = 1; lj_fg <= (2 * ths->m + 2); lj_fg++) \
614  { \
615  tmp3 = tmp2 * tmpEXP2; \
616  tmp2 *= tmpEXP2sq; \
617  fg_exp_l[t][lj_fg] = fg_exp_l[t][lj_fg-1] * tmp3; \
618  } \
619  } \
620  \
621  for (j = 0, fj = f; j < ths->M_total; j++, fj++) \
622  { \
623  MACRO_init_uo_l_lj_t; \
624  \
625  for (t = 0; t < ths->d; t++) \
626  { \
627  fg_psi[t][0] = (PHI((2 * NN(ths->n[t])), (ths->x[j*ths->d+t] - ((R)u[t])/(2 * NN(ths->n[t]))),(t)));\
628  \
629  tmpEXP1 = EXP(K(2.0) * ((2 * NN(ths->n[t])) * ths->x[j * ths->d + t] - u[t]) / ths->b[t]); \
630  tmp1 = K(1.0); \
631  for (l_fg = u[t] + 1, lj_fg = 1; l_fg <= o[t]; l_fg++, lj_fg++) \
632  { \
633  tmp1 *= tmpEXP1; \
634  fg_psi[t][lj_fg] = fg_psi[t][0] * tmp1 * fg_exp_l[t][lj_fg]; \
635  } \
636  } \
637  \
638  for (l_L = 0; l_L < lprod; l_L++) \
639  { \
640  MACRO_update_phi_prod_ll_plain(which_one, with_FG_PSI); \
641  \
642  MACRO_B_compute_ ## which_one; \
643  \
644  MACRO_count_uo_l_lj_t; \
645  } \
646  } \
647  return; \
648  } \
649  \
650  if (ths->flags & PRE_LIN_PSI) \
651  { \
652  for (j = 0, fj = f; j < ths->M_total; j++, fj++) \
653  { \
654  MACRO_init_uo_l_lj_t; \
655  \
656  for (t = 0; t < ths->d; t++) \
657  { \
658  y[t] = (((2 * NN(ths->n[t])) * ths->x[j * ths->d + t] - (R)u[t]) \
659  * ((R)ths->K))/(ths->m + 2); \
660  ip_u = LRINT(FLOOR(y[t])); \
661  ip_w = y[t]-ip_u; \
662  for (l_fg = u[t], lj_fg = 0; l_fg <= o[t]; l_fg++, lj_fg++) \
663  { \
664  fg_psi[t][lj_fg] = ths->psi[(ths->K+1)*t + ABS(ip_u-lj_fg*ip_s)] \
665  * (1-ip_w) + ths->psi[(ths->K+1)*t + ABS(ip_u-lj_fg*ip_s+1)] \
666  * (ip_w); \
667  } \
668  } \
669  \
670  for (l_L = 0; l_L < lprod; l_L++) \
671  { \
672  MACRO_update_phi_prod_ll_plain(which_one, with_FG_PSI); \
673  \
674  MACRO_B_compute_ ## which_one; \
675  \
676  MACRO_count_uo_l_lj_t; \
677  } /* for(l_L) */ \
678  } /* for(j) */ \
679  return; \
680  } /* if(PRE_LIN_PSI) */ \
681  \
682  /* no precomputed psi at all */ \
683  for (j = 0, fj = &f[0]; j < ths->M_total; j++, fj += 1) \
684  { \
685  MACRO_init_uo_l_lj_t; \
686  \
687  for (l_L = 0; l_L < lprod; l_L++) \
688  { \
689  MACRO_update_phi_prod_ll_plain(which_one, without_PRE_PSI); \
690  \
691  MACRO_B_compute_ ## which_one; \
692  \
693  MACRO_count_uo_l_lj_t; \
694  } /* for (l_L) */ \
695  } /* for (j) */ \
696 } /* B */
697 
698 MACRO_B(A)
699 MACRO_B(T)
700 
704 void X(trafo)(X(plan) *ths)
705 {
706  switch(ths->d)
707  {
708  default:
709  {
710  /* use ths->my_fftw_r2r_plan */
711  ths->g_hat = ths->g1;
712  ths->g = ths->g2;
713 
714  /* form \f$ \hat g_k = \frac{\hat f_k}{c_k\left(\phi\right)} \text{ for }
715  * k \in I_N \f$ */
716  TIC(0)
717  D_A(ths);
718  TOC(0)
719 
720  /* Compute by d-variate discrete Fourier transform
721  * \f$ g_l = \sum_{k \in I_N} \hat g_k {\rm e}^{-2\pi {\rm i} \frac{kl}{n}}
722  * \text{ for } l \in I_n \f$ */
723  TIC_FFTW(1)
724  FFTW(execute)(ths->my_fftw_r2r_plan);
725  TOC_FFTW(1)
726 
727  /*if (ths->flags & PRE_FULL_PSI)
728  full_psi__A(ths);*/
729 
730  /* Set \f$ f_j = \sum_{l \in I_n,m(x_j)} g_l \psi\left(x_j-\frac{l}{n}\right)
731  * \text{ for } j=0,\hdots,M-1 \f$ */
732  TIC(2)
733  B_A(ths);
734  TOC(2)
735 
736  /*if (ths->flags & PRE_FULL_PSI)
737  {
738  Y(free)(ths->psi_index_g);
739  Y(free)(ths->psi_index_f);
740  }*/
741  }
742  }
743 } /* trafo */
744 
745 void X(adjoint)(X(plan) *ths)
746 {
747  switch(ths->d)
748  {
749  default:
750  {
751  /* use ths->my_fftw_plan */
752  ths->g_hat = ths->g2;
753  ths->g = ths->g1;
754 
755  /*if (ths->flags & PRE_FULL_PSI)
756  full_psi__T(ths);*/
757 
758  /* Set \f$ g_l = \sum_{j=0}^{M-1} f_j \psi\left(x_j-\frac{l}{n}\right)
759  * \text{ for } l \in I_n,m(x_j) \f$ */
760  TIC(2)
761  B_T(ths);
762  TOC(2)
763 
764  /* Compute by d-variate discrete cosine transform
765  * \f$ \hat g_k = \sum_{l \in I_n} g_l {\rm e}^{-2\pi {\rm i} \frac{kl}{n}}
766  * \text{ for } k \in I_N\f$ */
767  TIC_FFTW(1)
768  FFTW(execute)(ths->my_fftw_r2r_plan);
769  TOC_FFTW(1)
770 
771  /* Form \f$ \hat f_k = \frac{\hat g_k}{c_k\left(\phi\right)} \text{ for }
772  * k \in I_N \f$ */
773  TIC(0)
774  D_T(ths);
775  TOC(0)
776  }
777  }
778 } /* adjoint */
779 
782 static inline void precompute_phi_hut(X(plan) *ths)
783 {
784  INT ks[ths->d]; /* index over all frequencies */
785  INT t; /* index over all dimensions */
786 
787  ths->c_phi_inv = (R**) Y(malloc)((size_t)(ths->d) * sizeof(R*));
788 
789  for (t = 0; t < ths->d; t++)
790  {
791  ths->c_phi_inv[t] = (R*)Y(malloc)((size_t)(ths->N[t] - OFFSET) * sizeof(R));
792 
793  for (ks[t] = 0; ks[t] < ths->N[t] - OFFSET; ks[t]++)
794  {
795  ths->c_phi_inv[t][ks[t]] = (K(1.0) / (PHI_HUT((2 * NN(ths->n[t])), ks[t] + OFFSET, t)));
796  }
797  }
798 } /* phi_hut */
799 
805 void X(precompute_lin_psi)(X(plan) *ths)
806 {
807  INT t;
808  INT j;
809  R step;
811  for (t = 0; t < ths->d; t++)
812  {
813  step = ((R)(ths->m+2)) / (((R)ths->K) * (2 * NN(ths->n[t])));
814 
815  for (j = 0; j <= ths->K; j++)
816  {
817  ths->psi[(ths->K + 1) * t + j] = PHI((2 * NN(ths->n[t])), (j * step), t);
818  } /* for(j) */
819  } /* for(t) */
820 }
821 
822 void X(precompute_fg_psi)(X(plan) *ths)
823 {
824  INT t; /* index over all dimensions */
825  INT u, o; /* depends on x_j */
826 
827 // sort(ths);
828 
829  for (t = 0; t < ths->d; t++)
830  {
831  INT j;
832 // #pragma omp parallel for default(shared) private(j,u,o)
833  for (j = 0; j < ths->M_total; j++)
834  {
835  uo(ths, j, &u, &o, t);
836 
837  ths->psi[2 * (j*ths->d + t)] = (PHI((2 * NN(ths->n[t])),(ths->x[j * ths->d + t] - ((R)u) / (2 * NN(ths->n[t]))),(t)));
838  ths->psi[2 * (j*ths->d + t) + 1] = EXP(K(2.0) * ( (2 * NN(ths->n[t])) * ths->x[j * ths->d + t] - u) / ths->b[t]);
839  } /* for(j) */
840  }
841  /* for(t) */
842 } /* nfft_precompute_fg_psi */
843 
844 void X(precompute_psi)(X(plan) *ths)
845 {
846  INT t; /* index over all dimensions */
847  INT lj; /* index 0<=lj<u+o+1 */
848  INT u, o; /* depends on x_j */
849 
850  //sort(ths);
851 
852  for (t = 0; t < ths->d; t++)
853  {
854  INT j;
855 
856  for (j = 0; j < ths->M_total; j++)
857  {
858  uo(ths, j, &u, &o, t);
859 
860  for(lj = 0; lj < (2 * ths->m + 2); lj++)
861  ths->psi[(j * ths->d + t) * (2 * ths->m + 2) + lj] =
862  (PHI((2 * NN(ths->n[t])), ((ths->x[(j) * ths->d + (t)]) - ((R)(lj + u)) / (K(2.0) * ((R)NN(ths->n[t])))), t));
863  } /* for (j) */
864  } /* for (t) */
865 } /* precompute_psi */
866 
867 void X(precompute_full_psi)(X(plan) *ths)
868 {
869 //#ifdef _OPENMP
870 // sort(ths);
871 //
872 // nfft_precompute_full_psi_omp(ths);
873 //#else
874  INT t, t2; /* index over all dimensions */
875  INT j; /* index over all nodes */
876  INT l_L; /* plain index 0 <= l_L < lprod */
877  INT l[ths->d]; /* multi index u<=l<=o */
878  INT lj[ths->d]; /* multi index 0<=lj<u+o+1 */
879  INT ll_plain[ths->d+1]; /* postfix plain index */
880  INT lprod; /* 'bandwidth' of matrix B */
881  INT u[ths->d], o[ths->d]; /* depends on x_j */
882  INT count_lg[ths->d];
883  INT lg_offset[ths->d];
884 
885  R phi_prod[ths->d+1];
886 
887  INT ix, ix_old;
888 
889  //sort(ths);
890 
891  phi_prod[0] = K(1.0);
892  ll_plain[0] = 0;
893 
894  for (t = 0, lprod = 1; t < ths->d; t++)
895  lprod *= 2 * ths->m + 2;
896 
897  for (j = 0, ix = 0, ix_old = 0; j < ths->M_total; j++)
898  {
899  MACRO_init_uo_l_lj_t;
900 
901  for (l_L = 0; l_L < lprod; l_L++, ix++)
902  {
903  MACRO_update_phi_prod_ll_plain(A, without_PRE_PSI);
904 
905  ths->psi_index_g[ix] = ll_plain[ths->d];
906  ths->psi[ix] = phi_prod[ths->d];
907 
908  MACRO_count_uo_l_lj_t;
909  } /* for (l_L) */
910 
911  ths->psi_index_f[j] = ix - ix_old;
912  ix_old = ix;
913  } /* for(j) */
914 //#endif
915 }
916 
917 void X(precompute_one_psi)(X(plan) *ths)
918 {
919  if(ths->flags & PRE_PSI)
920  X(precompute_psi)(ths);
921  if(ths->flags & PRE_FULL_PSI)
922  X(precompute_full_psi)(ths);
923  if(ths->flags & PRE_FG_PSI)
924  X(precompute_fg_psi)(ths);
925  if(ths->flags & PRE_LIN_PSI)
926  X(precompute_lin_psi)(ths);
927 }
928 
929 static inline void init_help(X(plan) *ths)
930 {
931  INT t; /* index over all dimensions */
932  INT lprod; /* 'bandwidth' of matrix B */
933 
934  if (ths->flags & NFFT_OMP_BLOCKWISE_ADJOINT)
935  ths->flags |= NFFT_SORT_NODES;
936 
937  ths->N_total = intprod(ths->N, OFFSET, ths->d);
938  ths->n_total = intprod(ths->n, 0, ths->d);
939 
940  ths->sigma = (R*)Y(malloc)((size_t)(ths->d) * sizeof(R));
941 
942  for (t = 0; t < ths->d; t++)
943  ths->sigma[t] = ((R)NN(ths->n[t])) / ths->N[t];
944 
945  /* Assign r2r transform kinds for each dimension */
946  ths->r2r_kind = (FFTW(r2r_kind)*)Y(malloc)((size_t)(ths->d) * sizeof (FFTW(r2r_kind)));
947  for (t = 0; t < ths->d; t++)
948  ths->r2r_kind[t] = FOURIER_TRAFO;
949 
950  WINDOW_HELP_INIT;
951 
952  if (ths->flags & MALLOC_X)
953  ths->x = (R*)Y(malloc)((size_t)(ths->d * ths->M_total) * sizeof(R));
954 
955  if (ths->flags & MALLOC_F_HAT)
956  ths->f_hat = (R*)Y(malloc)((size_t)(ths->N_total) * sizeof(R));
957 
958  if (ths->flags & MALLOC_F)
959  ths->f = (R*)Y(malloc)((size_t)(ths->M_total) * sizeof(R));
960 
961  if (ths->flags & PRE_PHI_HUT)
962  precompute_phi_hut(ths);
963 
964  if(ths->flags & PRE_LIN_PSI)
965  {
966  ths->K = (1U<< 10) * (ths->m+2);
967  ths->psi = (R*) Y(malloc)((size_t)((ths->K + 1) * ths->d) * sizeof(R));
968  }
969 
970  if(ths->flags & PRE_FG_PSI)
971  ths->psi = (R*) Y(malloc)((size_t)(ths->M_total * ths->d * 2) * sizeof(R));
972 
973  if (ths->flags & PRE_PSI)
974  ths->psi = (R*) Y(malloc)((size_t)(ths->M_total * ths->d * (2 * ths->m + 2 )) * sizeof(R));
975 
976  if(ths->flags & PRE_FULL_PSI)
977  {
978  for (t = 0, lprod = 1; t < ths->d; t++)
979  lprod *= 2 * ths->m + 2;
980 
981  ths->psi = (R*) Y(malloc)((size_t)(ths->M_total * lprod) * sizeof(R));
982 
983  ths->psi_index_f = (INT*) Y(malloc)((size_t)(ths->M_total) * sizeof(INT));
984  ths->psi_index_g = (INT*) Y(malloc)((size_t)(ths->M_total * lprod) * sizeof(INT));
985  }
986 
987  if (ths->flags & FFTW_INIT)
988  {
989  ths->g1 = (R*)Y(malloc)((size_t)(ths->n_total) * sizeof(R));
990 
991  if (ths->flags & FFT_OUT_OF_PLACE)
992  ths->g2 = (R*) Y(malloc)((size_t)(ths->n_total) * sizeof(R));
993  else
994  ths->g2 = ths->g1;
995 
996  {
997  int *_n = Y(malloc)((size_t)(ths->d) * sizeof(int));
998 
999  for (t = 0; t < ths->d; t++)
1000  _n[t] = (int)(ths->n[t]);
1001 
1002  ths->my_fftw_r2r_plan = FFTW(plan_r2r)((int)ths->d, _n, ths->g1, ths->g2, ths->r2r_kind, ths->fftw_flags);
1003  Y(free)(_n);
1004  }
1005  }
1006 
1007 // if(ths->flags & NFFT_SORT_NODES)
1008 // ths->index_x = (INT*) Y(malloc)(sizeof(INT)*2*ths->M_total);
1009 // else
1010 // ths->index_x = NULL;
1011 
1012  ths->mv_trafo = (void (*) (void* ))X(trafo);
1013  ths->mv_adjoint = (void (*) (void* ))X(adjoint);
1014 }
1015 
1016 void X(init)(X(plan) *ths, int d, int *N, int M_total)
1017 {
1018  int t; /* index over all dimensions */
1019 
1020  ths->d = (INT)d;
1021 
1022  ths->N = (INT*) Y(malloc)((size_t)(d) * sizeof(INT));
1023 
1024  for (t = 0; t < d; t++)
1025  ths->N[t] = (INT)N[t];
1026 
1027  ths->M_total = (INT)M_total;
1028 
1029  ths->n = (INT*) Y(malloc)((size_t)(d) * sizeof(INT));
1030 
1031  for (t = 0; t < d; t++)
1032  ths->n[t] = 2 * (Y(next_power_of_2)(ths->N[t]) - 1) + OFFSET;
1033 
1034  ths->m = WINDOW_HELP_ESTIMATE_m;
1035 
1036  if (d > 1)
1037  {
1038 //#ifdef _OPENMP
1039 // ths->flags = PRE_PHI_HUT | PRE_PSI | MALLOC_X| MALLOC_F_HAT | MALLOC_F |
1040 // FFTW_INIT | FFT_OUT_OF_PLACE | NFFT_SORT_NODES |
1041 // NFFT_OMP_BLOCKWISE_ADJOINT;
1042 //#else
1043  ths->flags = PRE_PHI_HUT | PRE_PSI | MALLOC_X| MALLOC_F_HAT | MALLOC_F |
1044  FFTW_INIT | FFT_OUT_OF_PLACE | NFFT_SORT_NODES;
1045 //#endif
1046  }
1047  else
1048  ths->flags = PRE_PHI_HUT | PRE_PSI | MALLOC_X| MALLOC_F_HAT | MALLOC_F |
1049  FFTW_INIT | FFT_OUT_OF_PLACE;
1050 
1051  ths->fftw_flags = FFTW_ESTIMATE | FFTW_DESTROY_INPUT;
1052 
1053  init_help(ths);
1054 }
1055 
1056 void X(init_guru)(X(plan) *ths, int d, int *N, int M_total, int *n, int m,
1057  unsigned flags, unsigned fftw_flags)
1058 {
1059  INT t; /* index over all dimensions */
1060 
1061  ths->d = (INT)d;
1062  ths->M_total = (INT)M_total;
1063  ths->N = (INT*)Y(malloc)((size_t)(ths->d) * sizeof(INT));
1064 
1065  for (t = 0; t < d; t++)
1066  ths->N[t] = (INT)N[t];
1067 
1068  ths->n = (INT*)Y(malloc)((size_t)(ths->d) * sizeof(INT));
1069 
1070  for (t = 0; t < d; t++)
1071  ths->n[t] = (INT)n[t];
1072 
1073  ths->m = (INT)m;
1074 
1075  ths->flags = flags;
1076  ths->fftw_flags = fftw_flags;
1077 
1078  init_help(ths);
1079 }
1080 
1081 void X(init_1d)(X(plan) *ths, int N1, int M_total)
1082 {
1083  int N[1];
1084 
1085  N[0] = N1;
1086 
1087  X(init)(ths, 1, N, M_total);
1088 }
1089 
1090 void X(init_2d)(X(plan) *ths, int N1, int N2, int M_total)
1091 {
1092  int N[2];
1093 
1094  N[0] = N1;
1095  N[1] = N2;
1096 
1097  X(init)(ths, 2, N, M_total);
1098 }
1099 
1100 void X(init_3d)(X(plan) *ths, int N1, int N2, int N3, int M_total)
1101 {
1102  int N[3];
1103 
1104  N[0] = N1;
1105  N[1] = N2;
1106  N[2] = N3;
1107 
1108  X(init)(ths, 3, N, M_total);
1109 }
1110 
1111 const char* X(check)(X(plan) *ths)
1112 {
1113  INT j;
1114 
1115  if (!ths->f)
1116  return "Member f not initialized.";
1117 
1118  if (!ths->x)
1119  return "Member x not initialized.";
1120 
1121  if (!ths->f_hat)
1122  return "Member f_hat not initialized.";
1123 
1124  for (j = 0; j < ths->M_total * ths->d; j++)
1125  {
1126  if ((ths->x[j] < K(0.0)) || (ths->x[j] >= K(0.5)))
1127  {
1128  return "ths->x out of range [0.0,0.5)";
1129  }
1130  }
1131 
1132  for (j = 0; j < ths->d; j++)
1133  {
1134  if (ths->sigma[j] <= 1)
1135  return "Oversampling factor too small";
1136 
1137  if(ths->N[j] - 1 <= ths->m)
1138  return "Polynomial degree N is smaller than cut-off m";
1139 
1140  if(ths->N[j]%2 == 1)
1141  return "polynomial degree N has to be even";
1142  }
1143  return 0;
1144 }
1145 
1146 void X(finalize)(X(plan) *ths)
1147 {
1148  INT t; /* index over dimensions */
1149 
1150 // if(ths->flags & NFFT_SORT_NODES)
1151 // Y(free)(ths->index_x);
1152 
1153  if (ths->flags & FFTW_INIT)
1154  {
1155 #ifdef _OPENMP
1156  #pragma omp critical (nfft_omp_critical_fftw_plan)
1157 #endif
1158  FFTW(destroy_plan)(ths->my_fftw_r2r_plan);
1159 
1160  if (ths->flags & FFT_OUT_OF_PLACE)
1161  Y(free)(ths->g2);
1162 
1163  Y(free)(ths->g1);
1164  }
1165 
1166  if(ths->flags & PRE_FULL_PSI)
1167  {
1168  Y(free)(ths->psi_index_g);
1169  Y(free)(ths->psi_index_f);
1170  Y(free)(ths->psi);
1171  }
1172 
1173  if (ths->flags & PRE_PSI)
1174  Y(free)(ths->psi);
1175 
1176  if(ths->flags & PRE_FG_PSI)
1177  Y(free)(ths->psi);
1178 
1179  if(ths->flags & PRE_LIN_PSI)
1180  Y(free)(ths->psi);
1181 
1182  if (ths->flags & PRE_PHI_HUT)
1183  {
1184  for (t = 0; t < ths->d; t++)
1185  Y(free)(ths->c_phi_inv[t]);
1186  Y(free)(ths->c_phi_inv);
1187  }
1188 
1189  if (ths->flags & MALLOC_F)
1190  Y(free)(ths->f);
1191 
1192  if(ths->flags & MALLOC_F_HAT)
1193  Y(free)(ths->f_hat);
1194 
1195  if (ths->flags & MALLOC_X)
1196  Y(free)(ths->x);
1197 
1198  WINDOW_HELP_FINALIZE;
1199 
1200  Y(free)(ths->N);
1201  Y(free)(ths->n);
1202  Y(free)(ths->sigma);
1203 
1204  Y(free)(ths->r2r_kind);
1205 } /* finalize */
#define TIC(a)
Timing, method works since the inaccurate timer is updated mostly in the measured function...
Definition: infft.h:1415
#define X(name)
Include header for C99 complex datatype.
Definition: fastsum.h:53