NFFT  3.3.0
fastgauss.c
1 /*
2  * Copyright (c) 2002, 2015 Jens Keiner, Stefan Kunis, Daniel Potts
3  *
4  * This program is free software; you can redistribute it and/or modify it under
5  * the terms of the GNU General Public License as published by the Free Software
6  * Foundation; either version 2 of the License, or (at your option) any later
7  * version.
8  *
9  * This program is distributed in the hope that it will be useful, but WITHOUT
10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
11  * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
12  * details.
13  *
14  * You should have received a copy of the GNU General Public License along with
15  * this program; if not, write to the Free Software Foundation, Inc., 51
16  * Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
17  */
18 
19 /* $Id$ */
20 
27 #include <stdio.h>
28 #include <stdlib.h>
29 #include <math.h>
30 #include <complex.h>
31 
32 #define NFFT_PRECISION_DOUBLE
33 
34 #include "nfft3mp.h"
35 
44 #define DGT_PRE_CEXP (1U<< 0)
45 
55 #define FGT_NDFT (1U<< 1)
56 
65 #define FGT_APPROX_B (1U<< 2)
66 
68 typedef struct
69 {
70  int N;
71  int M;
73  NFFT_C *alpha;
74  NFFT_C *f;
76  unsigned flags;
79  NFFT_C sigma;
81  NFFT_R *x;
82  NFFT_R *y;
84  NFFT_C *pre_cexp;
86  int n;
87  NFFT_R p;
89  NFFT_C *b;
91  NFFT(plan) *nplan1;
92  NFFT(plan) *nplan2;
94 } fgt_plan;
95 
103 static void dgt_trafo(fgt_plan *ths)
104 {
105  NFFT_INT j, k, l;
106 
107  for (j = 0; j < ths->M; j++)
108  ths->f[j] = NFFT_K(0.0);
109 
110  if (ths->flags & DGT_PRE_CEXP)
111  for (j = 0, l = 0; j < ths->M; j++)
112  for (k = 0; k < ths->N; k++, l++)
113  ths->f[j] += ths->alpha[k] * ths->pre_cexp[l];
114  else
115  for (j = 0; j < ths->M; j++)
116  for (k = 0; k < ths->N; k++)
117  ths->f[j] += ths->alpha[k]
118  * NFFT_M(cexp)(
119  -ths->sigma * (ths->y[j] - ths->x[k])
120  * (ths->y[j] - ths->x[k]));
121 }
122 
130 static void fgt_trafo(fgt_plan *ths)
131 {
132  NFFT_INT l;
133 
134  if (ths->flags & FGT_NDFT)
135  {
136  NFFT(adjoint_direct)(ths->nplan1);
137 
138  for (l = 0; l < ths->n; l++)
139  ths->nplan1->f_hat[l] *= ths->b[l];
140 
141  NFFT(trafo_direct)(ths->nplan2);
142  }
143  else
144  {
145  NFFT(adjoint)(ths->nplan1);
146 
147  for (l = 0; l < ths->n; l++)
148  ths->nplan1->f_hat[l] *= ths->b[l];
149 
150  NFFT(trafo)(ths->nplan2);
151  }
152 }
153 
168 static void fgt_init_guru(fgt_plan *ths, int N, int M, NFFT_C sigma, int n, NFFT_R p, int m,
169  unsigned flags)
170 {
171  int j, n_fftw;
172  FFTW(plan) fplan;
173 
174  ths->M = M;
175  ths->N = N;
176  ths->sigma = sigma;
177  ths->flags = flags;
178 
179  ths->x = (NFFT_R*) NFFT(malloc)((size_t)(ths->N) * sizeof(NFFT_R));
180  ths->y = (NFFT_R*) NFFT(malloc)((size_t)(ths->M) * sizeof(NFFT_R));
181  ths->alpha = (NFFT_C*) NFFT(malloc)((size_t)(ths->N) * sizeof(NFFT_C));
182  ths->f = (NFFT_C*) NFFT(malloc)((size_t)(ths->M) * sizeof(NFFT_C));
183 
184  ths->n = n;
185  ths->p = p;
186 
187  ths->b = (NFFT_C*) NFFT(malloc)((size_t)(ths->n) * sizeof(NFFT_C));
188 
189  ths->nplan1 = (NFFT(plan)*) NFFT(malloc)(sizeof(NFFT(plan)));
190  ths->nplan2 = (NFFT(plan)*) NFFT(malloc)(sizeof(NFFT(plan)));
191 
192  n_fftw = (int)NFFT(next_power_of_2)(2 * ths->n);
193 
194  {
195  NFFT(init_guru)(ths->nplan1, 1, &(ths->n), ths->N, &n_fftw, m, PRE_PHI_HUT |
196  PRE_PSI | MALLOC_X | MALLOC_F_HAT | FFTW_INIT, FFTW_MEASURE);
197  NFFT(init_guru)(ths->nplan2, 1, &(ths->n), ths->M, &n_fftw, m, PRE_PHI_HUT |
198  PRE_PSI | MALLOC_X | FFTW_INIT, FFTW_MEASURE);
199  }
200 
201  ths->nplan1->f = ths->alpha;
202  ths->nplan2->f_hat = ths->nplan1->f_hat;
203  ths->nplan2->f = ths->f;
204 
205  if (ths->flags & FGT_APPROX_B)
206  {
207  fplan = FFTW(plan_dft_1d)(ths->n, ths->b, ths->b, FFTW_FORWARD,
208  FFTW_MEASURE);
209 
210  for (j = 0; j < ths->n; j++)
211  ths->b[j] = NFFT_M(cexp)(
212  -ths->p * ths->p * ths->sigma * ((NFFT_R)(j) - (NFFT_R)(ths->n) / NFFT_K(2.0)) * ((NFFT_R)(j) - (NFFT_R)(ths->n) / NFFT_K(2.0))
213  / ((NFFT_R) (ths->n * ths->n))) / ((NFFT_R)(ths->n));
214 
215  NFFT(fftshift_complex_int)(ths->b, 1, &ths->n);
216  FFTW(execute)(fplan);
217  NFFT(fftshift_complex_int)(ths->b, 1, &ths->n);
218 
219  FFTW(destroy_plan)(fplan);
220  }
221  else
222  {
223  for (j = 0; j < ths->n; j++)
224  ths->b[j] = NFFT_K(1.0) / ths->p * NFFT_M(csqrt)(NFFT_KPI / ths->sigma)
225  * NFFT_M(cexp)(
226  -NFFT_KPI * NFFT_KPI * ((NFFT_R)(j) - (NFFT_R)(ths->n) / NFFT_K(2.0)) * ((NFFT_R)(j) - (NFFT_R)(ths->n) / NFFT_K(2.0))
227  / (ths->p * ths->p * ths->sigma));
228  }
229 }
230 
242 static void fgt_init(fgt_plan *ths, int N, int M, NFFT_C sigma, NFFT_R eps)
243 {
244  NFFT_R p;
245  int n;
246 
247  p = NFFT_K(0.5) + NFFT_M(sqrt)(-NFFT_M(log)(eps) / NFFT_M(creal)(sigma));
248 
249  if (p < NFFT_K(1.0))
250  p = NFFT_K(1.0);
251 
252  n = (int)(2 * (NFFT_M(lrint)(NFFT_M(ceil)(p * NFFT_M(cabs)(sigma) / NFFT_KPI * NFFT_M(sqrt)(-NFFT_M(log)(eps) / NFFT_M(creal)(sigma))))));
253 
254  fgt_init_guru(ths, N, M, sigma, n, p, 7, N * M <= ((NFFT_INT) (1U << 20)) ? DGT_PRE_CEXP : 0);
255 }
256 
264 {
265  int j, k, l;
266 
267  if (ths->flags & DGT_PRE_CEXP)
268  {
269  ths->pre_cexp = (NFFT_C*) NFFT(malloc)((size_t)(ths->M * ths->N) * sizeof(NFFT_C));
270 
271  for (j = 0, l = 0; j < ths->M; j++)
272  for (k = 0; k < ths->N; k++, l++)
273  ths->pre_cexp[l] = NFFT_M(cexp)(
274  -ths->sigma * (ths->y[j] - ths->x[k]) * (ths->y[j] - ths->x[k]));
275  }
276 
277  for (j = 0; j < ths->nplan1->M_total; j++)
278  ths->nplan1->x[j] = ths->x[j] / ths->p;
279  for (j = 0; j < ths->nplan2->M_total; j++)
280  ths->nplan2->x[j] = ths->y[j] / ths->p;
281 
282  if (ths->nplan1->flags & PRE_PSI)
283  NFFT(precompute_psi)(ths->nplan1);
284  if (ths->nplan2->flags & PRE_PSI)
285  NFFT(precompute_psi)(ths->nplan2);
286 }
287 
294 static void fgt_finalize(fgt_plan *ths)
295 {
296  NFFT(finalize)(ths->nplan2);
297  NFFT(finalize)(ths->nplan1);
298 
299  NFFT(free)(ths->nplan2);
300  NFFT(free)(ths->nplan1);
301 
302  NFFT(free)(ths->b);
303 
304  NFFT(free)(ths->f);
305  NFFT(free)(ths->y);
306 
307  NFFT(free)(ths->alpha);
308  NFFT(free)(ths->x);
309 }
310 
317 static void fgt_test_init_rand(fgt_plan *ths)
318 {
319  NFFT_INT j, k;
320 
321  for (k = 0; k < ths->N; k++)
322  ths->x[k] = NFFT(drand48)() / NFFT_K(2.0) - NFFT_K(1.0) / NFFT_K(4.0);
323 
324  for (j = 0; j < ths->M; j++)
325  ths->y[j] = NFFT(drand48)() / NFFT_K(2.0) - NFFT_K(1.0) / NFFT_K(4.0);
326 
327  for (k = 0; k < ths->N; k++)
328  ths->alpha[k] = NFFT(drand48)() - NFFT_K(1.0) / NFFT_K(2.0)
329  + _Complex_I * (NFFT(drand48)() - NFFT_K(1.0) / NFFT_K(2.0));
330 }
331 
340 static NFFT_R fgt_test_measure_time(fgt_plan *ths, unsigned dgt)
341 {
342  int r;
343  NFFT_R t0, t1, time_diff;
344  NFFT_R t_out;
345  NFFT_R tau = NFFT_K(0.01);
346 
347  t_out = NFFT_K(0.0);
348  r = 0;
349  while (t_out < tau)
350  {
351  r++;
352  t0 = NFFT(clock_gettime_seconds)();
353  if (dgt)
354  dgt_trafo(ths);
355  else
356  fgt_trafo(ths);
357  t1 = NFFT(clock_gettime_seconds)();
358  time_diff = t1 - t0;
359  t_out += time_diff;
360  }
361  t_out /= (NFFT_R)(r);
362 
363  return t_out;
364 }
365 
375 static void fgt_test_simple(int N, int M, NFFT_C sigma, NFFT_R eps)
376 {
377  fgt_plan my_plan;
378  NFFT_C *swap_dgt;
379 
380  fgt_init(&my_plan, N, M, sigma, eps);
381  swap_dgt = (NFFT_C*) NFFT(malloc)((size_t)(my_plan.M) * sizeof(NFFT_C));
382 
383  fgt_test_init_rand(&my_plan);
384  fgt_init_node_dependent(&my_plan);
385 
386  NFFT_CSWAP(swap_dgt, my_plan.f);
387  dgt_trafo(&my_plan);
388  NFFT(vpr_complex)(my_plan.f, my_plan.M, "discrete gauss transform");
389  NFFT_CSWAP(swap_dgt, my_plan.f);
390 
391  fgt_trafo(&my_plan);
392  NFFT(vpr_complex)(my_plan.f, my_plan.M, "fast gauss transform");
393 
394  printf("\n relative error: %1.3" NFFT__FES__ "\n",
395  NFFT(error_l_infty_1_complex)(swap_dgt, my_plan.f, my_plan.M,
396  my_plan.alpha, my_plan.N));
397 
398  NFFT(free)(swap_dgt);
399  fgt_finalize(&my_plan);
400 }
401 
411 static void fgt_test_andersson(void)
412 {
413  fgt_plan my_plan;
414  NFFT_C *swap_dgt;
415  int N;
416 
417  NFFT_C sigma = NFFT_K(4.0) * (NFFT_K(138.0) + _Complex_I * NFFT_K(100.0));
418  int n = 128;
419  int N_dgt_pre_exp = (int) (1U << 11);
420  int N_dgt = (int) (1U << 19);
421 
422  printf("n=%d, sigma=%1.3" NFFT__FES__ "+i%1.3" NFFT__FES__ "\n", n, NFFT_M(creal)(sigma), NFFT_M(cimag)(sigma));
423 
424  for (N = ((NFFT_INT) (1U << 6)); N < ((NFFT_INT) (1U << 22)); N = N << 1)
425  {
426  printf("$%d$\t & ", N);
427 
428  fgt_init_guru(&my_plan, N, N, sigma, n, 1, 7, N < N_dgt_pre_exp ? DGT_PRE_CEXP : 0);
429 
430  swap_dgt = (NFFT_C*) NFFT(malloc)((size_t)(my_plan.M) * sizeof(NFFT_C));
431 
432  fgt_test_init_rand(&my_plan);
433 
434  fgt_init_node_dependent(&my_plan);
435 
436  if (N < N_dgt)
437  {
438  NFFT_CSWAP(swap_dgt, my_plan.f);
439  if (N < N_dgt_pre_exp)
440  my_plan.flags ^= DGT_PRE_CEXP;
441 
442  printf("$%1.1" NFFT__FES__ "$\t & ", fgt_test_measure_time(&my_plan, 1));
443  if (N < N_dgt_pre_exp)
444  my_plan.flags ^= DGT_PRE_CEXP;
445  NFFT_CSWAP(swap_dgt, my_plan.f);
446  }
447  else
448  printf("\t\t & ");
449 
450  if (N < N_dgt_pre_exp)
451  printf("$%1.1" NFFT__FES__ "$\t & ", fgt_test_measure_time(&my_plan, 1));
452  else
453  printf("\t\t & ");
454 
455  my_plan.flags ^= FGT_NDFT;
456  printf("$%1.1" NFFT__FES__ "$\t & ", fgt_test_measure_time(&my_plan, 0));
457  my_plan.flags ^= FGT_NDFT;
458 
459  printf("$%1.1" NFFT__FES__ "$\t & ", fgt_test_measure_time(&my_plan, 0));
460 
461  printf("$%1.1" NFFT__FES__ "$\t \\\\ \n",
462  NFFT(error_l_infty_1_complex)(swap_dgt, my_plan.f, my_plan.M, my_plan.alpha, my_plan.N));
463  fflush(stdout);
464 
465  NFFT(free)(swap_dgt);
466  fgt_finalize(&my_plan);
467  FFTW(cleanup)();
468  }
469 }
470 
477 static void fgt_test_error(void)
478 {
479  fgt_plan my_plan;
480  NFFT_C *swap_dgt;
481  int n, mi;
482 
483  NFFT_C sigma = NFFT_K(4.0) * (NFFT_K(138.0) + _Complex_I * NFFT_K(100.0));
484  int N = 1000;
485  int M = 1000;
486  int m[2] = { 7, 3 };
487 
488  printf("N=%d;\tM=%d;\nsigma=%1.3" NFFT__FES__ "+i*%1.3" NFFT__FES__ ";\n", N, M, NFFT_M(creal)(sigma),
489  NFFT_M(cimag)(sigma));
490  printf("error=[\n");
491 
492  swap_dgt = (NFFT_C*) NFFT(malloc)((size_t)(M) * sizeof(NFFT_C));
493 
494  for (n = 8; n <= 128; n += 4)
495  {
496  printf("%d\t", n);
497  for (mi = 0; mi < 2; mi++)
498  {
499  fgt_init_guru(&my_plan, N, M, sigma, n, 1, m[mi], 0);
500  fgt_test_init_rand(&my_plan);
501  fgt_init_node_dependent(&my_plan);
502 
503  NFFT_CSWAP(swap_dgt, my_plan.f);
504  dgt_trafo(&my_plan);
505  NFFT_CSWAP(swap_dgt, my_plan.f);
506 
507  fgt_trafo(&my_plan);
508 
509  printf("%1.3" NFFT__FES__ "\t",
510  NFFT(error_l_infty_1_complex)(swap_dgt, my_plan.f, my_plan.M, my_plan.alpha, my_plan.N));
511  fflush(stdout);
512 
513  fgt_finalize(&my_plan);
514  FFTW(cleanup)();
515  }
516  printf("\n");
517  }
518  printf("];\n");
519 
520  NFFT(free)(swap_dgt);
521 }
522 
529 static void fgt_test_error_p(void)
530 {
531  fgt_plan my_plan;
532  NFFT_C *swap_dgt;
533  int n, pi;
534 
535  NFFT_C sigma = NFFT_K(20.0) + NFFT_K(40.0) * _Complex_I;
536  int N = 1000;
537  int M = 1000;
538  NFFT_R p[3] = {NFFT_K(1.0), NFFT_K(1.5), NFFT_K(2.0)};
539 
540  printf("N=%d;\tM=%d;\nsigma=%1.3" NFFT__FES__ "+i*%1.3" NFFT__FES__ ";\n", N, M, NFFT_M(creal)(sigma),
541  NFFT_M(cimag)(sigma));
542  printf("error=[\n");
543 
544  swap_dgt = (NFFT_C*) NFFT(malloc)((size_t)(M) * sizeof(NFFT_C));
545 
546  for (n = 8; n <= 128; n += 4)
547  {
548  printf("%d\t", n);
549  for (pi = 0; pi < 3; pi++)
550  {
551  fgt_init_guru(&my_plan, N, M, sigma, n, p[pi], 7, 0);
552  fgt_test_init_rand(&my_plan);
553  fgt_init_node_dependent(&my_plan);
554 
555  NFFT_CSWAP(swap_dgt, my_plan.f);
556  dgt_trafo(&my_plan);
557  NFFT_CSWAP(swap_dgt, my_plan.f);
558 
559  fgt_trafo(&my_plan);
560 
561  printf("%1.3" NFFT__FES__ "\t",
562  NFFT(error_l_infty_1_complex)(swap_dgt, my_plan.f, my_plan.M, my_plan.alpha, my_plan.N));
563  fflush(stdout);
564 
565  fgt_finalize(&my_plan);
566  FFTW(cleanup)();
567  }
568  printf("\n");
569  }
570  printf("];\n");
571 }
572 
578 int main(int argc, char **argv)
579 {
580  if (argc != 2)
581  {
582  fprintf(stderr, "fastgauss type\n");
583  fprintf(stderr, " type\n");
584  fprintf(stderr, " 0 - Simple test.\n");
585  fprintf(stderr, " 1 - Compares accuracy and execution time.\n");
586  fprintf(stderr, " Pipe to output_andersson.tex\n");
587  fprintf(stderr, " 2 - Compares accuracy.\n");
588  fprintf(stderr, " Pipe to output_error.m\n");
589  fprintf(stderr, " 3 - Compares accuracy.\n");
590  fprintf(stderr, " Pipe to output_error_p.m\n");
591  return EXIT_FAILURE;
592  }
593 
594  if (atoi(argv[1]) == 0)
595  fgt_test_simple(10, 10, NFFT_K(5.0) + NFFT_K(3.0) * _Complex_I, NFFT_K(0.001));
596 
597  if (atoi(argv[1]) == 1)
599 
600  if (atoi(argv[1]) == 2)
601  fgt_test_error();
602 
603  if (atoi(argv[1]) == 3)
605 
606  return EXIT_SUCCESS;
607 }
608 /* \} */
static void fgt_test_andersson(void)
Compares accuracy and execution time of the fast Gauss transform with increasing expansion degree...
Definition: fastgauss.c:411
static void fgt_init_guru(fgt_plan *ths, int N, int M, NFFT_C sigma, int n, NFFT_R p, int m, unsigned flags)
Initialisation of a transform plan, guru.
Definition: fastgauss.c:168
unsigned flags
flags for precomputation and approximation type
Definition: fastgauss.c:76
#define FGT_APPROX_B
If this flag is set, the discrete Fourier coefficients of the uniformly sampled Gaussian are used ins...
Definition: fastgauss.c:65
NFFT_R p
period, at least 1
Definition: fastgauss.c:87
NFFT_C * f
target evaluations
Definition: fastgauss.c:74
#define DGT_PRE_CEXP
If this flag is set, the whole matrix is precomputed and stored for the discrete Gauss transfrom...
Definition: fastgauss.c:44
static void fgt_test_init_rand(fgt_plan *ths)
Random initialisation of a fgt plan.
Definition: fastgauss.c:317
int N
number of source nodes
Definition: fastgauss.c:70
static void fgt_finalize(fgt_plan *ths)
Destroys the transform plan.
Definition: fastgauss.c:294
Structure for the Gauss transform.
Definition: fastgauss.c:68
int M
number of target nodes
Definition: fastgauss.c:71
NFFT_C sigma
parameter of the Gaussian
Definition: fastgauss.c:79
NFFT_R * y
target nodes in
Definition: fastgauss.c:82
NFFT_C * b
expansion coefficients
Definition: fastgauss.c:89
static void fgt_init_node_dependent(fgt_plan *ths)
Initialisation of a transform plan, depends on source and target nodes.
Definition: fastgauss.c:263
static void fgt_trafo(fgt_plan *ths)
Executes the fast Gauss transform.
Definition: fastgauss.c:130
static void fgt_test_error_p(void)
Compares accuracy of the fast Gauss transform with increasing expansion degree and different periodis...
Definition: fastgauss.c:529
static NFFT_R fgt_test_measure_time(fgt_plan *ths, unsigned dgt)
Compares execution times for the fast and discrete Gauss transform.
Definition: fastgauss.c:340
#define FGT_NDFT
If this flag is set, the fast Gauss transform uses the discrete instead of the fast Fourier transform...
Definition: fastgauss.c:55
NFFT_C * alpha
source coefficients
Definition: fastgauss.c:73
NFFT_R * x
source nodes in
Definition: fastgauss.c:81
NFFT_C * pre_cexp
precomputed values for dgt
Definition: fastgauss.c:84
static void fgt_test_simple(int N, int M, NFFT_C sigma, NFFT_R eps)
Simple example that computes fast and discrete Gauss transforms.
Definition: fastgauss.c:375
int n
expansion degree
Definition: fastgauss.c:86
static void fgt_test_error(void)
Compares accuracy of the fast Gauss transform with increasing expansion degree.
Definition: fastgauss.c:477
static void dgt_trafo(fgt_plan *ths)
Executes the discrete Gauss transform.
Definition: fastgauss.c:103
static void fgt_init(fgt_plan *ths, int N, int M, NFFT_C sigma, NFFT_R eps)
Initialisation of a transform plan, simple.
Definition: fastgauss.c:242