32 #define NFFT_PRECISION_DOUBLE
44 #define DGT_PRE_CEXP (1U<< 0)
55 #define FGT_NDFT (1U<< 1)
65 #define FGT_APPROX_B (1U<< 2)
107 for (j = 0; j < ths->
M; j++)
108 ths->
f[j] = NFFT_K(0.0);
111 for (j = 0, l = 0; j < ths->
M; j++)
112 for (k = 0; k < ths->
N; k++, l++)
115 for (j = 0; j < ths->
M; j++)
116 for (k = 0; k < ths->
N; k++)
117 ths->
f[j] += ths->
alpha[k]
119 -ths->
sigma * (ths->
y[j] - ths->
x[k])
120 * (ths->
y[j] - ths->
x[k]));
136 NFFT(adjoint_direct)(ths->nplan1);
138 for (l = 0; l < ths->
n; l++)
139 ths->nplan1->f_hat[l] *= ths->
b[l];
141 NFFT(trafo_direct)(ths->nplan2);
145 NFFT(adjoint)(ths->nplan1);
147 for (l = 0; l < ths->
n; l++)
148 ths->nplan1->f_hat[l] *= ths->
b[l];
150 NFFT(trafo)(ths->nplan2);
179 ths->
x = (NFFT_R*) NFFT(malloc)((size_t)(ths->
N) *
sizeof(NFFT_R));
180 ths->
y = (NFFT_R*) NFFT(malloc)((size_t)(ths->
M) *
sizeof(NFFT_R));
181 ths->
alpha = (NFFT_C*) NFFT(malloc)((size_t)(ths->
N) *
sizeof(NFFT_C));
182 ths->
f = (NFFT_C*) NFFT(malloc)((size_t)(ths->
M) *
sizeof(NFFT_C));
187 ths->
b = (NFFT_C*) NFFT(malloc)((size_t)(ths->
n) *
sizeof(NFFT_C));
189 ths->nplan1 = (NFFT(plan)*) NFFT(malloc)(
sizeof(NFFT(plan)));
190 ths->nplan2 = (NFFT(plan)*) NFFT(malloc)(
sizeof(NFFT(plan)));
192 n_fftw = (int)NFFT(next_power_of_2)(2 * ths->
n);
195 NFFT(init_guru)(ths->nplan1, 1, &(ths->
n), ths->
N, &n_fftw, m, PRE_PHI_HUT |
196 PRE_PSI | MALLOC_X | MALLOC_F_HAT | FFTW_INIT, FFTW_MEASURE);
197 NFFT(init_guru)(ths->nplan2, 1, &(ths->
n), ths->
M, &n_fftw, m, PRE_PHI_HUT |
198 PRE_PSI | MALLOC_X | FFTW_INIT, FFTW_MEASURE);
201 ths->nplan1->
f = ths->
alpha;
202 ths->nplan2->f_hat = ths->nplan1->f_hat;
203 ths->nplan2->
f = ths->
f;
207 fplan = FFTW(plan_dft_1d)(ths->
n, ths->
b, ths->
b, FFTW_FORWARD,
210 for (j = 0; j < ths->
n; j++)
211 ths->
b[j] = NFFT_M(cexp)(
212 -ths->
p * ths->
p * ths->
sigma * ((NFFT_R)(j) - (NFFT_R)(ths->
n) / NFFT_K(2.0)) * ((NFFT_R)(j) - (NFFT_R)(ths->
n) / NFFT_K(2.0))
213 / ((NFFT_R) (ths->
n * ths->
n))) / ((NFFT_R)(ths->
n));
215 NFFT(fftshift_complex_int)(ths->
b, 1, &ths->
n);
216 FFTW(execute)(fplan);
217 NFFT(fftshift_complex_int)(ths->
b, 1, &ths->
n);
219 FFTW(destroy_plan)(fplan);
223 for (j = 0; j < ths->
n; j++)
224 ths->
b[j] = NFFT_K(1.0) / ths->
p * NFFT_M(csqrt)(NFFT_KPI / ths->
sigma)
226 -NFFT_KPI * NFFT_KPI * ((NFFT_R)(j) - (NFFT_R)(ths->
n) / NFFT_K(2.0)) * ((NFFT_R)(j) - (NFFT_R)(ths->
n) / NFFT_K(2.0))
227 / (ths->
p * ths->
p * ths->
sigma));
247 p = NFFT_K(0.5) + NFFT_M(sqrt)(-NFFT_M(log)(eps) / NFFT_M(creal)(sigma));
252 n = (int)(2 * (NFFT_M(lrint)(NFFT_M(ceil)(p * NFFT_M(cabs)(sigma) / NFFT_KPI * NFFT_M(sqrt)(-NFFT_M(log)(eps) / NFFT_M(creal)(sigma))))));
269 ths->
pre_cexp = (NFFT_C*) NFFT(malloc)((size_t)(ths->
M * ths->
N) *
sizeof(NFFT_C));
271 for (j = 0, l = 0; j < ths->
M; j++)
272 for (k = 0; k < ths->
N; k++, l++)
274 -ths->
sigma * (ths->
y[j] - ths->
x[k]) * (ths->
y[j] - ths->
x[k]));
277 for (j = 0; j < ths->nplan1->M_total; j++)
278 ths->nplan1->
x[j] = ths->
x[j] / ths->
p;
279 for (j = 0; j < ths->nplan2->M_total; j++)
280 ths->nplan2->
x[j] = ths->
y[j] / ths->
p;
282 if (ths->nplan1->
flags & PRE_PSI)
283 NFFT(precompute_psi)(ths->nplan1);
284 if (ths->nplan2->
flags & PRE_PSI)
285 NFFT(precompute_psi)(ths->nplan2);
296 NFFT(finalize)(ths->nplan2);
297 NFFT(finalize)(ths->nplan1);
299 NFFT(free)(ths->nplan2);
300 NFFT(free)(ths->nplan1);
307 NFFT(free)(ths->
alpha);
321 for (k = 0; k < ths->
N; k++)
322 ths->
x[k] = NFFT(drand48)() / NFFT_K(2.0) - NFFT_K(1.0) / NFFT_K(4.0);
324 for (j = 0; j < ths->
M; j++)
325 ths->
y[j] = NFFT(drand48)() / NFFT_K(2.0) - NFFT_K(1.0) / NFFT_K(4.0);
327 for (k = 0; k < ths->
N; k++)
328 ths->
alpha[k] = NFFT(drand48)() - NFFT_K(1.0) / NFFT_K(2.0)
329 + _Complex_I * (NFFT(drand48)() - NFFT_K(1.0) / NFFT_K(2.0));
343 NFFT_R t0, t1, time_diff;
345 NFFT_R tau = NFFT_K(0.01);
352 t0 = NFFT(clock_gettime_seconds)();
357 t1 = NFFT(clock_gettime_seconds)();
361 t_out /= (NFFT_R)(r);
380 fgt_init(&my_plan, N, M, sigma, eps);
381 swap_dgt = (NFFT_C*) NFFT(malloc)((size_t)(my_plan.
M) *
sizeof(NFFT_C));
386 NFFT_CSWAP(swap_dgt, my_plan.
f);
388 NFFT(vpr_complex)(my_plan.
f, my_plan.
M,
"discrete gauss transform");
389 NFFT_CSWAP(swap_dgt, my_plan.
f);
392 NFFT(vpr_complex)(my_plan.
f, my_plan.
M,
"fast gauss transform");
394 printf(
"\n relative error: %1.3" NFFT__FES__
"\n",
395 NFFT(error_l_infty_1_complex)(swap_dgt, my_plan.
f, my_plan.
M,
396 my_plan.
alpha, my_plan.
N));
398 NFFT(free)(swap_dgt);
417 NFFT_C sigma = NFFT_K(4.0) * (NFFT_K(138.0) + _Complex_I * NFFT_K(100.0));
419 int N_dgt_pre_exp = (int) (1U << 11);
420 int N_dgt = (int) (1U << 19);
422 printf(
"n=%d, sigma=%1.3" NFFT__FES__
"+i%1.3" NFFT__FES__
"\n", n, NFFT_M(creal)(sigma), NFFT_M(cimag)(sigma));
424 for (N = ((NFFT_INT) (1U << 6)); N < ((NFFT_INT) (1U << 22)); N = N << 1)
426 printf(
"$%d$\t & ", N);
430 swap_dgt = (NFFT_C*) NFFT(malloc)((size_t)(my_plan.
M) *
sizeof(NFFT_C));
438 NFFT_CSWAP(swap_dgt, my_plan.
f);
439 if (N < N_dgt_pre_exp)
443 if (N < N_dgt_pre_exp)
445 NFFT_CSWAP(swap_dgt, my_plan.
f);
450 if (N < N_dgt_pre_exp)
461 printf(
"$%1.1" NFFT__FES__
"$\t \\\\ \n",
462 NFFT(error_l_infty_1_complex)(swap_dgt, my_plan.
f, my_plan.
M, my_plan.
alpha, my_plan.
N));
465 NFFT(free)(swap_dgt);
483 NFFT_C sigma = NFFT_K(4.0) * (NFFT_K(138.0) + _Complex_I * NFFT_K(100.0));
488 printf(
"N=%d;\tM=%d;\nsigma=%1.3" NFFT__FES__
"+i*%1.3" NFFT__FES__
";\n", N, M, NFFT_M(creal)(sigma),
489 NFFT_M(cimag)(sigma));
492 swap_dgt = (NFFT_C*) NFFT(malloc)((size_t)(M) *
sizeof(NFFT_C));
494 for (n = 8; n <= 128; n += 4)
497 for (mi = 0; mi < 2; mi++)
503 NFFT_CSWAP(swap_dgt, my_plan.
f);
505 NFFT_CSWAP(swap_dgt, my_plan.
f);
509 printf(
"%1.3" NFFT__FES__
"\t",
510 NFFT(error_l_infty_1_complex)(swap_dgt, my_plan.
f, my_plan.
M, my_plan.
alpha, my_plan.
N));
520 NFFT(free)(swap_dgt);
535 NFFT_C sigma = NFFT_K(20.0) + NFFT_K(40.0) * _Complex_I;
538 NFFT_R p[3] = {NFFT_K(1.0), NFFT_K(1.5), NFFT_K(2.0)};
540 printf(
"N=%d;\tM=%d;\nsigma=%1.3" NFFT__FES__
"+i*%1.3" NFFT__FES__
";\n", N, M, NFFT_M(creal)(sigma),
541 NFFT_M(cimag)(sigma));
544 swap_dgt = (NFFT_C*) NFFT(malloc)((size_t)(M) *
sizeof(NFFT_C));
546 for (n = 8; n <= 128; n += 4)
549 for (pi = 0; pi < 3; pi++)
555 NFFT_CSWAP(swap_dgt, my_plan.
f);
557 NFFT_CSWAP(swap_dgt, my_plan.
f);
561 printf(
"%1.3" NFFT__FES__
"\t",
562 NFFT(error_l_infty_1_complex)(swap_dgt, my_plan.
f, my_plan.
M, my_plan.
alpha, my_plan.
N));
578 int main(
int argc,
char **argv)
582 fprintf(stderr,
"fastgauss type\n");
583 fprintf(stderr,
" type\n");
584 fprintf(stderr,
" 0 - Simple test.\n");
585 fprintf(stderr,
" 1 - Compares accuracy and execution time.\n");
586 fprintf(stderr,
" Pipe to output_andersson.tex\n");
587 fprintf(stderr,
" 2 - Compares accuracy.\n");
588 fprintf(stderr,
" Pipe to output_error.m\n");
589 fprintf(stderr,
" 3 - Compares accuracy.\n");
590 fprintf(stderr,
" Pipe to output_error_p.m\n");
594 if (atoi(argv[1]) == 0)
595 fgt_test_simple(10, 10, NFFT_K(5.0) + NFFT_K(3.0) * _Complex_I, NFFT_K(0.001));
597 if (atoi(argv[1]) == 1)
600 if (atoi(argv[1]) == 2)
603 if (atoi(argv[1]) == 3)
static void fgt_test_andersson(void)
Compares accuracy and execution time of the fast Gauss transform with increasing expansion degree...
static void fgt_init_guru(fgt_plan *ths, int N, int M, NFFT_C sigma, int n, NFFT_R p, int m, unsigned flags)
Initialisation of a transform plan, guru.
unsigned flags
flags for precomputation and approximation type
#define FGT_APPROX_B
If this flag is set, the discrete Fourier coefficients of the uniformly sampled Gaussian are used ins...
NFFT_R p
period, at least 1
NFFT_C * f
target evaluations
#define DGT_PRE_CEXP
If this flag is set, the whole matrix is precomputed and stored for the discrete Gauss transfrom...
static void fgt_test_init_rand(fgt_plan *ths)
Random initialisation of a fgt plan.
int N
number of source nodes
static void fgt_finalize(fgt_plan *ths)
Destroys the transform plan.
Structure for the Gauss transform.
int M
number of target nodes
NFFT_C sigma
parameter of the Gaussian
NFFT_R * y
target nodes in
NFFT_C * b
expansion coefficients
static void fgt_init_node_dependent(fgt_plan *ths)
Initialisation of a transform plan, depends on source and target nodes.
static void fgt_trafo(fgt_plan *ths)
Executes the fast Gauss transform.
static void fgt_test_error_p(void)
Compares accuracy of the fast Gauss transform with increasing expansion degree and different periodis...
static NFFT_R fgt_test_measure_time(fgt_plan *ths, unsigned dgt)
Compares execution times for the fast and discrete Gauss transform.
#define FGT_NDFT
If this flag is set, the fast Gauss transform uses the discrete instead of the fast Fourier transform...
NFFT_C * alpha
source coefficients
NFFT_R * x
source nodes in
NFFT_C * pre_cexp
precomputed values for dgt
static void fgt_test_simple(int N, int M, NFFT_C sigma, NFFT_R eps)
Simple example that computes fast and discrete Gauss transforms.
static void fgt_test_error(void)
Compares accuracy of the fast Gauss transform with increasing expansion degree.
static void dgt_trafo(fgt_plan *ths)
Executes the discrete Gauss transform.
static void fgt_init(fgt_plan *ths, int N, int M, NFFT_C sigma, NFFT_R eps)
Initialisation of a transform plan, simple.