NFFT  3.3.0
fastsum_benchomp.c
1 /*
2  * Copyright (c) 2002, 2015 Jens Keiner, Stefan Kunis, Daniel Potts
3  *
4  * This program is free software; you can redistribute it and/or modify it under
5  * the terms of the GNU General Public License as published by the Free Software
6  * Foundation; either version 2 of the License, or (at your option) any later
7  * version.
8  *
9  * This program is distributed in the hope that it will be useful, but WITHOUT
10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
11  * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
12  * details.
13  *
14  * You should have received a copy of the GNU General Public License along with
15  * this program; if not, write to the Free Software Foundation, Inc., 51
16  * Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
17  */
18 #include <stdio.h>
19 #include <stdlib.h>
20 #include <string.h>
21 #include <unistd.h>
22 
23 #include "config.h"
24 
25 #include "nfft3.h"
26 #include "infft.h"
27 
/* Number of repetitions handed to run_test() for every benchmark
 * configuration; timings are averaged over this many runs. */
#define NREPEAT 5
29 
/* Stream receiving the generated LaTeX/TikZ plots; opened and closed in
 * main(). */
static FILE *file_out_tex = NULL;
31 
32 int get_nthreads_array(int **arr)
33 {
34  int max_threads = NFFT(get_num_threads)();
35  int alloc_num = 2;
36  int k;
37  int ret_number = 0;
38  int max_threads_pw2 = (max_threads / 2) * 2 == max_threads ? 1 : 0;
39 
40  if (max_threads <= 5)
41  {
42  *arr = (int*) NFFT(malloc)((size_t) (max_threads) * sizeof(int));
43  for (k = 0; k < max_threads; k++)
44  *(*arr + k) = k + 1;
45  return max_threads;
46  }
47 
48  for (k = 1; k <= max_threads; k *= 2, alloc_num++)
49  ;
50 
51  *arr = (int*) NFFT(malloc)((size_t)(alloc_num) * sizeof(int));
52 
53  for (k = 1; k <= max_threads; k *= 2)
54  {
55  if (k != max_threads && 2 * k > max_threads && max_threads_pw2)
56  {
57  *(*arr + ret_number) = max_threads / 2;
58  ret_number++;
59  }
60 
61  *(*arr + ret_number) = k;
62  ret_number++;
63 
64  if (k != max_threads && 2 * k > max_threads)
65  {
66  *(*arr + ret_number) = max_threads;
67  ret_number++;
68  break;
69  }
70  }
71 
72  return ret_number;
73 }
74 
/*
 * Abort the program unless val equals ok.
 *
 * On mismatch an error line containing msg and both values is written to
 * stderr and the process exits with EXIT_FAILURE.
 */
void check_result_value(const int val, const int ok, const char *msg)
{
  if (val == ok)
    return;

  fprintf(stderr, "ERROR %s: %d not %d\n", msg, val, ok);
  exit(EXIT_FAILURE);
}
84 
/*
 * Generate the benchmark input by running the external
 * fastsum_benchomp_createdataset program with arguments d, L and M and
 * redirecting its output to fastsum_benchomp_test.data.
 *
 * The command line is echoed to stderr; a non-zero result from system()
 * terminates the program via check_result_value().
 */
void run_test_create(int d, int L, int M)
{
  char cmd[1025];
  int status;

  snprintf(cmd, 1024,
      "./fastsum_benchomp_createdataset %d %d %d > fastsum_benchomp_test.data",
      d, L, M);
  fprintf(stderr, "%s\n", cmd);

  status = system(cmd);
  check_result_value(status, 0, "createdataset");
}
95 
/*
 * Create (or truncate to empty) the file fastsum_benchomp_test.result.
 * Failure to open the file is silently ignored.
 */
void run_test_init_output()
{
  FILE *f = fopen("fastsum_benchomp_test.result", "w");

  if (f == NULL)
    return;

  fclose(f);
}
102 
103 typedef struct
104 {
105  int d;
106  int L;
107  int M;
108  int n;
109  int m;
110  int p;
111  char *kernel_name;
112  R c;
113  R eps_I;
114  R eps_B;
115 } s_param;
116 
117 typedef struct
118 {
119  R avg;
120  R min;
121  R max;
122 } s_resval;
123 
124 typedef struct
125 {
126  int nthreads;
127  s_resval resval[16];
128 } s_result;
129 
130 typedef struct
131 {
132  s_param param;
133  s_result *results;
134  int nresults;
135 } s_testset;
136 
137 void run_test(s_resval *res, int nrepeat, int n, int m, int p,
138  char *kernel_name, R c, R eps_I, R eps_B, int nthreads)
139 {
140  char cmd[1025];
141  int r, t;
142 
143  for (t = 0; t < 16; t++)
144  {
145  res[t].avg = K(0.0);
146  res[t].min = K(1.0) / K(0.0);
147  res[t].max = K(0.0);
148  }
149 
150  if (nthreads < 2)
151  snprintf(cmd, 1024,
152  "./fastsum_benchomp_detail_single %d %d %d %s " __FR__ " " __FR__ " " __FR__ " < fastsum_benchomp_test.data > fastsum_benchomp_test.out",
153  n, m, p, kernel_name, c, eps_I, eps_B);
154  else
155  snprintf(cmd, 1024,
156  "./fastsum_benchomp_detail_threads %d %d %d %s " __FR__ " " __FR__ " " __FR__ " %d < fastsum_benchomp_test.data > fastsum_benchomp_test.out",
157  n, m, p, kernel_name, c, eps_I, eps_B, nthreads);
158  fprintf(stderr, "%s\n", cmd);
159  check_result_value(system(cmd), 0, cmd);
160 
161  for (r = 0; r < nrepeat; r++)
162  {
163  int retval;
164  R v[16];
165  FILE *f;
166  check_result_value(system(cmd), 0, cmd);
167  f = fopen("fastsum_benchomp_test.out", "r");
168  retval = fscanf(f,
169  "" __FR__ " " __FR__ " " __FR__ " " __FR__ " " __FR__ " " __FR__ " " __FR__ " " __FR__ " " __FR__ " " __FR__ " " __FR__ " " __FR__ " " __FR__ " " __FR__ " " __FR__ " " __FR__ "", v,
170  v + 1, v + 2, v + 3, v + 4, v + 5, v + 6, v + 7, v + 8, v + 9, v + 10,
171  v + 11, v + 12, v + 13, v + 14, v + 15);
172  check_result_value(retval, 16, "read fastsum_benchomp_test.out");
173  fclose(f);
174 
175  for (t = 0; t < 16; t++)
176  {
177  res[t].avg += v[t];
178  if (res[t].min > v[t])
179  res[t].min = v[t];
180  if (res[t].max < v[t])
181  res[t].max = v[t];
182  }
183  }
184 
185  for (t = 0; t < 16; t++)
186  res[t].avg /= (R)(nrepeat);
187 
188  fprintf(stderr, "%d %d: ", nthreads, nrepeat);
189  for (t = 0; t < 16; t++)
190  fprintf(stderr, "%.3" __FES__ " %.3" __FES__ " %.3" __FES__ " | ", res[t].avg, res[t].min, res[t].max);
191  fprintf(stderr, "\n");
192 }
193 
194 const char *get_psi_string(int flags)
195 {
196  if (flags & PRE_PSI)
197  return "prepsi";
198  else if (flags & PRE_ONE_PSI)
199  return "unknownPSI";
200 
201  return "nopsi";
202 }
203 const char *get_sort_string(int flags)
204 {
205  if (flags & NFFT_OMP_BLOCKWISE_ADJOINT)
206  return "";
207 
208  if (flags & NFFT_SORT_NODES)
209  return "sorted";
210 
211  return "unsorted";
212 }
213 
214 const char *get_adjoint_omp_string(int flags)
215 {
216  if (flags & NFFT_OMP_BLOCKWISE_ADJOINT)
217  return "blockwise";
218 
219  return "";
220 }
221 
/* Bit flags naming the individual benchmark parameters; used to record which
 * parameters differ between test sets and which appear in plot titles. */
#define MASK_FSUM_D       (0x001U) /* s_param.d */
#define MASK_FSUM_L       (0x002U) /* s_param.L */
#define MASK_FSUM_M       (0x004U) /* s_param.M */
#define MASK_FSUM_MULTIBW (0x008U) /* s_param.n */
#define MASK_FSUM_WINM    (0x010U) /* s_param.m */
#define MASK_FSUM_P       (0x020U) /* s_param.p */
#define MASK_FSUM_KERNEL  (0x040U) /* s_param.kernel_name */
#define MASK_FSUM_EPSI    (0x080U) /* s_param.eps_I */
#define MASK_FSUM_EPSB    (0x100U) /* s_param.eps_B */
231 
232 unsigned int fastsum_determine_different_parameters(s_testset *testsets,
233  int ntestsets)
234 {
235  int t;
236  unsigned int mask = 0;
237 
238  if (ntestsets < 2)
239  return 0;
240 
241  for (t = 1; t < ntestsets; t++)
242  {
243  if (testsets[t - 1].param.d != testsets[t].param.d)
244  mask |= MASK_FSUM_D;
245  if (testsets[t - 1].param.L != testsets[t].param.L)
246  mask |= MASK_FSUM_L;
247  if (testsets[t - 1].param.M != testsets[t].param.M)
248  mask |= MASK_FSUM_M;
249  if (testsets[t - 1].param.n != testsets[t].param.n)
250  mask |= MASK_FSUM_MULTIBW;
251  if (testsets[t - 1].param.m != testsets[t].param.m)
252  mask |= MASK_FSUM_WINM;
253  if (testsets[t - 1].param.p != testsets[t].param.p)
254  mask |= MASK_FSUM_P;
255  if (strcmp(testsets[t - 1].param.kernel_name, testsets[t].param.kernel_name)
256  != 0)
257  mask |= MASK_FSUM_KERNEL;
258  if (testsets[t - 1].param.eps_I != testsets[t].param.eps_I)
259  mask |= MASK_FSUM_EPSI;
260  if (testsets[t - 1].param.eps_B != testsets[t].param.eps_B)
261  mask |= MASK_FSUM_EPSB;
262  }
263 
264  return mask;
265 }
266 
/*
 * Copy src to dst, replacing every '_' by the LaTeX-safe sequence "\_{}".
 *
 * dst:    destination buffer of at least maxlen bytes.
 * src:    NUL-terminated input; NULL is treated like the empty string.
 * maxlen: capacity of dst including the terminating NUL.
 *
 * The result is always NUL-terminated when maxlen > 0. If the escaped text
 * does not fit, copying stops before the first character (or escape
 * sequence) that would overflow, so an escape sequence is never cut in half.
 * Nothing is written when dst is NULL or maxlen <= 0.
 */
void strEscapeUnderscore(char *dst, char *src, int maxlen)
{
  static const char escaped[] = "\\_{}";
  const int escaped_len = (int) (sizeof(escaped) - 1);
  int offset = 0;
  int i;

  if (dst == NULL || maxlen <= 0)
    return;

  if (src != NULL)
  {
    for (i = 0; src[i] != '\0'; i++)
    {
      const int need = (src[i] == '_') ? escaped_len : 1;

      /* Always keep one byte in reserve for the terminator. */
      if (offset + need > maxlen - 1)
        break;

      if (src[i] == '_')
        memcpy(dst + offset, escaped, (size_t) need);
      else
        dst[offset] = src[i];

      offset += need;
    }
  }

  dst[offset] = '\0';
}
283 
284 void fastsum_get_plot_title_minus_indep(char *outstr, int maxlen,
285  char *hostname, s_param param, unsigned int diff_mask)
286 {
287  unsigned int mask = ~diff_mask;
288  int offset = 0;
289  int len;
290 
291  len = snprintf(outstr, maxlen, "%s", hostname);
292  if (len < 0 || len + offset >= maxlen - 1)
293  return;
294  offset += len;
295 
296  if (mask & MASK_FSUM_D)
297  {
298  len = snprintf(outstr + offset, maxlen - offset, " %dd fastsum", param.d);
299  if (len < 0 || len + offset >= maxlen - 1)
300  return;
301  offset += len;
302  }
303 
304  if ((mask & (MASK_FSUM_L | MASK_FSUM_M)) && param.L == param.M)
305  {
306  len = snprintf(outstr + offset, maxlen - offset, " L=M=%d", param.L);
307  if (len < 0 || len + offset >= maxlen - 1)
308  return;
309  offset += len;
310  }
311  else
312  {
313  if (mask & MASK_FSUM_L)
314  {
315  len = snprintf(outstr + offset, maxlen - offset, " L=%d", param.L);
316  if (len < 0 || len + offset >= maxlen - 1)
317  return;
318  offset += len;
319  }
320 
321  if (mask & MASK_FSUM_M)
322  {
323  len = snprintf(outstr + offset, maxlen - offset, " M=%d", param.M);
324  if (len < 0 || len + offset >= maxlen - 1)
325  return;
326  offset += len;
327  }
328  }
329 
330  if (mask & MASK_FSUM_MULTIBW)
331  {
332  len = snprintf(outstr + offset, maxlen - offset, " n=%d", param.n);
333  if (len < 0 || len + offset >= maxlen - 1)
334  return;
335  offset += len;
336  }
337 
338  if (mask & MASK_FSUM_WINM)
339  {
340  len = snprintf(outstr + offset, maxlen - offset, " m=%d", param.m);
341  if (len < 0 || len + offset >= maxlen - 1)
342  return;
343  offset += len;
344  }
345 
346  if (mask & MASK_FSUM_P)
347  {
348  len = snprintf(outstr + offset, maxlen - offset, " p=%d", param.p);
349  if (len < 0 || len + offset >= maxlen - 1)
350  return;
351  offset += len;
352  }
353 
354  if (mask & MASK_FSUM_KERNEL)
355  {
356  char tmp[maxlen];
357  strEscapeUnderscore(tmp, param.kernel_name, maxlen);
358 
359  len = snprintf(outstr + offset, maxlen - offset, " %s", tmp);
360  if (len < 0 || len + offset >= maxlen - 1)
361  return;
362  offset += len;
363  }
364 
365  if ((mask & (MASK_FSUM_EPSI | MASK_FSUM_EPSB)) && param.eps_I == param.eps_B)
366  {
367  len = snprintf(outstr + offset, maxlen - offset,
368  " $\\varepsilon_\\mathrm{I}$=$\\varepsilon_\\mathrm{B}$=%" __FGS__ "",
369  param.eps_I);
370  if (len < 0 || len + offset >= maxlen - 1)
371  return;
372  offset += len;
373  }
374  else
375  {
376  if (mask & MASK_FSUM_EPSI)
377  {
378  len = snprintf(outstr + offset, maxlen - offset,
379  " $\\varepsilon_\\mathrm{I}$=%" __FGS__ "", param.eps_I);
380  if (len < 0 || len + offset >= maxlen - 1)
381  return;
382  offset += len;
383  }
384 
385  if (mask & MASK_FSUM_EPSB)
386  {
387  len = snprintf(outstr + offset, maxlen - offset,
388  " $\\varepsilon_\\mathrm{B}$=%" __FGS__ "", param.eps_B);
389  if (len < 0 || len + offset >= maxlen - 1)
390  return;
391  offset += len;
392  }
393  }
394 }
395 
396 void nfft_adjoint_print_output_histo_DFBRT(FILE *out, s_testset testset)
397 {
398  int i, size = testset.nresults;
399  char hostname[1025];
400 
401  if (gethostname(hostname, 1024) != 0)
402  strncpy(hostname, "unnamed", 1024);
403 
404  fprintf(out, "\\begin{tikzpicture}\n");
405  fprintf(out, "\\begin{axis}[");
406  fprintf(out, "width=0.9\\textwidth, height=0.6\\textwidth, ");
407  fprintf(out, "symbolic x coords={");
408  for (i = 0; i < size; i++)
409  if (i > 0)
410  fprintf(out, ",%d", testset.results[i].nthreads);
411  else
412  fprintf(out, "%d", testset.results[i].nthreads);
413 
414  fprintf(out,
415  "}, x tick label style={ /pgf/number format/1000 sep=}, xlabel=Number of threads, ylabel=Time in s, xtick=data, legend style={legend columns=-1}, ybar, bar width=7pt, ymajorgrids=true, yminorgrids=true, minor y tick num=1, ");
416  fprintf(out,
417  " title={%s %dd $\\textrm{NFFT}^\\top$ N=%d $\\sigma$=2 M=%d m=%d prepsi sorted}",
418  hostname, testset.param.d, testset.param.n, testset.param.M,
419  testset.param.m);
420  fprintf(out, " ]\n");
421  fprintf(out, "\\addplot coordinates {");
422  for (i = 0; i < size; i++)
423  fprintf(out, "(%d, %.6" __FES__ ") ", testset.results[i].nthreads,
424  testset.results[i].resval[10].avg);
425  fprintf(out, "};\n");
426 
427  fprintf(out, "\\addplot coordinates {");
428  for (i = 0; i < size; i++)
429  fprintf(out, "(%d, %.6" __FES__ ") ", testset.results[i].nthreads,
430  testset.results[i].resval[11].avg);
431  fprintf(out, "};\n");
432 
433  fprintf(out, "\\addplot coordinates {");
434  for (i = 0; i < size; i++)
435  fprintf(out, "(%d, %.6" __FES__ ") ", testset.results[i].nthreads,
436  testset.results[i].resval[12].avg);
437  fprintf(out, "};\n");
438 
439  fprintf(out, "\\addplot coordinates {");
440  for (i = 0; i < size; i++)
441  fprintf(out, "(%d, %.6" __FES__ ") ", testset.results[i].nthreads,
442  testset.results[i].resval[1].avg);
443  fprintf(out, "};\n");
444 
445  fprintf(out, "\\addplot coordinates {");
446  for (i = 0; i < size; i++)
447  fprintf(out, "(%d, %.6" __FES__ ") ", testset.results[i].nthreads,
448  testset.results[i].resval[4].avg + testset.results[i].resval[1].avg);
449  fprintf(out, "};\n");
450  fprintf(out,
451  "\\legend{D,$\\textrm{F}^\\top$,$\\textrm{B}^\\top$,prepsi,total}\n");
452  fprintf(out, "\\end{axis}\n");
453  fprintf(out, "\\end{tikzpicture}\n");
454  fprintf(out, "\n\n");
455 
456  fflush(out);
457 }
458 
459 void nfft_trafo_print_output_histo_DFBRT(FILE *out, s_testset testset)
460 {
461  int i, size = testset.nresults;
462  char hostname[1025];
463 
464  if (gethostname(hostname, 1024) != 0)
465  strncpy(hostname, "unnamed", 1024);
466 
467  fprintf(out, "\\begin{tikzpicture}\n");
468  fprintf(out, "\\begin{axis}[");
469  fprintf(out, "width=0.9\\textwidth, height=0.6\\textwidth, ");
470  fprintf(out, "symbolic x coords={");
471  for (i = 0; i < size; i++)
472  if (i > 0)
473  fprintf(out, ",%d", testset.results[i].nthreads);
474  else
475  fprintf(out, "%d", testset.results[i].nthreads);
476 
477  fprintf(out,
478  "}, x tick label style={ /pgf/number format/1000 sep=}, xlabel=Number of threads, ylabel=Time in s, xtick=data, legend style={legend columns=-1}, ybar, bar width=7pt, ymajorgrids=true, yminorgrids=true, minor y tick num=1, ");
479  fprintf(out,
480  " title={%s %dd $\\textrm{NFFT}$ N=%d $\\sigma$=2 M=%d m=%d prepsi sorted}",
481  hostname, testset.param.d, testset.param.n, testset.param.M,
482  testset.param.m);
483  fprintf(out, " ]\n");
484  fprintf(out, "\\addplot coordinates {");
485  for (i = 0; i < size; i++)
486  fprintf(out, "(%d, %.6" __FES__ ") ", testset.results[i].nthreads,
487  testset.results[i].resval[13].avg);
488  fprintf(out, "};\n");
489 
490  fprintf(out, "\\addplot coordinates {");
491  for (i = 0; i < size; i++)
492  fprintf(out, "(%d, %.6" __FES__ ") ", testset.results[i].nthreads,
493  testset.results[i].resval[14].avg);
494  fprintf(out, "};\n");
495 
496  fprintf(out, "\\addplot coordinates {");
497  for (i = 0; i < size; i++)
498  fprintf(out, "(%d, %.6" __FES__ ") ", testset.results[i].nthreads,
499  testset.results[i].resval[15].avg);
500  fprintf(out, "};\n");
501 
502  fprintf(out, "\\addplot coordinates {");
503  for (i = 0; i < size; i++)
504  fprintf(out, "(%d, %.6" __FES__ ") ", testset.results[i].nthreads,
505  testset.results[i].resval[2].avg);
506  fprintf(out, "};\n");
507 
508  fprintf(out, "\\addplot coordinates {");
509  for (i = 0; i < size; i++)
510  fprintf(out, "(%d, %.6" __FES__ ") ", testset.results[i].nthreads,
511  testset.results[i].resval[6].avg + testset.results[i].resval[2].avg);
512  fprintf(out, "};\n");
513  fprintf(out, "\\legend{D,F,B,prepsi,total}\n");
514  fprintf(out, "\\end{axis}\n");
515  fprintf(out, "\\end{tikzpicture}\n");
516  fprintf(out, "\n\n");
517 
518  fflush(out);
519 }
520 
521 void fastsum_print_output_histo_PreRfNfT(FILE *out, s_testset testset)
522 {
523  int i, size = testset.nresults;
524  char hostname[1025];
525  char plottitle[1025];
526 
527  if (gethostname(hostname, 1024) != 0)
528  strncpy(hostname, "unnamed", 1024);
529 
530  fastsum_get_plot_title_minus_indep(plottitle, 1024, hostname, testset.param,
531  0);
532 
533  fprintf(out, "\\begin{tikzpicture}\n");
534  fprintf(out, "\\begin{axis}[");
535  fprintf(out, "width=0.9\\textwidth, height=0.6\\textwidth, ");
536  fprintf(out, "symbolic x coords={");
537  for (i = 0; i < size; i++)
538  if (i > 0)
539  fprintf(out, ",%d", testset.results[i].nthreads);
540  else
541  fprintf(out, "%d", testset.results[i].nthreads);
542 
543  fprintf(out,
544  "}, x tick label style={ /pgf/number format/1000 sep=}, xlabel=Number of threads, ylabel=Time in s, xtick=data, legend style={legend columns=1}, ybar, bar width=7pt, ymajorgrids=true, yminorgrids=true, minor y tick num=1, ");
545  fprintf(out, " title={%s}", plottitle);
546  fprintf(out, " ]\n");
547  fprintf(out, "\\addplot coordinates {");
548  for (i = 0; i < size; i++)
549  fprintf(out, "(%d, %.6" __FES__ ") ", testset.results[i].nthreads,
550  testset.results[i].resval[1].avg + testset.results[i].resval[2].avg);
551  fprintf(out, "};\n");
552 
553  fprintf(out, "\\addplot coordinates {");
554  for (i = 0; i < size; i++)
555  fprintf(out, "(%d, %.6" __FES__ ") ", testset.results[i].nthreads,
556  testset.results[i].resval[3].avg);
557  fprintf(out, "};\n");
558 
559  fprintf(out, "\\addplot coordinates {");
560  for (i = 0; i < size; i++)
561  fprintf(out, "(%d, %.6" __FES__ ") ", testset.results[i].nthreads,
562  testset.results[i].resval[4].avg + testset.results[i].resval[5].avg
563  + testset.results[i].resval[6].avg);
564  fprintf(out, "};\n");
565 
566  fprintf(out, "\\addplot coordinates {");
567  for (i = 0; i < size; i++)
568  fprintf(out, "(%d, %.6" __FES__ ") ", testset.results[i].nthreads,
569  testset.results[i].resval[7].avg);
570  fprintf(out, "};\n");
571 
572  fprintf(out, "\\addplot coordinates {");
573  for (i = 0; i < size; i++)
574  fprintf(out, "(%d, %.6" __FES__ ") ", testset.results[i].nthreads,
575  testset.results[i].resval[9].avg - testset.results[i].resval[0].avg);
576  fprintf(out, "};\n");
577  fprintf(out,
578  "\\legend{prepsi (step 1b),init nearfield (step 1c),far field (steps 2a-c),nearfield (step 2d),total $-$ step 1a}\n");
579  fprintf(out, "\\end{axis}\n");
580  fprintf(out, "\\end{tikzpicture}\n");
581  fprintf(out, "\n\n");
582 
583  fflush(out);
584 }
585 
586 void fastsum_print_output_speedup_total_minus_indep(FILE *out,
587  s_testset *testsets, int ntestsets)
588 {
589  int i, t;
590  char hostname[1025];
591  char plottitle[1025];
592  unsigned int diff_mask = fastsum_determine_different_parameters(testsets,
593  ntestsets);
594 
595  if (gethostname(hostname, 1024) != 0)
596  strncpy(hostname, "unnamed", 1024);
597 
598  fastsum_get_plot_title_minus_indep(plottitle, 1024, hostname,
599  testsets[0].param, diff_mask | MASK_FSUM_WINM);
600 
601  fprintf(out, "\\begin{tikzpicture}\n");
602  fprintf(out, "\\begin{axis}[");
603  fprintf(out,
604  "width=0.9\\textwidth, height=0.6\\textwidth, x tick label style={ /pgf/number format/1000 sep=}, xlabel=Number of threads, ylabel=Speedup, xtick=data, legend style={ legend pos = north west, legend columns=1}, ymajorgrids=true, yminorgrids=true, minor y tick num=4, ");
605  fprintf(out, " title={%s}", plottitle);
606  fprintf(out, " ]\n");
607 
608  for (t = 0; t < ntestsets; t++)
609  {
610  s_testset testset = testsets[t];
611 
612  R tref = K(0.0);
613  for (i = 0; i < testset.nresults; i++)
614  if (testset.results[i].nthreads == 1)
615  tref = testset.results[i].resval[9].avg
616  - testset.results[i].resval[0].avg;
617 
618  fprintf(out, "\\addplot coordinates {");
619  for (i = 0; i < testset.nresults; i++)
620  fprintf(out, "(%d, %.6" __FES__ ") ", testset.results[i].nthreads,
621  tref
622  / (testset.results[i].resval[9].avg
623  - testset.results[i].resval[0].avg));
624  fprintf(out, "};\n");
625 
626  for (i = 0; i < testset.nresults; i++)
627  {
628  fprintf(stderr, "%d:%.3" __FIS__ " ", testset.results[i].nthreads,
629  tref
630  / (testset.results[i].resval[9].avg
631  - testset.results[i].resval[0].avg));
632  }
633  fprintf(stderr, "\n\n");
634  }
635 
636  fprintf(out, "\\legend{{");
637  for (t = 0; t < ntestsets; t++)
638  {
639  char title[256];
640  if (t > 0)
641  fprintf(out, "},{");
642  fastsum_get_plot_title_minus_indep(title, 255, "", testsets[t].param,
643  ~(diff_mask | MASK_FSUM_WINM));
644  fprintf(out, "%s", title);
645  }
646  fprintf(out, "}}\n");
647  fprintf(out, "\\end{axis}\n");
648  fprintf(out, "\\end{tikzpicture}\n");
649  fprintf(out, "\n\n");
650 
651  fflush(out);
652 }
653 
654 void run_testset(s_testset *testset, int d, int L, int M, int n, int m, int p,
655  char *kernel_name, R c, R eps_I, R eps_B,
656  int *nthreads_array, int n_threads_array_size)
657 {
658  int i;
659  testset->param.d = d;
660  testset->param.L = L;
661  testset->param.M = M;
662  testset->param.n = n;
663  testset->param.m = m;
664  testset->param.p = p;
665  testset->param.kernel_name = kernel_name;
666  testset->param.c = c;
667  testset->param.eps_I = eps_I;
668  testset->param.eps_B = eps_B;
669 
670  testset->results = (s_result*) NFFT(malloc)(
671  (size_t)(n_threads_array_size) * sizeof(s_result));
672  testset->nresults = n_threads_array_size;
673 
674  run_test_create(testset->param.d, testset->param.L, testset->param.M);
675  for (i = 0; i < n_threads_array_size; i++)
676  {
677  testset->results[i].nthreads = nthreads_array[i];
678  run_test(testset->results[i].resval, NREPEAT, testset->param.n,
679  testset->param.m, testset->param.p, testset->param.kernel_name,
680  testset->param.c, testset->param.eps_I, testset->param.eps_B,
681  testset->results[i].nthreads);
682  }
683 
684 }
685 
/*
 * Run the single predefined benchmark configuration (3d, L = M = 100000,
 * n = 128, m = 4, p = 7, kernel "one_over_x") for all given thread counts and
 * write the speedup plot and the three histogram plots to file_out_tex.
 *
 * Does nothing unless both MEASURE_TIME and MEASURE_TIME_FFTW are defined.
 */
void test1(int *nthreads_array, int n_threads_array_size)
{
#if defined MEASURE_TIME && defined MEASURE_TIME_FFTW
  s_testset testsets[1];

  run_testset(&testsets[0], 3, 100000, 100000, 128, 4, 7, "one_over_x", K(0.0), K(0.03125), K(0.03125), nthreads_array, n_threads_array_size);

  fastsum_print_output_speedup_total_minus_indep(file_out_tex, testsets, 1);

  fastsum_print_output_histo_PreRfNfT(file_out_tex, testsets[0]);

  nfft_adjoint_print_output_histo_DFBRT(file_out_tex, testsets[0]);

  nfft_trafo_print_output_histo_DFBRT(file_out_tex, testsets[0]);

  /* run_testset() allocates the result array with NFFT(malloc); release it
   * with the matching deallocator once all plots are written. */
  NFFT(free)(testsets[0].results);
  testsets[0].results = NULL;
  testsets[0].nresults = 0;
#else
  /* Nothing to measure in this build configuration. */
  (void) nthreads_array;
  (void) n_threads_array_size;
#endif
}
702 
703 int main(int argc, char** argv)
704 {
705  int *nthreads_array;
706  int n_threads_array_size = get_nthreads_array(&nthreads_array);
707  int k;
708 
709 #if !(defined MEASURE_TIME && defined MEASURE_TIME_FFTW)
710  fprintf(stderr, "WARNING: Detailed time measurements are not activated.\n");
711  fprintf(stderr, "Please re-run the configure script with options\n");
712  fprintf(stderr,
713  "--enable-measure-time --enable-measure-time-fftw --enable-openmp\n");
714  fprintf(stderr, "and run \"make clean all\"\n\n");
715 #endif
716 
717  for (k = 0; k < n_threads_array_size; k++)
718  fprintf(stderr, "%d ", nthreads_array[k]);
719  fprintf(stderr, "\n");
720 
721  file_out_tex = fopen("fastsum_benchomp_results_plots.tex", "w");
722 
723  test1(nthreads_array, n_threads_array_size);
724 
725  fclose(file_out_tex);
726 
727  return EXIT_SUCCESS;
728 }
729