NFFT  3.3.1
nfsft_benchomp.c
00001 /*
00002  * Copyright (c) 2002, 2016 Jens Keiner, Stefan Kunis, Daniel Potts
00003  *
00004  * This program is free software; you can redistribute it and/or modify it under
00005  * the terms of the GNU General Public License as published by the Free Software
00006  * Foundation; either version 2 of the License, or (at your option) any later
00007  * version.
00008  *
00009  * This program is distributed in the hope that it will be useful, but WITHOUT
00010  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
00011  * FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more
00012  * details.
00013  *
00014  * You should have received a copy of the GNU General Public License along with
00015  * this program; if not, write to the Free Software Foundation, Inc., 51
00016  * Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
00017  */
00018 #include <stdio.h>
00019 #include <stdlib.h>
00020 #include <string.h>
00021 #include <unistd.h>
00022 
00023 #include "config.h"
00024 
00025 #include "nfft3.h"
00026 #include "infft.h"
00027 
/* Number of repetitions requested from the detail programs per measurement. */
#define NREPEAT 5

/* Command names of the helper programs started through system(). */
#if !(defined(_WIN32) || defined(_WIN64))
/* Non-Windows builds: start the helpers from the current directory. */
const char *CMD_CREATEDATASET = "./nfsft_benchomp_createdataset";
const char *CMD_DETAIL_SINGLE = "./nfsft_benchomp_detail_single";
const char *CMD_DETAIL_THREADS = "./nfsft_benchomp_detail_threads";
#else
/* Windows builds: the helpers carry an .exe suffix. */
const char *CMD_CREATEDATASET = "nfsft_benchomp_createdataset.exe";
const char *CMD_DETAIL_SINGLE = "nfsft_benchomp_detail_single.exe";
const char *CMD_DETAIL_THREADS = "nfsft_benchomp_detail_threads.exe";
#endif

/* Stream receiving the generated TikZ/pgfplots code; opened in main(). */
static FILE *file_out_tex = NULL;
00041 
00042 int get_nthreads_array(int **arr)
00043 {
00044   int max_threads = X(get_num_threads)();
00045   int alloc_num = 2;
00046   int k;
00047   int ret_number = 0;
00048   int max_threads_pw2 = (max_threads / 2) * 2 == max_threads ? 1 : 0;
00049 
00050   if (max_threads <= 5)
00051   {
00052     *arr = (int*) malloc(max_threads*sizeof(int));
00053     for (k = 0; k < max_threads; k++)
00054       *(*arr + k) = k+1;
00055     return max_threads;
00056   }
00057 
00058   for (k = 1; k <= max_threads; k*=2, alloc_num++);
00059 
00060   *arr = (int*) malloc(alloc_num*sizeof(int));
00061 
00062   for (k = 1; k <= max_threads; k*=2)
00063   {
00064     if (k != max_threads && 2*k > max_threads && max_threads_pw2)
00065     {
00066       *(*arr + ret_number) = max_threads/2;
00067       ret_number++;
00068     }
00069 
00070     *(*arr + ret_number) = k;
00071     ret_number++;
00072 
00073     if (k != max_threads && 2*k > max_threads)
00074     {
00075       *(*arr + ret_number) = max_threads;
00076       ret_number++;
00077       break;
00078     }
00079   }
00080 
00081   return ret_number;
00082 } 
00083   
00084 
/**
 * Terminate the program if a value differs from the expected one.
 *
 * \param val the value that was obtained
 * \param ok  the value that was expected
 * \param msg text identifying the failed step in the error message
 *
 * On mismatch an error line is written to stderr and the process exits
 * with status 1; otherwise the function simply returns.
 */
void check_result_value(const int val, const int ok, const char *msg)
{
  if (val == ok)
    return;

  fprintf(stderr, "ERROR %s: %d not %d\n", msg, val, ok);
  exit(1);
}
00094 
00095 void run_test_create(int trafo_adjoint, int N, int M)
00096 {
00097   char cmd[1025];
00098 
00099   snprintf(cmd, 1024, "%s %d %d %d > nfsft_benchomp_test.data", CMD_CREATEDATASET, trafo_adjoint, N, M);
00100   fprintf(stderr, "%s\n", cmd);
00101   check_result_value(system(cmd), 0, "createdataset");
00102 }
00103 
/**
 * Create or truncate the file nfsft_benchomp_test.result.
 *
 * A failure to open the file is silently ignored.
 */
void run_test_init_output()
{
  FILE *result_file = fopen("nfsft_benchomp_test.result", "w");

  if (result_file == NULL)
    return;

  fclose(result_file);
}
00110 
/** Parameters describing one benchmark configuration. */
typedef struct
{
  int trafo_adjoint; /* 0 selects the plain transform label, otherwise the transposed one */
  int N, M;          /* passed to the data set generator */
  int m;             /* passed to the detail programs */
  int nfsft_flags;   /* tested against NFSFT_USE_DPT */
  int psi_flags;     /* tested against the PRE_PSI, sort and blockwise flags */
} s_param;

/** Average, minimum and maximum of one measured quantity over all repetitions. */
typedef struct
{
  double avg, min, max;
} s_resval;

/**
 * Measurements for one thread count.
 *
 * The histogram output labels resval[1] as DPT/FPT, resval[2] as c2e,
 * resval[3] as NFFT, resval[0] + resval[4] as "rest" and resval[5] as total.
 */
typedef struct
{
  int nthreads;
  s_resval resval[6];
} s_result;

/** One benchmark configuration together with its results per thread count. */
typedef struct
{
  s_param param;
  s_result *results; /* malloc'ed array with nresults entries */
  int nresults;
} s_testset;
00140 
00141 void run_test(s_resval *res, int nrepeat, int m, int nfsft_flags, int psi_flags, int nthreads)
00142 {
00143   FILE *f;
00144   char cmd[1025];
00145   int r,t;
00146   
00147   for (t = 0; t < 6; t++)
00148   {
00149     res[t].avg = 0.0; res[t].min = 1.0/0.0; res[t].max = 0.0;
00150   }
00151 
00152   if (nthreads < 2)
00153     snprintf(cmd, 1024, "%s %d %d %d %d < nfsft_benchomp_test.data > nfsft_benchomp_test.out", CMD_DETAIL_SINGLE, m, nfsft_flags, psi_flags, nrepeat);
00154   else
00155     snprintf(cmd, 1024, "%s %d %d %d %d %d < nfsft_benchomp_test.data > nfsft_benchomp_test.out", CMD_DETAIL_THREADS, m, nfsft_flags, psi_flags, nrepeat, nthreads);
00156   fprintf(stderr, "%s\n", cmd);
00157 
00158   check_result_value(system(cmd), 0, cmd);
00159 
00160   f = fopen("nfsft_benchomp_test.out", "r");
00161   for (r = 0; r < nrepeat; r++)
00162   {
00163     int retval;
00164     double v[6];
00165 //    FILE *f;
00166 //    check_result_value(system(cmd), 0, cmd);
00167 //    f = fopen("nfsft_benchomp_test.out", "r");
00168     retval = fscanf(f, "%lg %lg %lg %lg %lg %lg", v, v+1, v+2, v+3, v+4, v+5);
00169     check_result_value(retval, 6, "read nfsft_benchomp_test.out");
00170 //    fclose(f);
00171 //    fprintf(stderr, "%.3e %.3e %.3e %.3e %.3e %.3e\n", v[0], v[1], v[2], v[3], v[4], v[5]);
00172     for (t = 0; t < 6; t++)
00173     {
00174       res[t].avg += v[t];
00175       if (res[t].min > v[t])
00176         res[t].min = v[t];
00177       if (res[t].max < v[t])
00178         res[t].max = v[t];
00179     }
00180   }
00181   fclose(f);
00182 
00183   for (t = 0; t < 6; t++)
00184     res[t].avg /= nrepeat;
00185 
00186   fprintf(stderr, "%d %d: ", nthreads, nrepeat);
00187   for (t = 0; t < 6; t++)
00188     fprintf(stderr, "%.3e %.3e %.3e | ", res[t].avg, res[t].min, res[t].max);
00189   fprintf(stderr, "\n");
00190 }
00191 
00192 const char *get_psi_string(int flags)
00193 {
00194   if (flags & PRE_PSI)
00195     return "prepsi";
00196   else if (flags & PRE_ONE_PSI)
00197     return "unknownPSI";
00198 
00199   return "nopsi";
00200 }
00201 const char *get_sort_string(int flags)
00202 {
00203   if (flags & NFFT_SORT_NODES)
00204     return "sorted";
00205 
00206     return "unsorted";
00207 }
00208 
00209 const char *get_adjoint_omp_string(int flags)
00210 {
00211   if (flags & NFFT_OMP_BLOCKWISE_ADJOINT)
00212     return "blockwise";
00213 
00214     return "";
00215 }
00216 
/* Bits marking which parameters differ between the test sets of one plot. */
#define MASK_TA         0x002U /* trafo_adjoint */
#define MASK_N          0x004U /* N */
#define MASK_M          0x010U /* M */
#define MASK_WINM       0x020U /* m */
#define MASK_FLAGS_PSI  0x040U /* psi precomputation flag */
#define MASK_FLAGS_SORT 0x080U /* node sorting flag */
#define MASK_FLAGS_BW   0x100U /* blockwise adjoint flag */
#define MASK_FLAGS_FPT  0x200U /* NFSFT_USE_DPT flag */
00225 
00226 unsigned int determine_different_parameters(s_testset *testsets, int ntestsets)
00227 {
00228   int t;
00229   unsigned int mask = 0;
00230 
00231   if (ntestsets < 2)
00232     return 0;
00233 
00234   for (t = 1; t < ntestsets; t++)
00235   {
00236     if (testsets[t-1].param.trafo_adjoint != testsets[t].param.trafo_adjoint)
00237       mask |= MASK_TA;
00238     if (testsets[t-1].param.N != testsets[t].param.N)
00239       mask |= MASK_N;
00240     if (testsets[t-1].param.M != testsets[t].param.M)
00241       mask |= MASK_M;
00242     if (testsets[t-1].param.m != testsets[t].param.m)
00243       mask |= MASK_WINM;
00244     if ((testsets[t-1].param.psi_flags & PRE_ONE_PSI) != (testsets[t].param.psi_flags & PRE_ONE_PSI))
00245       mask |= MASK_FLAGS_PSI;
00246     if ((testsets[t-1].param.psi_flags & NFFT_SORT_NODES) != (testsets[t].param.psi_flags & NFFT_SORT_NODES))
00247       mask |= MASK_FLAGS_SORT;
00248     if ((testsets[t-1].param.psi_flags & NFFT_OMP_BLOCKWISE_ADJOINT) != (testsets[t].param.psi_flags & NFFT_OMP_BLOCKWISE_ADJOINT))
00249       mask |= MASK_FLAGS_BW;
00250     if ((testsets[t-1].param.nfsft_flags & NFSFT_USE_DPT) != (testsets[t].param.nfsft_flags & NFSFT_USE_DPT))
00251       mask |= MASK_FLAGS_FPT;
00252   }
00253 
00254   return mask;
00255 }
00256 
00257 void get_plot_title(char *outstr, int maxlen, char *hostname, s_param param, unsigned int diff_mask)
00258 {
00259   unsigned int mask = ~diff_mask;
00260   int offset = 0;
00261   int len;
00262 
00263   len = snprintf(outstr, maxlen, "%s", hostname);
00264   if (len < 0 || len+offset >= maxlen-1) return;
00265   offset += len;
00266 
00267   if (mask & MASK_TA)
00268   {
00269     len = snprintf(outstr+offset, maxlen-offset, " $\\mathrm{NFSFT}%s$", param.trafo_adjoint==0?"":"^\\top");
00270     if (len < 0 || len+offset >= maxlen-1) return;
00271     offset += len;
00272   }
00273 
00274   if (mask & MASK_N)
00275   {
00276     len = snprintf(outstr+offset, maxlen-offset, " N=%d", param.N);
00277     if (len < 0 || len+offset >= maxlen-1) return;
00278     offset += len;
00279   }
00280 
00281   if (mask & MASK_M)
00282   {
00283     len = snprintf(outstr+offset, maxlen-offset, " M=%d", param.M);
00284     if (len < 0 || len+offset >= maxlen-1) return;
00285     offset += len;
00286   }
00287 
00288   if (mask & MASK_WINM)
00289   {
00290     len = snprintf(outstr+offset, maxlen-offset, " m=%d", param.m);
00291     if (len < 0 || len+offset >= maxlen-1) return;
00292     offset += len;
00293   }
00294 
00295   if (mask & MASK_FLAGS_PSI)
00296   {
00297     len = snprintf(outstr+offset, maxlen-offset, " %s", get_psi_string(param.psi_flags));
00298     if (len < 0 || len+offset >= maxlen-1) return;
00299     offset += len;
00300   }
00301 
00302   if (mask & MASK_FLAGS_SORT)
00303   {
00304     len = snprintf(outstr+offset, maxlen-offset, " %s", get_sort_string(param.psi_flags));
00305     if (len < 0 || len+offset >= maxlen-1) return;
00306     offset += len;
00307   }
00308 
00309   if ((mask & MASK_FLAGS_BW) && strlen(get_adjoint_omp_string(param.psi_flags)) > 0)
00310   {
00311     len = snprintf(outstr+offset, maxlen-offset, " %s", get_adjoint_omp_string(param.psi_flags));
00312     if (len < 0 || len+offset >= maxlen-1) return;
00313     offset += len;
00314   }
00315 
00316   if (mask & MASK_FLAGS_FPT)
00317   {
00318     len = snprintf(outstr+offset, maxlen-offset, param.nfsft_flags & NFSFT_USE_DPT ? " DPT" : "");
00319     if (len < 0 || len+offset >= maxlen-1) return;
00320     offset += len;
00321   }
00322 
00323 }
00324 
00325 void print_output_speedup_total_tref(FILE *out, s_testset *testsets, int ntestsets, int use_tref, double tref)
00326 {
00327   int i, t;
00328   char hostname[1025];
00329   char plottitle[1025];
00330   unsigned int diff_mask = determine_different_parameters(testsets, ntestsets);
00331 
00332 #ifdef HAVE_GETHOSTNAME
00333   if (gethostname(hostname, 1024) != 0)
00334 #endif
00335     strncpy(hostname, "unnamed", 1024);
00336 
00337   get_plot_title(plottitle, 1024, hostname, testsets[0].param, diff_mask);
00338 
00339   fprintf(out, "\\begin{tikzpicture}\n");
00340   fprintf(out, "\\begin{axis}[");
00341   fprintf(out, "width=0.9\\textwidth, height=0.6\\textwidth, x tick label style={ /pgf/number format/1000 sep=}, xlabel=Number of threads, ylabel=Speedup, xtick=data, legend style={ legend pos = north west, legend columns=1}, ymajorgrids=true, yminorgrids=true, minor y tick num=4, ");
00342   fprintf(out, " title={%s}", plottitle);
00343   fprintf(out, " ]\n");
00344 
00345   for (t = 0; t < ntestsets; t++)
00346   {
00347     s_testset testset = testsets[t];
00348     fprintf(stderr, "%s $\\mathrm{NFSFT}%s$ N=%d M=%d m=%d %s %s %s}", hostname, testset.param.trafo_adjoint==0?"":"^\\top", testset.param.N, testset.param.M, testset.param.m, get_psi_string(testset.param.psi_flags), get_sort_string(testset.param.psi_flags), get_adjoint_omp_string(testset.param.psi_flags));
00349     fprintf(stderr, "\n");
00350 
00351     fprintf(out, "\\addplot coordinates {");
00352     for (i = 0; i < testset.nresults; i++)
00353       if (use_tref == 1)
00354         fprintf(out, "(%d, %.6e) ", testset.results[i].nthreads, tref/testset.results[i].resval[5].avg);
00355       else
00356         fprintf(out, "(%d, %.6e) ", testset.results[i].nthreads, testset.results[0].resval[5].avg/testset.results[i].resval[5].avg);
00357     fprintf(out, "};\n");
00358 
00359     for (i = 0; i < testset.nresults; i++)
00360       if (use_tref == 1)
00361         fprintf(stderr, "%d:%.3f  ", testset.results[i].nthreads, tref/testset.results[i].resval[5].avg);
00362       else
00363         fprintf(stderr, "%d:%.3f  ", testset.results[i].nthreads, testset.results[0].resval[5].avg/testset.results[i].resval[5].avg);
00364     fprintf(stderr, "\n\n");
00365   }
00366 
00367   fprintf(out, "\\legend{{");
00368   for (t = 0; t < ntestsets; t++)
00369   {
00370     char title[256];
00371     if (t > 0)
00372       fprintf(out, "},{");
00373     get_plot_title(title, 255, "", testsets[t].param, ~(diff_mask));
00374     fprintf(out, "%s", title);
00375   }
00376   fprintf(out, "}}\n");
00377   fprintf(out, "\\end{axis}\n");
00378   fprintf(out, "\\end{tikzpicture}\n");
00379   fprintf(out, "\n\n");
00380 
00381   fflush(out);
00382 }
00383 
00384 void print_output_speedup_total(FILE *out, s_testset *testsets, int ntestsets, int use_tref)
00385 {
00386   double tref = 1.0/0.0;
00387   int t, k;
00388 
00389   if (use_tref == 1)
00390     for (t = 0; t < ntestsets; t++)
00391       for (k = 0; k < testsets[t].nresults; k++)
00392         if (testsets[t].results[k].nthreads == 1 && testsets[t].results[k].resval[5].avg < tref)
00393           tref = testsets[t].results[k].resval[5].avg;
00394 
00395   print_output_speedup_total_tref(out, testsets, ntestsets, use_tref, tref);
00396 }
00397 
00398 void print_output_histo_PENRT(FILE *out, s_testset testset)
00399 {
00400   int i, size = testset.nresults;
00401   char hostname[1025];
00402 
00403 #ifdef HAVE_GETHOSTNAME
00404   if (gethostname(hostname, 1024) != 0)
00405 #endif
00406     strncpy(hostname, "unnamed", 1024);
00407 
00408   fprintf(out, "\\begin{tikzpicture}\n");
00409   fprintf(out, "\\begin{axis}[");
00410   fprintf(out, "width=0.9\\textwidth, height=0.6\\textwidth, ");
00411   fprintf(out, "symbolic x coords={");
00412   for (i = 0; i < size; i++)
00413     if (i > 0)
00414       fprintf(out, ",%d", testset.results[i].nthreads);
00415     else
00416       fprintf(out, "%d", testset.results[i].nthreads);
00417 
00418   fprintf(out, "}, x tick label style={ /pgf/number format/1000 sep=}, xlabel=Number of threads, ylabel=Time in s, xtick=data, legend style={legend columns=-1}, ybar, bar width=7pt, ymajorgrids=true, yminorgrids=true, minor y tick num=1, ");
00419   fprintf(out, " title={%s $\\mathrm{NFSFT}%s$ N=%d M=%d m=%d %s %s %s}", hostname, testset.param.trafo_adjoint==0?"":"^\\top", testset.param.N, testset.param.M, testset.param.m, get_psi_string(testset.param.psi_flags), get_sort_string(testset.param.psi_flags), get_adjoint_omp_string(testset.param.psi_flags));
00420   fprintf(out, " ]\n");
00421   fprintf(out, "\\addplot coordinates {");
00422   for (i = 0; i < size; i++)
00423     fprintf(out, "(%d, %.6e) ", testset.results[i].nthreads, testset.results[i].resval[1].avg);
00424   fprintf(out, "};\n");
00425 
00426   fprintf(out, "\\addplot coordinates {");
00427   for (i = 0; i < size; i++)
00428     fprintf(out, "(%d, %.6e) ", testset.results[i].nthreads, testset.results[i].resval[2].avg);
00429   fprintf(out, "};\n");
00430 
00431   fprintf(out, "\\addplot coordinates {");
00432   for (i = 0; i < size; i++)
00433     fprintf(out, "(%d, %.6e) ", testset.results[i].nthreads, testset.results[i].resval[3].avg);
00434   fprintf(out, "};\n");
00435 
00436   fprintf(out, "\\addplot coordinates {");
00437   for (i = 0; i < size; i++)
00438     fprintf(out, "(%d, %.6e) ", testset.results[i].nthreads, testset.results[i].resval[0].avg + testset.results[i].resval[4].avg);
00439   fprintf(out, "};\n");
00440 
00441   fprintf(out, "\\addplot coordinates {");
00442   for (i = 0; i < size; i++)
00443     fprintf(out, "(%d, %.6e) ", testset.results[i].nthreads, testset.results[i].resval[5].avg);
00444   fprintf(out, "};\n");
00445   fprintf(out, "\\legend{%s,%s,$\\mathrm{NFFT}%s$,rest,total}\n", testset.param.nfsft_flags & NFSFT_USE_DPT ? "DPT" : "FPT", testset.param.trafo_adjoint==0?"c2e":"$\\mathrm{c2e}^\\top$", testset.param.trafo_adjoint==0?"":"^\\top");
00446   fprintf(out, "\\end{axis}\n");
00447   fprintf(out, "\\end{tikzpicture}\n");
00448   fprintf(out, "\n\n");
00449 
00450   fflush(out);
00451 }
00452 
00453 void run_testset(s_testset *testset, int trafo_adjoint, int N, int M, int m, int nfsft_flags, int psi_flags, int *nthreads_array, int n_threads_array_size)
00454 {
00455   int i;
00456   testset->param.trafo_adjoint = trafo_adjoint;
00457   testset->param.N = N;
00458   testset->param.M = M;
00459   testset->param.m = m;
00460   testset->param.nfsft_flags = nfsft_flags;
00461   testset->param.psi_flags = psi_flags;
00462 
00463   testset->results = (s_result*) malloc(n_threads_array_size*sizeof(s_result));
00464   testset->nresults = n_threads_array_size;
00465 
00466   run_test_create(testset->param.trafo_adjoint, testset->param.N, testset->param.M);
00467   for (i = 0; i < n_threads_array_size; i++)
00468   {
00469     testset->results[i].nthreads = nthreads_array[i];
00470     run_test(testset->results[i].resval, NREPEAT, testset->param.m, testset->param.nfsft_flags, testset->param.psi_flags, testset->results[i].nthreads = nthreads_array[i]);
00471   }
00472 
00473 }
00474 
00475 void test1(int *nthreads_array, int n_threads_array_size, int m)
00476 {
00477   s_testset testsets[4];
00478 
00479   run_testset(&testsets[0], 0, 1024, 1000000, m, 0, NFFT_SORT_NODES, nthreads_array, n_threads_array_size);
00480 #if defined MEASURE_TIME && defined MEASURE_TIME_FFTW
00481   print_output_histo_PENRT(file_out_tex, testsets[0]);
00482 #endif
00483 
00484   run_testset(&testsets[1], 1, 1024, 1000000, m, 0, NFFT_SORT_NODES | NFFT_OMP_BLOCKWISE_ADJOINT, nthreads_array, n_threads_array_size);
00485 #if defined MEASURE_TIME && defined MEASURE_TIME_FFTW
00486   print_output_histo_PENRT(file_out_tex, testsets[1]);
00487 #endif
00488 
00489   print_output_speedup_total(file_out_tex, testsets, 2, 0);
00490 
00491   run_testset(&testsets[2], 0, 1024, 1000000, m, NFSFT_USE_DPT, NFFT_SORT_NODES, nthreads_array, n_threads_array_size);
00492 #if defined MEASURE_TIME && defined MEASURE_TIME_FFTW
00493   print_output_histo_PENRT(file_out_tex, testsets[2]);
00494 #endif
00495 
00496   run_testset(&testsets[3], 1, 1024, 1000000, m, NFSFT_USE_DPT, NFFT_SORT_NODES | NFFT_OMP_BLOCKWISE_ADJOINT, nthreads_array, n_threads_array_size);
00497 #if defined MEASURE_TIME && defined MEASURE_TIME_FFTW
00498   print_output_histo_PENRT(file_out_tex, testsets[3]);
00499 #endif
00500 
00501   print_output_speedup_total(file_out_tex, testsets+2, 2, 0);
00502 }
00503 
00504 int main(int argc, char** argv)
00505 {
00506   int *nthreads_array;
00507   int n_threads_array_size = get_nthreads_array(&nthreads_array);
00508   int k;
00509 
00510 #if !(defined MEASURE_TIME && defined MEASURE_TIME_FFTW)
00511   fprintf(stderr, "WARNING: Detailed time measurements for NFSFT are not activated.\n");
00512   fprintf(stderr, "For more detailed plots, please re-run the configure script with options\n");
00513   fprintf(stderr, "--enable-measure-time --enable-measure-time-fftw --enable-nfsft --enable-openmp\n");
00514   fprintf(stderr, "and run \"make clean all\"\n\n");
00515 #endif
00516 
00517   for (k = 0; k < n_threads_array_size; k++)
00518     fprintf(stderr, "%d ", nthreads_array[k]);
00519   fprintf(stderr, "\n");
00520 
00521   file_out_tex = fopen("nfsft_benchomp_results_plots.tex", "w");
00522 
00523   test1(nthreads_array, n_threads_array_size, 2);
00524   test1(nthreads_array, n_threads_array_size, 4);
00525   test1(nthreads_array, n_threads_array_size, 6);
00526   test1(nthreads_array, n_threads_array_size, 8);
00527 
00528   fclose(file_out_tex);
00529 
00530   return 0;
00531 }