Engauge Digitizer  2
GridClassifier.cpp
1 /******************************************************************************************************
2  * (C) 2014 markummitchell@github.com. This file is part of Engauge Digitizer, which is released *
3  * under GNU General Public License version 2 (GPLv2) or (at your option) any later version. See file *
4  * LICENSE or go to gnu.org/licenses for details. Distribution requires prior written permission. *
5  ******************************************************************************************************/
6 
7 #include "ColorFilter.h"
8 #include "Correlation.h"
9 #include "DocumentModelCoords.h"
10 #include "EngaugeAssert.h"
11 #include "GridClassifier.h"
12 #include <iostream>
13 #include "Logger.h"
14 #include <QDebug>
15 #include <QFile>
16 #include <QImage>
17 #include "QtToString.h"
18 #include "Transformation.h"
19 
20 int GridClassifier::NUM_PIXELS_PER_HISTOGRAM_BINS = 1;
21 double GridClassifier::PEAK_HALF_WIDTH = 4;
22 int GridClassifier::MIN_STEP_PIXELS = 4 * GridClassifier::PEAK_HALF_WIDTH; // Step includes down ramp, flat part, up ramp
23 const QString GNUPLOT_DELIMITER ("\t");
24 
25 // We set up the picket fence with binStart arbitrarily set close to zero. Peak is
26 // not exactly at zero since we want to include the left side of the first peak.
27 int GridClassifier::BIN_START_UNSHIFTED = GridClassifier::PEAK_HALF_WIDTH;
28 
29 using namespace std;
30 
32 {
33 }
34 
35 int GridClassifier::binFromCoordinate (double coord,
36  double coordMin,
37  double coordMax) const
38 {
39  ENGAUGE_ASSERT (coordMin < coordMax);
40  ENGAUGE_ASSERT (coordMin <= coord);
41  ENGAUGE_ASSERT (coord <= coordMax);
42 
43  int bin = 0.5 + (m_numHistogramBins - 1.0) * (coord - coordMin) / (coordMax - coordMin);
44 
45  return bin;
46 }
47 
48 void GridClassifier::classify (bool isGnuplot,
49  const QPixmap &originalPixmap,
50  const Transformation &transformation,
51  int &countX,
52  double &startX,
53  double &stepX,
54  int &countY,
55  double &startY,
56  double &stepY)
57 {
58  LOG4CPP_INFO_S ((*mainCat)) << "GridClassifier::classify";
59 
60  QImage image = originalPixmap.toImage ();
61 
62  m_numHistogramBins = image.width() / NUM_PIXELS_PER_HISTOGRAM_BINS;
63 
64  double xMin, xMax, yMin, yMax;
65  double binStartX, binStepX, binStartY, binStepY;
66 
67  m_binsX = new double [m_numHistogramBins];
68  m_binsY = new double [m_numHistogramBins];
69 
70  computeGraphCoordinateLimits (image,
71  transformation,
72  xMin,
73  xMax,
74  yMin,
75  yMax);
76  initializeHistogramBins ();
77  populateHistogramBins (image,
78  transformation,
79  xMin,
80  xMax,
81  yMin,
82  yMax);
83  searchStartStepSpace (isGnuplot,
84  m_binsX,
85  "x",
86  xMin,
87  xMax,
88  startX,
89  stepX,
90  binStartX,
91  binStepX);
92  searchStartStepSpace (isGnuplot,
93  m_binsY,
94  "y",
95  yMin,
96  yMax,
97  startY,
98  stepY,
99  binStartY,
100  binStepY);
101  searchCountSpace (m_binsX,
102  binStartX,
103  binStepX,
104  countX);
105  searchCountSpace (m_binsY,
106  binStartY,
107  binStepY,
108  countY);
109 
110  delete [] m_binsX;
111  delete [] m_binsY;
112 }
113 
114 void GridClassifier::computeGraphCoordinateLimits (const QImage &image,
115  const Transformation &transformation,
116  double &xMin,
117  double &xMax,
118  double &yMin,
119  double &yMax)
120 {
121  LOG4CPP_INFO_S ((*mainCat)) << "GridClassifier::computeGraphCoordinateLimits";
122 
123  // Project every pixel onto the x axis, and onto the y axis. One set of histogram bins will be
124  // set up along each of the axes. The range of bins will encompass every pixel in the image, and no more.
125 
126  QPointF posGraphTL, posGraphTR, posGraphBL, posGraphBR;
127  transformation.transformScreenToRawGraph (QPointF (0, 0) , posGraphTL);
128  transformation.transformScreenToRawGraph (QPointF (image.width(), 0) , posGraphTR);
129  transformation.transformScreenToRawGraph (QPointF (0, image.height()) , posGraphBL);
130  transformation.transformScreenToRawGraph (QPointF (image.width(), image.height()), posGraphBR);
131 
132  // Compute x and y ranges for setting up the histogram bins
133  if (transformation.modelCoords().coordsType() == COORDS_TYPE_CARTESIAN) {
134 
135  // For affine cartesian coordinates, we only need to look at the screen corners
136  xMin = qMin (qMin (qMin (posGraphTL.x(), posGraphTR.x()), posGraphBL.x()), posGraphBR.x());
137  xMax = qMax (qMax (qMax (posGraphTL.x(), posGraphTR.x()), posGraphBL.x()), posGraphBR.x());
138  yMin = qMin (qMin (qMin (posGraphTL.y(), posGraphTR.y()), posGraphBL.y()), posGraphBR.y());
139  yMax = qMax (qMax (qMax (posGraphTL.y(), posGraphTR.y()), posGraphBL.y()), posGraphBR.y());
140 
141  } else {
142 
143  // For affine polar coordinates, easiest approach is to assume the full circle
144  xMin = 0.0;
145  xMax = transformation.modelCoords().thetaPeriod();
146  yMin = transformation.modelCoords().originRadius();
147  yMax = qMax (qMax (qMax (posGraphTL.y(), posGraphTR.y()), posGraphBL.y()), posGraphBR.y());
148 
149  }
150 
151  ENGAUGE_ASSERT (xMin < xMax);
152  ENGAUGE_ASSERT (yMin < yMax);
153 }
154 
155 double GridClassifier::coordinateFromBin (int bin,
156  double coordMin,
157  double coordMax) const
158 {
159  ENGAUGE_ASSERT (bin < m_numHistogramBins);
160  ENGAUGE_ASSERT (coordMin < coordMax);
161 
162  return coordMin + (coordMax - coordMin) * (double) bin / ((double) m_numHistogramBins - 1.0);
163 }
164 
165 void GridClassifier::copyVectorToVector (const double from [],
166  double to []) const
167 {
168  for (int bin = 0; bin < m_numHistogramBins; bin++) {
169  to [bin] = from [bin];
170  }
171 }
172 
173 void GridClassifier::dumpGnuplotCoordinate (const QString &coordinateLabel,
174  double corr,
175  const double *bins,
176  double coordinateMin,
177  double coordinateMax,
178  int binStart,
179  int binStep) const
180 {
181  QString filename = QString ("gridclassifier_%1_corr%2_startMax%3_stepMax%4.gnuplot")
182  .arg (coordinateLabel)
183  .arg (corr, 8, 'f', 3, '0')
184  .arg (binStart)
185  .arg (binStep);
186 
187  cout << "Writing gnuplot file: " << filename.toLatin1().data() << "\n";
188 
189  QFile fileDump (filename);
190  fileDump.open (QIODevice::WriteOnly | QIODevice::Text);
191  QTextStream strDump (&fileDump);
192 
193  int bin;
194 
195  // For consistent scaling, get the max bin count
196  int binCountMax = 0;
197  for (bin = 0; bin < m_numHistogramBins; bin++) {
198  if (bins [bin] > binCountMax) {
199  binCountMax = qMax ((double) binCountMax,
200  bins [bin]);
201  }
202  }
203 
204  // Get picket fence
205  double *picketFence = new double [m_numHistogramBins];
206  loadPicketFence (picketFence,
207  binStart,
208  binStep,
209  0,
210  false);
211 
212  // Header
213  strDump << "bin"
214  << GNUPLOT_DELIMITER << "coordinate"
215  << GNUPLOT_DELIMITER << "binCount"
216  << GNUPLOT_DELIMITER << "startStep"
217  << GNUPLOT_DELIMITER << "picketFence" << "\n";
218 
219  // Data, with one row per coordinate value
220  for (bin = 0; bin < m_numHistogramBins; bin++) {
221 
222  double coordinate = coordinateFromBin (bin,
223  coordinateMin,
224  coordinateMax);
225  double startStepValue (((bin - binStart) % binStep == 0) ? 1 : 0);
226  strDump << bin
227  << GNUPLOT_DELIMITER << coordinate
228  << GNUPLOT_DELIMITER << bins [bin]
229  << GNUPLOT_DELIMITER << binCountMax * startStepValue
230  << GNUPLOT_DELIMITER << binCountMax * picketFence [bin] << "\n";
231  }
232 
233  delete [] picketFence;
234 }
235 
236 void GridClassifier::dumpGnuplotCorrelations (const QString &coordinateLabel,
237  double valueMin,
238  double valueMax,
239  const double signalA [],
240  const double signalB [],
241  const double correlations [])
242 {
243  QString filename = QString ("gridclassifier_%1_correlations.gnuplot")
244  .arg (coordinateLabel);
245 
246  cout << "Writing gnuplot file: " << filename.toLatin1().data() << "\n";
247 
248  QFile fileDump (filename);
249  fileDump.open (QIODevice::WriteOnly | QIODevice::Text);
250  QTextStream strDump (&fileDump);
251 
252  int bin;
253 
254  // Compute max values so curves can be normalized to the same heights
255  double signalAMax = 1, signalBMax = 1, correlationsMax = 1;
256  for (bin = 0; bin < m_numHistogramBins; bin++) {
257  if (bin == 0 || signalA [bin] > signalAMax) {
258  signalAMax = signalA [bin];
259  }
260  if (bin == 0 || signalB [bin] > signalBMax) {
261  signalBMax = signalB [bin];
262  }
263  if (bin == 0 || correlations [bin] > correlationsMax) {
264  correlationsMax = correlations [bin];
265  }
266  }
267 
268  // Output normalized curves
269  for (int bin = 0; bin < m_numHistogramBins; bin++) {
270 
271  strDump << coordinateFromBin (bin,
272  valueMin,
273  valueMax)
274  << GNUPLOT_DELIMITER << signalA [bin] / signalAMax
275  << GNUPLOT_DELIMITER << signalB [bin] / signalBMax
276  << GNUPLOT_DELIMITER << correlations [bin] / correlationsMax << "\n";
277  }
278 }
279 
280 void GridClassifier::initializeHistogramBins ()
281 {
282  LOG4CPP_INFO_S ((*mainCat)) << "GridClassifier::initializeHistogramBins";
283 
284  for (int bin = 0; bin < m_numHistogramBins; bin++) {
285  m_binsX [bin] = 0;
286  m_binsY [bin] = 0;
287  }
288 }
289 
290 void GridClassifier::loadPicketFence (double picketFence [],
291  int binStart,
292  int binStep,
293  int count,
294  bool isCount) const
295 {
296  const double PEAK_HEIGHT = 1.0; // Arbitrary height, since normalization will counteract any change to this value
297 
298  // Count argument is optional. Note that binStart already excludes PEAK_HALF_WIDTH bins at start,
299  // and we also exclude PEAK_HALF_WIDTH bins at the end
300  ENGAUGE_ASSERT (binStart >= PEAK_HALF_WIDTH);
301  if (!isCount) {
302  count = 1 + (m_numHistogramBins - binStart - PEAK_HALF_WIDTH) / binStep;
303  }
304 
305  // Bins that we need to worry about
306  int binStartMinusHalfWidth = binStart - PEAK_HALF_WIDTH;
307  int binStopPlusHalfWidth = (binStart + (count - 1) * binStep) + PEAK_HALF_WIDTH;
308 
309  // To normalize, we compute the area under the picket fence. Constraint is
310  // areaUnnormalized + NUM_HISTOGRAM_BINS * normalizationOffset = 0
311  double areaUnnormalized = count * PEAK_HEIGHT * PEAK_HALF_WIDTH;
312  double normalizationOffset = -1.0 * areaUnnormalized / m_numHistogramBins;
313 
314  for (int bin = 0; bin < m_numHistogramBins; bin++) {
315 
316  // Outside of the peaks, bin is small negative so correlation with unit function is zero. In other
317  // words, the function is normalized
318  picketFence [bin] = normalizationOffset;
319 
320  if ((binStartMinusHalfWidth <= bin) &&
321  (bin <= binStopPlusHalfWidth)) {
322 
323  // Closest peak
324  int ordinalClosestPeak = (int) ((bin - binStart + binStep / 2) / binStep);
325  int binClosestPeak = binStart + ordinalClosestPeak * binStep;
326 
327  // Distance from closest peak is used to define an isosceles triangle
328  int distanceToClosestPeak = qAbs (bin - binClosestPeak);
329 
330  if (distanceToClosestPeak < PEAK_HALF_WIDTH) {
331 
332  // Map 0 to PEAK_HALF_WIDTH to 1 to 0
333  picketFence [bin] = 1.0 - (double) distanceToClosestPeak / PEAK_HALF_WIDTH + normalizationOffset;
334 
335  }
336  }
337  }
338 }
339 
340 void GridClassifier::populateHistogramBins (const QImage &image,
341  const Transformation &transformation,
342  double xMin,
343  double xMax,
344  double yMin,
345  double yMax)
346 {
347  LOG4CPP_INFO_S ((*mainCat)) << "GridClassifier::populateHistogramBins";
348 
349  ColorFilter filter;
350  QRgb rgbBackground = filter.marginColor (&image);
351 
352  for (int x = 0; x < image.width(); x++) {
353  for (int y = 0; y < image.height(); y++) {
354 
355  QColor pixel = image.pixel (x, y);
356 
357  // Skip pixels with background color
358  if (!filter.colorCompare (rgbBackground,
359  pixel.rgb ())) {
360 
361  // Add this pixel to histograms
362  QPointF posGraph;
363  transformation.transformScreenToRawGraph (QPointF (x, y), posGraph);
364 
365  if (transformation.modelCoords().coordsType() == COORDS_TYPE_POLAR) {
366 
367  // If out of the 0 to period range, the theta value must shifted by the period to get into that range
368  while (posGraph.x() < xMin) {
369  posGraph.setX (posGraph.x() + transformation.modelCoords().thetaPeriod());
370  }
371  while (posGraph.x() > xMax) {
372  posGraph.setX (posGraph.x() - transformation.modelCoords().thetaPeriod());
373  }
374  }
375 
376  int binX = binFromCoordinate (posGraph.x(), xMin, xMax);
377  int binY = binFromCoordinate (posGraph.y(), yMin, yMax);
378 
379  ENGAUGE_ASSERT (0 <= binX);
380  ENGAUGE_ASSERT (0 <= binY);
381  ENGAUGE_ASSERT (binX < m_numHistogramBins);
382  ENGAUGE_ASSERT (binY < m_numHistogramBins);
383 
384  // Roundoff error in log scaling may let bin go just outside legal range
385  binX = qMin (binX, m_numHistogramBins - 1);
386  binY = qMin (binY, m_numHistogramBins - 1);
387 
388  ++m_binsX [binX];
389  ++m_binsY [binY];
390  }
391  }
392  }
393 }
394 
395 void GridClassifier::searchCountSpace (double bins [],
396  double binStart,
397  double binStep,
398  int &countMax)
399 {
400  LOG4CPP_INFO_S ((*mainCat)) << "GridClassifier::searchCountSpace"
401  << " start=" << binStart
402  << " step=" << binStep;
403 
404  // Loop though the space of possible counts
405  Correlation correlation (m_numHistogramBins);
406  double *picketFence = new double [m_numHistogramBins];
407  double corr, corrMax;
408  bool isFirst = true;
409  int countStop = 1 + (m_numHistogramBins - binStart) / binStep;
410  for (int count = 2; count <= countStop; count++) {
411 
412  loadPicketFence (picketFence,
413  binStart,
414  binStep,
415  count,
416  true);
417 
418  correlation.correlateWithoutShift (m_numHistogramBins,
419  bins,
420  picketFence,
421  corr);
422  if (isFirst || (corr > corrMax)) {
423  countMax = count;
424  corrMax = corr;
425  }
426 
427  isFirst = false;
428  }
429 
430  delete [] picketFence;
431 }
432 
433 void GridClassifier::searchStartStepSpace (bool isGnuplot,
434  double bins [],
435  const QString &coordinateLabel,
436  double valueMin,
437  double valueMax,
438  double &start,
439  double &step,
440  double &binStartMax,
441  double &binStepMax)
442 {
443  LOG4CPP_INFO_S ((*mainCat)) << "GridClassifier::searchStartStepSpace";
444 
445  // Correlations are tracked for logging
446  double *signalA = new double [m_numHistogramBins];
447  double *signalB = new double [m_numHistogramBins];
448  double *correlations = new double [m_numHistogramBins];
449  double *correlationsMax = new double [m_numHistogramBins];
450 
451  // Loop though the space of possible gridlines using the independent variables (start,step).
452  Correlation correlation (m_numHistogramBins);
453  double *picketFence = new double [m_numHistogramBins];
454  int binStart;
455  double corr = 0, corrMax = 0;
456  bool isFirst = true;
457 
458  // We do not explicitly search(=loop) through binStart here, since Correlation::correlateWithShift will take
459  // care of that for us
460 
461  // Step search starts out small, and stops at value that gives count substantially greater than 2. Freakishly small
462  // images need to have MIN_STEP_PIXELS overridden so the loop iterates at least once
463  binStartMax = BIN_START_UNSHIFTED + 1; // In case search below ever fails
464  binStepMax = qMin (MIN_STEP_PIXELS, m_numHistogramBins / 8); // In case search below ever fails
465  for (int binStep = qMin (MIN_STEP_PIXELS, m_numHistogramBins / 8); binStep < m_numHistogramBins / 4; binStep++) {
466 
467  loadPicketFence (picketFence,
468  BIN_START_UNSHIFTED,
469  binStep,
470  PEAK_HALF_WIDTH,
471  false);
472 
473  correlation.correlateWithShift (m_numHistogramBins,
474  bins,
475  picketFence,
476  binStart,
477  corr,
478  correlations);
479  if (isFirst || (corr > corrMax)) {
480 
481  int binStartMaxNext = binStart + BIN_START_UNSHIFTED + 1; // Compensate for the shift performed inside loadPicketFence
482 
483  // Make sure binStartMax never goes out of bounds
484  if (binStartMaxNext < m_numHistogramBins) {
485 
486  binStartMax = binStartMaxNext;
487  binStepMax = binStep;
488  corrMax = corr;
489  copyVectorToVector (bins, signalA);
490  copyVectorToVector (picketFence, signalB);
491  copyVectorToVector (correlations, correlationsMax);
492 
493  // Output a gnuplot file. We should see the correlation values consistently increasing
494  if (isGnuplot) {
495 
496  dumpGnuplotCoordinate(coordinateLabel,
497  corr,
498  bins,
499  valueMin,
500  valueMax,
501  binStart,
502  binStep);
503  }
504  }
505  }
506 
507  isFirst = false;
508  }
509 
510  // Convert from bins back to graph coordinates
511  start = coordinateFromBin (binStartMax,
512  valueMin,
513  valueMax);
514  if (binStartMax + binStepMax < m_numHistogramBins) {
515 
516  // Normal case where a reasonable step value is being calculated
517  double next = coordinateFromBin (binStartMax + binStepMax,
518  valueMin,
519  valueMax);
520  step = next - start;
521  } else {
522 
523  // Pathological case where step jumps to outside the legal range. We bring the step back into range
524  double next = coordinateFromBin (m_numHistogramBins - 1,
525  valueMin,
526  valueMax);
527  step = next - start;
528  }
529 
530  if (isGnuplot) {
531  dumpGnuplotCorrelations (coordinateLabel,
532  valueMin,
533  valueMax,
534  signalA,
535  signalB,
536  correlationsMax);
537  }
538 
539  delete [] signalA;
540  delete [] signalB;
541  delete [] correlations;
542  delete [] correlationsMax;
543  delete [] picketFence;
544 }
void correlateWithoutShift(int N, const double function1 [], const double function2 [], double &corrMax) const
Return the correlation of the two functions, without any shift.
QRgb marginColor(const QImage *image) const
Identify the margin color of the image, which is defined as the most common color in the four margins...
Definition: ColorFilter.cpp:73
bool colorCompare(QRgb rgb1, QRgb rgb2) const
See if the two color values are close enough to be considered to be the same.
Definition: ColorFilter.cpp:25
double thetaPeriod() const
Return the period of the theta value for polar coordinates, consistent with CoordThetaUnits.
Fast cross correlation between two functions.
Definition: Correlation.h:14
Class for filtering image to remove unimportant information.
Definition: ColorFilter.h:20
DocumentModelCoords modelCoords() const
Get method for DocumentModelCoords.
Affine transformation between screen and graph coordinates, based on digitized axis points...
GridClassifier()
Single constructor.
CoordsType coordsType() const
Get method for coordinates type.
void transformScreenToRawGraph(const QPointF &coordScreen, QPointF &coordGraph) const
Transform from cartesian pixel screen coordinates to cartesian/polar graph coordinates.
double originRadius() const
Get method for origin radius in polar mode.
void classify(bool isGnuplot, const QPixmap &originalPixmap, const Transformation &transformation, int &countX, double &startX, double &stepX, int &countY, double &startY, double &stepY)
Classify the specified image, and return the most probably x and y grid settings. ...
void correlateWithShift(int N, const double function1 [], const double function2 [], int &binStartMax, double &corrMax, double correlations []) const
Return the shift in function1 that best aligns that function with function2.
Definition: Correlation.cpp:44