001 // License: GPL. See LICENSE file for details. 002 package org.openstreetmap.josm.data.validation.tests; 003 004 import static org.openstreetmap.josm.tools.I18n.tr; 005 006 import java.awt.geom.Point2D; 007 import java.util.ArrayList; 008 import java.util.HashMap; 009 import java.util.List; 010 import java.util.Map; 011 012 import org.openstreetmap.josm.data.osm.OsmPrimitive; 013 import org.openstreetmap.josm.data.osm.Way; 014 import org.openstreetmap.josm.data.validation.Severity; 015 import org.openstreetmap.josm.data.validation.Test; 016 import org.openstreetmap.josm.data.validation.TestError; 017 import org.openstreetmap.josm.data.validation.util.ValUtil; 018 import org.openstreetmap.josm.gui.progress.ProgressMonitor; 019 import org.openstreetmap.josm.tools.MultiMap; 020 import org.openstreetmap.josm.tools.Utils; 021 022 /** 023 * Checks for similar named ways, symptom of a possible typo. It uses the 024 * Levenshtein distance to check for similarity 025 * 026 * @author frsantos 027 */ 028 public class SimilarNamedWays extends Test { 029 030 protected static final int SIMILAR_NAMED = 701; 031 032 /** All ways, grouped by cells */ 033 Map<Point2D,List<Way>> cellWays; 034 /** The already detected errors */ 035 MultiMap<Way, Way> errorWays; 036 037 /** 038 * Constructor 039 */ 040 public SimilarNamedWays() { 041 super(tr("Similarly named ways"), 042 tr("This test checks for ways with similar names that may have been misspelled.")); 043 } 044 045 @Override 046 public void startTest(ProgressMonitor monitor) { 047 super.startTest(monitor); 048 cellWays = new HashMap<Point2D,List<Way>>(1000); 049 errorWays = new MultiMap<Way, Way>(); 050 } 051 052 @Override 053 public void endTest() { 054 cellWays = null; 055 errorWays = null; 056 super.endTest(); 057 } 058 059 @Override 060 public void visit(Way w) { 061 if (!w.isUsable()) 062 return; 063 064 String name = w.get("name"); 065 if (name == null || name.length() < 6) 066 return; 067 068 List<List<Way>> theCellWays = ValUtil.getWaysInCell(w, cellWays); 069 for (List<Way> ways : theCellWays) { 070 for (Way w2 : ways) { 071 if (errorWays.contains(w, w2) || errorWays.contains(w2, w)) { 072 continue; 073 } 074 075 String name2 = w2.get("name"); 076 if (name2 == null || name2.length() < 6) { 077 continue; 078 } 079 080 int levenshteinDistance = getLevenshteinDistance(name, name2); 081 if (0 < levenshteinDistance && levenshteinDistance <= 2) { 082 List<OsmPrimitive> primitives = new ArrayList<OsmPrimitive>(); 083 primitives.add(w); 084 primitives.add(w2); 085 errors.add(new TestError(this, Severity.WARNING, tr("Similarly named ways"), SIMILAR_NAMED, primitives)); 086 errorWays.put(w, w2); 087 } 088 } 089 ways.add(w); 090 } 091 } 092 093 /** 094 * Compute Levenshtein distance 095 * 096 * @param s First word 097 * @param t Second word 098 * @return The distance between words 099 */ 100 public int getLevenshteinDistance(String s, String t) { 101 int d[][]; // matrix 102 int n; // length of s 103 int m; // length of t 104 int i; // iterates through s 105 int j; // iterates through t 106 char s_i; // ith character of s 107 char t_j; // jth character of t 108 int cost; // cost 109 110 // Step 1 111 n = s.length(); 112 m = t.length(); 113 if (n == 0) 114 return m; 115 if (m == 0) 116 return n; 117 d = new int[n + 1][m + 1]; 118 119 // Step 2 120 for (i = 0; i <= n; i++) { 121 d[i][0] = i; 122 } 123 for (j = 0; j <= m; j++) { 124 d[0][j] = j; 125 } 126 127 // Step 3 128 for (i = 1; i <= n; i++) { 129 130 s_i = s.charAt(i - 1); 131 132 // Step 4 133 for (j = 1; j <= m; j++) { 134 135 t_j = t.charAt(j - 1); 136 137 // Step 5 138 if (s_i == t_j) { 139 cost = 0; 140 } else { 141 cost = 1; 142 } 143 144 // Step 6 145 d[i][j] = Utils.min(d[i - 1][j] + 1, d[i][j - 1] + 1, d[i - 1][j - 1] + cost); 146 } 147 } 148 149 // Step 7 150 return d[n][m]; 151 } 152 }