001    // License: GPL. See LICENSE file for details.
002    package org.openstreetmap.josm.data.validation.tests;
003    
004    import static org.openstreetmap.josm.tools.I18n.tr;
005    
006    import java.awt.geom.Point2D;
007    import java.util.ArrayList;
008    import java.util.HashMap;
009    import java.util.List;
010    import java.util.Map;
011    
012    import org.openstreetmap.josm.data.osm.OsmPrimitive;
013    import org.openstreetmap.josm.data.osm.Way;
014    import org.openstreetmap.josm.data.validation.Severity;
015    import org.openstreetmap.josm.data.validation.Test;
016    import org.openstreetmap.josm.data.validation.TestError;
017    import org.openstreetmap.josm.data.validation.util.ValUtil;
018    import org.openstreetmap.josm.gui.progress.ProgressMonitor;
019    import org.openstreetmap.josm.tools.MultiMap;
020    import org.openstreetmap.josm.tools.Utils;
021    
022    /**
023     * Checks for similar named ways, symptom of a possible typo. It uses the
024     * Levenshtein distance to check for similarity
025     *
026     * @author frsantos
027     */
028    public class SimilarNamedWays extends Test {
029    
030        protected static final int SIMILAR_NAMED = 701;
031    
032        /** All ways, grouped by cells */
033        Map<Point2D,List<Way>> cellWays;
034        /** The already detected errors */
035        MultiMap<Way, Way> errorWays;
036    
037        /**
038         * Constructor
039         */
040        public SimilarNamedWays() {
041            super(tr("Similarly named ways"),
042                    tr("This test checks for ways with similar names that may have been misspelled."));
043        }
044    
045        @Override
046        public void startTest(ProgressMonitor monitor) {
047            super.startTest(monitor);
048            cellWays = new HashMap<Point2D,List<Way>>(1000);
049            errorWays = new MultiMap<Way, Way>();
050        }
051    
052        @Override
053        public void endTest() {
054            cellWays = null;
055            errorWays = null;
056            super.endTest();
057        }
058    
059        @Override
060        public void visit(Way w) {
061            if (!w.isUsable())
062                return;
063    
064            String name = w.get("name");
065            if (name == null || name.length() < 6)
066                return;
067    
068            List<List<Way>> theCellWays = ValUtil.getWaysInCell(w, cellWays);
069            for (List<Way> ways : theCellWays) {
070                for (Way w2 : ways) {
071                    if (errorWays.contains(w, w2) || errorWays.contains(w2, w)) {
072                        continue;
073                    }
074    
075                    String name2 = w2.get("name");
076                    if (name2 == null || name2.length() < 6) {
077                        continue;
078                    }
079    
080                    int levenshteinDistance = getLevenshteinDistance(name, name2);
081                    if (0 < levenshteinDistance && levenshteinDistance <= 2) {
082                        List<OsmPrimitive> primitives = new ArrayList<OsmPrimitive>();
083                        primitives.add(w);
084                        primitives.add(w2);
085                        errors.add(new TestError(this, Severity.WARNING, tr("Similarly named ways"), SIMILAR_NAMED, primitives));
086                        errorWays.put(w, w2);
087                    }
088                }
089                ways.add(w);
090            }
091        }
092    
093        /**
094         * Compute Levenshtein distance
095         *
096         * @param s First word
097         * @param t Second word
098         * @return The distance between words
099         */
100        public int getLevenshteinDistance(String s, String t) {
101            int d[][]; // matrix
102            int n; // length of s
103            int m; // length of t
104            int i; // iterates through s
105            int j; // iterates through t
106            char s_i; // ith character of s
107            char t_j; // jth character of t
108            int cost; // cost
109    
110            // Step 1
111            n = s.length();
112            m = t.length();
113            if (n == 0)
114                return m;
115            if (m == 0)
116                return n;
117            d = new int[n + 1][m + 1];
118    
119            // Step 2
120            for (i = 0; i <= n; i++) {
121                d[i][0] = i;
122            }
123            for (j = 0; j <= m; j++) {
124                d[0][j] = j;
125            }
126    
127            // Step 3
128            for (i = 1; i <= n; i++) {
129    
130                s_i = s.charAt(i - 1);
131    
132                // Step 4
133                for (j = 1; j <= m; j++) {
134    
135                    t_j = t.charAt(j - 1);
136    
137                    // Step 5
138                    if (s_i == t_j) {
139                        cost = 0;
140                    } else {
141                        cost = 1;
142                    }
143    
144                    // Step 6
145                    d[i][j] = Utils.min(d[i - 1][j] + 1, d[i][j - 1] + 1, d[i - 1][j - 1] + cost);
146                }
147            }
148    
149            // Step 7
150            return d[n][m];
151        }
152    }