001////////////////////////////////////////////////////////////////////////////////
002// checkstyle: Checks Java source code for adherence to a set of rules.
003// Copyright (C) 2001-2015 the original author or authors.
004//
005// This library is free software; you can redistribute it and/or
006// modify it under the terms of the GNU Lesser General Public
007// License as published by the Free Software Foundation; either
008// version 2.1 of the License, or (at your option) any later version.
009//
010// This library is distributed in the hope that it will be useful,
011// but WITHOUT ANY WARRANTY; without even the implied warranty of
012// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
013// Lesser General Public License for more details.
014//
015// You should have received a copy of the GNU Lesser General Public
016// License along with this library; if not, write to the Free Software
017// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
018////////////////////////////////////////////////////////////////////////////////
019
020package com.puppycrawl.tools.checkstyle.api;
021
022import java.io.BufferedReader;
023import java.io.File;
024import java.io.FileInputStream;
025import java.io.FileNotFoundException;
026import java.io.IOException;
027import java.io.InputStreamReader;
028import java.io.Reader;
029import java.io.StringReader;
030import java.io.UnsupportedEncodingException;
031import java.nio.charset.Charset;
032import java.nio.charset.CharsetDecoder;
033import java.nio.charset.CodingErrorAction;
034import java.nio.charset.UnsupportedCharsetException;
035import java.util.AbstractList;
036import java.util.ArrayList;
037import java.util.Arrays;
038import java.util.List;
039import java.util.regex.Matcher;
040import java.util.regex.Pattern;
041
042import org.apache.commons.lang3.ArrayUtils;
043
044import com.google.common.io.Closeables;
045
046/**
047 * Represents the text contents of a file of arbitrary plain text type.
048 * <p>
049 * This class will be passed to instances of class FileSetCheck by
050 * Checker. It implements a string list to ensure backwards
051 * compatibility, but can be extended in the future to allow more
052 * flexible, more powerful or more efficient handling of certain
053 * situations.
054 * </p>
055 *
056 * @author Martin von Gagern
057 */
058public final class FileText extends AbstractList<String> {
059
060    /**
061     * The number of characters to read in one go.
062     */
063    private static final int READ_BUFFER_SIZE = 1024;
064
065    /**
066     * Regular expression pattern matching all line terminators.
067     */
068    private static final Pattern LINE_TERMINATOR = Pattern.compile("\\n|\\r\\n?");
069
070    // For now, we always keep both full text and lines array.
071    // In the long run, however, the one passed at initialization might be
072    // enough, while the other could be lazily created when requested.
073    // This would save memory but cost CPU cycles.
074
075    /**
076     * The name of the file.
077     * {@code null} if no file name is available for whatever reason.
078     */
079    private final File file;
080
081    /**
082     * The charset used to read the file.
083     * {@code null} if the file was reconstructed from a list of lines.
084     */
085    private final Charset charset;
086
087    /**
088     * The full text contents of the file.
089     */
090    private final String fullText;
091
092    /**
093     * The lines of the file, without terminators.
094     */
095    private final String[] lines;
096
097    /**
098     * The first position of each line within the full text.
099     */
100    private int[] lineBreaks;
101
102    /**
103     * Creates a new file text representation.
104     *
105     * <p>The file will be read using the specified encoding, replacing
106     * malformed input and unmappable characters with the default
107     * replacement character.
108     *
109     * @param file the name of the file
110     * @param charsetName the encoding to use when reading the file
111     * @throws NullPointerException if the text is null
112     * @throws IOException if the file could not be read
113     */
114    public FileText(File file, String charsetName) throws IOException {
115        this.file = file;
116
117        // We use our own decoder, to be sure we have complete control
118        // about replacements.
119        final CharsetDecoder decoder;
120        try {
121            charset = Charset.forName(charsetName);
122            decoder = charset.newDecoder();
123            decoder.onMalformedInput(CodingErrorAction.REPLACE);
124            decoder.onUnmappableCharacter(CodingErrorAction.REPLACE);
125        }
126        catch (final UnsupportedCharsetException ex) {
127            final String message = "Unsupported charset: " + charsetName;
128            final UnsupportedEncodingException ex2 = new UnsupportedEncodingException(message);
129            ex2.initCause(ex);
130            throw ex2;
131        }
132
133        fullText = readFile(file, decoder);
134
135        // Use the BufferedReader to break down the lines as this
136        // is about 30% faster than using the
137        // LINE_TERMINATOR.split(fullText, -1) method
138        final ArrayList<String> textLines = new ArrayList<>();
139        final BufferedReader reader =
140            new BufferedReader(new StringReader(fullText));
141        while (true) {
142            final String line = reader.readLine();
143            if (line == null) {
144                break;
145            }
146            textLines.add(line);
147        }
148        lines = textLines.toArray(new String[textLines.size()]);
149    }
150
151    /**
152     * Compatibility constructor.
153     *
154     * <p>This constructor reconstructs the text of the file by joining
155     * lines with linefeed characters. This process does not restore
156     * the original line terminators and should therefore be avoided.
157     *
158     * @param file the name of the file
159     * @param lines the lines of the text, without terminators
160     * @throws NullPointerException if the lines array is null
161     */
162    private FileText(File file, List<String> lines) {
163        final StringBuilder buf = new StringBuilder();
164        for (final String line : lines) {
165            buf.append(line).append('\n');
166        }
167        buf.trimToSize();
168
169        this.file = file;
170        charset = null;
171        fullText = buf.toString();
172        this.lines = lines.toArray(new String[lines.size()]);
173    }
174
175    /**
176     * Copy constructor.
177     * @param fileText to make copy of
178     */
179    public FileText(FileText fileText) {
180        file = fileText.file;
181        charset = fileText.charset;
182        fullText = fileText.fullText;
183        lines = fileText.lines.clone();
184        lineBreaks = ArrayUtils.clone(fileText.lineBreaks);
185    }
186
187    /**
188     * Reads file using specific decoder and returns all its content as a String.
189     * @param inputFile File to read
190     * @param decoder Charset decoder
191     * @return File's text
192     * @throws IOException Unable to open or read the file
193     */
194    private static String readFile(final File inputFile, final CharsetDecoder decoder)
195            throws IOException {
196        if (!inputFile.exists()) {
197            throw new FileNotFoundException(inputFile.getPath() + " (No such file or directory)");
198        }
199        final StringBuilder buf = new StringBuilder();
200        final FileInputStream stream = new FileInputStream(inputFile);
201        final Reader reader = new InputStreamReader(stream, decoder);
202        try {
203            final char[] chars = new char[READ_BUFFER_SIZE];
204            while (true) {
205                final int len = reader.read(chars);
206                if (len < 0) {
207                    break;
208                }
209                buf.append(chars, 0, len);
210            }
211        }
212        finally {
213            Closeables.closeQuietly(reader);
214        }
215        return buf.toString();
216    }
217
218    /**
219     * Compatibility conversion.
220     *
221     * <p>This method can be used to convert the arguments passed to
222     * {@link FileSetCheck#process(File,List)} to a FileText
223     * object. If the list of lines already is a FileText, it is
224     * returned as is. Otherwise, a new FileText is constructed by
225     * joining the lines using line feed characters.
226     *
227     * @param file the name of the file
228     * @param lines the lines of the text, without terminators
229     * @return an object representing the denoted text file
230     */
231    public static FileText fromLines(File file, List<String> lines) {
232        if (lines instanceof FileText) {
233            return (FileText) lines;
234        }
235        else {
236            return new FileText(file, lines);
237        }
238    }
239
240    /**
241     * Get the name of the file.
242     * @return an object containing the name of the file
243     */
244    public File getFile() {
245        return file;
246    }
247
248    /**
249     * Get the character set which was used to read the file.
250     * Will be {@code null} for a file reconstructed from its lines.
251     * @return the charset used when the file was read
252     */
253    public Charset getCharset() {
254        return charset;
255    }
256
257    /**
258     * Retrieve the full text of the file.
259     * @return the full text of the file
260     */
261    public CharSequence getFullText() {
262        return fullText;
263    }
264
265    /**
266     * Returns an array of all lines.
267     * {@code text.toLinesArray()} is equivalent to
268     * {@code text.toArray(new String[text.size()])}.
269     * @return an array of all lines of the text
270     */
271    public String[] toLinesArray() {
272        return lines.clone();
273    }
274
275    /**
276     * Find positions of line breaks in the full text.
277     * @return an array giving the first positions of each line.
278     */
279    private int[] findLineBreaks() {
280        if (lineBreaks == null) {
281            final int[] lineBreakPositions = new int[size() + 1];
282            lineBreakPositions[0] = 0;
283            int lineNo = 1;
284            final Matcher matcher = LINE_TERMINATOR.matcher(fullText);
285            while (matcher.find()) {
286                lineBreakPositions[lineNo] = matcher.end();
287                lineNo++;
288            }
289            if (lineNo < lineBreakPositions.length) {
290                lineBreakPositions[lineNo] = fullText.length();
291            }
292            lineBreaks = lineBreakPositions;
293        }
294        return lineBreaks;
295    }
296
297    /**
298     * Determine line and column numbers in full text.
299     * @param pos the character position in the full text
300     * @return the line and column numbers of this character
301     */
302    public LineColumn lineColumn(int pos) {
303        final int[] lineBreakPositions = findLineBreaks();
304        int lineNo = Arrays.binarySearch(lineBreakPositions, pos);
305        if (lineNo < 0) {
306            // we have: lineNo = -(insertion point) - 1
307            // we want: lineNo =  (insertion point) - 1
308            lineNo = -lineNo - 2;
309        }
310        final int startOfLine = lineBreakPositions[lineNo];
311        final int columnNo = pos - startOfLine;
312        // now we have lineNo and columnNo, both starting at zero.
313        return new LineColumn(lineNo + 1, columnNo);
314    }
315
316    /**
317     * Retrieves a line of the text by its number.
318     * The returned line will not contain a trailing terminator.
319     * @param lineNo the number of the line to get, starting at zero
320     * @return the line with the given number
321     */
322    @Override
323    public String get(final int lineNo) {
324        return lines[lineNo];
325    }
326
327    /**
328     * Counts the lines of the text.
329     * @return the number of lines in the text
330     */
331    @Override
332    public int size() {
333        return lines.length;
334    }
335
336}