001// License: GPL. For details, see LICENSE file.
002package org.openstreetmap.josm.io;
003
004import java.io.IOException;
005import java.io.InputStream;
006import java.io.InputStreamReader;
007import java.io.PushbackInputStream;
008import java.io.UnsupportedEncodingException;
009
/**
 * Detects the different UTF encodings from byte order mark.
 * <p>
 * Use one of the {@code create} factory methods: they inspect up to the first four bytes
 * of the stream, pick the matching UTF-8 / UTF-16 / UTF-32 charset when a byte order mark
 * (BOM) is present and return a reader positioned just after the BOM.
 * @since 3372
 */
public final class UTFInputStreamReader extends InputStreamReader {

    /** Length in bytes of the longest BOM that is recognized (UTF-32). */
    private static final int MAX_BOM_LENGTH = 4;

    private UTFInputStreamReader(InputStream in, String cs) throws UnsupportedEncodingException {
        super(in, cs);
    }

    /**
     * Creates a new {@link InputStreamReader} from the {@link InputStream} with UTF-8 as default encoding.
     * @param input input stream
     * @return A reader with the correct encoding. Starts to read after the BOM.
     * @throws IOException if any I/O error occurs
     * @see #create(java.io.InputStream, String)
     */
    public static UTFInputStreamReader create(InputStream input) throws IOException {
        return create(input, "UTF-8");
    }

    /**
     * Creates a new {@link InputStreamReader} from the {@link InputStream}.
     * <p>
     * The UTF-32 signatures are tested before the UTF-16 ones because the UTF-32LE BOM
     * ({@code FF FE 00 00}) starts with the UTF-16LE BOM ({@code FF FE}).
     * @param input input stream
     * @param defaultEncoding Used, when no BOM was recognized. Can be null, in which case UTF-8 is used.
     * @return A reader with the correct encoding. Starts to read after the BOM.
     * @throws IOException if any I/O error occurs
     */
    public static UTFInputStreamReader create(InputStream input, String defaultEncoding) throws IOException {
        PushbackInputStream pushbackStream = new PushbackInputStream(input, MAX_BOM_LENGTH);
        byte[] bom = new byte[MAX_BOM_LENGTH];
        int n = readPrefix(pushbackStream, bom);

        String encoding = defaultEncoding;
        int bomLength = 0;
        if (hasSignature(bom, n, 0xEF, 0xBB, 0xBF)) {
            encoding = "UTF-8";
            bomLength = 3;
        } else if (hasSignature(bom, n, 0x00, 0x00, 0xFE, 0xFF)) {
            encoding = "UTF-32BE";
            bomLength = 4;
        } else if (hasSignature(bom, n, 0xFF, 0xFE, 0x00, 0x00)) {
            encoding = "UTF-32LE";
            bomLength = 4;
        } else if (hasSignature(bom, n, 0xFE, 0xFF)) {
            encoding = "UTF-16BE";
            bomLength = 2;
        } else if (hasSignature(bom, n, 0xFF, 0xFE)) {
            encoding = "UTF-16LE";
            bomLength = 2;
        }

        // Give back every byte that was read but is not part of the BOM.
        if (n > bomLength) {
            pushbackStream.unread(bom, bomLength, n - bomLength);
        }

        if (encoding == null) {
            encoding = "UTF-8";
        }
        return new UTFInputStreamReader(pushbackStream, encoding);
    }

    /**
     * Fills {@code buffer} from {@code in}, repeating the read until the buffer is full or the
     * stream ends. A single {@code read} call may return fewer bytes than requested even though
     * more data follows, which would otherwise hide part of a BOM.
     * @param in stream to read from
     * @param buffer destination buffer
     * @return number of bytes actually stored in {@code buffer} (0 for an empty stream)
     * @throws IOException if any I/O error occurs
     */
    private static int readPrefix(InputStream in, byte[] buffer) throws IOException {
        int total = 0;
        while (total < buffer.length) {
            int count = in.read(buffer, total, buffer.length - total);
            if (count <= 0) {
                // -1 is end of stream; 0 is treated the same way to avoid spinning on a misbehaving stream
                break;
            }
            total += count;
        }
        return total;
    }

    /**
     * Tells whether the first bytes of {@code data} equal {@code signature}.
     * Only the {@code available} bytes that were really read are considered, so the zero
     * padding of a partially filled buffer can never be mistaken for BOM bytes.
     * @param data buffer holding the start of the stream
     * @param available number of valid bytes in {@code data}
     * @param signature expected byte values, each given as an int in the range 0x00-0xFF
     * @return {@code true} if {@code data} starts with {@code signature}
     */
    private static boolean hasSignature(byte[] data, int available, int... signature) {
        if (available < signature.length) {
            return false;
        }
        for (int i = 0; i < signature.length; i++) {
            if (data[i] != (byte) signature[i]) {
                return false;
            }
        }
        return true;
    }
}