// License: GPL. See LICENSE file for details.
package org.openstreetmap.josm.io;

import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.PushbackInputStream;
import java.io.UnsupportedEncodingException;

/**
 * Reader that detects the different UTF encodings (UTF-8, UTF-16BE/LE,
 * UTF-32BE/LE) from a leading byte order mark (BOM).
 *
 * Instances are obtained through {@link #create(InputStream, String)}.
 */
public class UTFInputStreamReader extends InputStreamReader {

    /** Length in bytes of the longest BOM that is recognized (UTF-32). */
    private static final int MAX_BOM_LENGTH = 4;

    /**
     * Converts an input stream to a reader, choosing the charset from the BOM
     * at the start of the stream.
     *
     * Up to four bytes are read from <code>input</code> to look for a BOM; the
     * call therefore blocks until four bytes are available or the end of the
     * stream is reached. Bytes that are not part of a BOM are pushed back, so
     * the returned reader starts with the first byte after the BOM (or with
     * the very first byte when there is no BOM).
     *
     * @param input the stream to read from
     * @param defaultEncoding Used, when no BOM was recognized. Can be null, in
     *        which case the reader is created without an explicit charset
     *        (i.e. with the default of {@link InputStreamReader}).
     * @return A reader with the correct encoding. Starts to read after the BOM.
     * @throws UnsupportedEncodingException if the chosen encoding is not supported
     * @throws IOException if reading the first bytes of the stream fails
     */
    public static UTFInputStreamReader create(InputStream input, String defaultEncoding) throws IOException {
        byte[] head = new byte[MAX_BOM_LENGTH];
        PushbackInputStream pushbackStream = new PushbackInputStream(input, MAX_BOM_LENGTH);
        int n = readHead(pushbackStream, head);

        String encoding = defaultEncoding;
        int bomLength = 0;
        // The UTF-32LE test has to come before UTF-16LE: its BOM (FF FE 00 00)
        // starts with the UTF-16LE BOM (FF FE).
        if (startsWith(head, n, 0xEF, 0xBB, 0xBF)) {
            encoding = "UTF-8";
            bomLength = 3;
        } else if (startsWith(head, n, 0x00, 0x00, 0xFE, 0xFF)) {
            encoding = "UTF-32BE";
            bomLength = 4;
        } else if (startsWith(head, n, 0xFF, 0xFE, 0x00, 0x00)) {
            encoding = "UTF-32LE";
            bomLength = 4;
        } else if (startsWith(head, n, 0xFE, 0xFF)) {
            encoding = "UTF-16BE";
            bomLength = 2;
        } else if (startsWith(head, n, 0xFF, 0xFE)) {
            encoding = "UTF-16LE";
            bomLength = 2;
        }

        // Give back everything that was read beyond the BOM.
        if (n > bomLength) {
            pushbackStream.unread(head, bomLength, n - bomLength);
        }

        if (encoding == null) {
            return new UTFInputStreamReader(pushbackStream);
        }
        return new UTFInputStreamReader(pushbackStream, encoding);
    }

    /**
     * Fills <code>buf</code> from <code>in</code>, repeating the read until the
     * buffer is full or the end of the stream is reached. A single read call
     * is allowed to return fewer bytes than requested.
     *
     * @return the number of bytes actually stored in <code>buf</code> (0 for an empty stream)
     */
    private static int readHead(InputStream in, byte[] buf) throws IOException {
        int filled = 0;
        while (filled < buf.length) {
            int got = in.read(buf, filled, buf.length - filled);
            if (got < 0) {
                break;
            }
            filled += got;
        }
        return filled;
    }

    /**
     * Tells whether the first <code>len</code> valid bytes of <code>buf</code>
     * begin with the given byte values. Bytes at index <code>len</code> and
     * above were never read and are not taken into account.
     */
    private static boolean startsWith(byte[] buf, int len, int... prefix) {
        if (len < prefix.length) {
            return false;
        }
        for (int i = 0; i < prefix.length; i++) {
            if (buf[i] != (byte) prefix[i]) {
                return false;
            }
        }
        return true;
    }

    private UTFInputStreamReader(InputStream in) {
        super(in);
    }

    private UTFInputStreamReader(InputStream in, String cs) throws UnsupportedEncodingException {
        super(in, cs);
    }
}