001 /* URLEncoder.java -- Class to convert strings to a properly encoded URL 002 Copyright (C) 1998, 1999, 2001, 2002, 2003 Free Software Foundation, Inc. 003 004 This file is part of GNU Classpath. 005 006 GNU Classpath is free software; you can redistribute it and/or modify 007 it under the terms of the GNU General Public License as published by 008 the Free Software Foundation; either version 2, or (at your option) 009 any later version. 010 011 GNU Classpath is distributed in the hope that it will be useful, but 012 WITHOUT ANY WARRANTY; without even the implied warranty of 013 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 014 General Public License for more details. 015 016 You should have received a copy of the GNU General Public License 017 along with GNU Classpath; see the file COPYING. If not, write to the 018 Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 019 02110-1301 USA. 020 021 Linking this library statically or dynamically with other modules is 022 making a combined work based on this library. Thus, the terms and 023 conditions of the GNU General Public License cover the whole 024 combination. 025 026 As a special exception, the copyright holders of this library give you 027 permission to link this library with independent modules to produce an 028 executable, regardless of the license terms of these independent 029 modules, and to copy and distribute the resulting executable under 030 terms of your choice, provided that you also meet, for each linked 031 independent module, the terms and conditions of the license of that 032 module. An independent module is a module which is not derived from 033 or based on this library. If you modify this library, you may extend 034 this exception to your version of the library, but you are not 035 obligated to do so. If you do not wish to do so, delete this 036 exception statement from your version. */ 037 038 package java.net; 039 040 import java.io.UnsupportedEncodingException; 041 042 043 /* 044 * Written using on-line Java Platform 1.2/1.4 API Specification, as well 045 * as "The Java Class Libraries", 2nd edition (Addison-Wesley, 1998). 046 * Status: Believed complete and correct. 047 */ 048 049 /** 050 * This utility class contains static methods that converts a 051 * string into a fully encoded URL string in x-www-form-urlencoded 052 * format. This format replaces certain disallowed characters with 053 * encoded equivalents. All upper case and lower case letters in the 054 * US alphabet remain as is, the space character (' ') is replaced with 055 * '+' sign, and all other characters are converted to a "%XX" format 056 * where XX is the hexadecimal representation of that character in a 057 * certain encoding (by default, the platform encoding, though the 058 * standard is "UTF-8"). 059 * <p> 060 * This method is very useful for encoding strings to be sent to CGI scripts 061 * 062 * @author Aaron M. Renn (arenn@urbanophile.com) 063 * @author Warren Levy (warrenl@cygnus.com) 064 * @author Mark Wielaard (mark@klomp.org) 065 */ 066 public class URLEncoder 067 { 068 /** 069 * This method translates the passed in string into x-www-form-urlencoded 070 * format using the default encoding. The standard encoding is 071 * "UTF-8", and the two-argument form of this method should be used 072 * instead. 073 * 074 * @param s The String to convert 075 * 076 * @return The converted String 077 * 078 * @deprecated 079 */ 080 public static String encode(String s) 081 { 082 try 083 { 084 // We default to 8859_1 for compatibility with the same 085 // default elsewhere in the library. 086 return encode(s, System.getProperty("file.encoding", "8859_1")); 087 } 088 catch (UnsupportedEncodingException uee) 089 { 090 // Should never happen since default should always be supported 091 return s; 092 } 093 } 094 095 /** 096 * This method translates the passed in string into x-www-form-urlencoded 097 * format using the character encoding to hex-encode the unsafe characters. 098 * 099 * @param s The String to convert 100 * @param encoding The encoding to use for unsafe characters 101 * 102 * @return The converted String 103 * 104 * @exception UnsupportedEncodingException If the named encoding is not 105 * supported 106 * 107 * @since 1.4 108 */ 109 public static String encode(String s, String encoding) 110 throws UnsupportedEncodingException 111 { 112 int length = s.length(); 113 int start = 0; 114 int i = 0; 115 116 StringBuffer result = new StringBuffer(length); 117 while (true) 118 { 119 while (i < length && isSafe(s.charAt(i))) 120 i++; 121 122 // Safe character can just be added 123 result.append(s.substring(start, i)); 124 125 // Are we done? 126 if (i >= length) 127 return result.toString(); 128 else if (s.charAt(i) == ' ') 129 { 130 result.append('+'); // Replace space char with plus symbol. 131 i++; 132 } 133 else 134 { 135 // Get all unsafe characters 136 start = i; 137 char c; 138 while (i < length && (c = s.charAt(i)) != ' ' && ! isSafe(c)) 139 i++; 140 141 // Convert them to %XY encoded strings 142 String unsafe = s.substring(start, i); 143 byte[] bytes = unsafe.getBytes(encoding); 144 for (int j = 0; j < bytes.length; j++) 145 { 146 result.append('%'); 147 int val = bytes[j]; 148 result.append(hex.charAt((val & 0xf0) >> 4)); 149 result.append(hex.charAt(val & 0x0f)); 150 } 151 } 152 start = i; 153 } 154 } 155 156 /** 157 * Private static method that returns true if the given char is either 158 * a uppercase or lowercase letter from 'a' till 'z', or a digit froim 159 * '0' till '9', or one of the characters '-', '_', '.' or '*'. Such 160 * 'safe' character don't have to be url encoded. 161 */ 162 private static boolean isSafe(char c) 163 { 164 return ((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') 165 || (c >= '0' && c <= '9') || c == '-' || c == '_' || c == '.' 166 || c == '*'); 167 } 168 169 /** 170 * Private constructor that does nothing. Included to avoid a default 171 * public constructor being created by the compiler. 172 */ 173 private URLEncoder() 174 { 175 } 176 177 /** 178 * Used to convert to hex. We don't use Integer.toHexString, since 179 * it converts to lower case (and the Sun docs pretty clearly 180 * specify upper case here), and because it doesn't provide a 181 * leading 0. 182 */ 183 private static final String hex = "0123456789ABCDEF"; 184 }