001 /* Charset.java --
002 Copyright (C) 2002, 2004, 2005, 2007 Free Software Foundation, Inc.
003
004 This file is part of GNU Classpath.
005
006 GNU Classpath is free software; you can redistribute it and/or modify
007 it under the terms of the GNU General Public License as published by
008 the Free Software Foundation; either version 2, or (at your option)
009 any later version.
010
011 GNU Classpath is distributed in the hope that it will be useful, but
012 WITHOUT ANY WARRANTY; without even the implied warranty of
013 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
014 General Public License for more details.
015
016 You should have received a copy of the GNU General Public License
017 along with GNU Classpath; see the file COPYING. If not, write to the
018 Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
019 02110-1301 USA.
020
021 Linking this library statically or dynamically with other modules is
022 making a combined work based on this library. Thus, the terms and
023 conditions of the GNU General Public License cover the whole
024 combination.
025
026 As a special exception, the copyright holders of this library give you
027 permission to link this library with independent modules to produce an
028 executable, regardless of the license terms of these independent
029 modules, and to copy and distribute the resulting executable under
030 terms of your choice, provided that you also meet, for each linked
031 independent module, the terms and conditions of the license of that
032 module. An independent module is a module which is not derived from
033 or based on this library. If you modify this library, you may extend
034 this exception to your version of the library, but you are not
035 obligated to do so. If you do not wish to do so, delete this
036 exception statement from your version. */
037
038
039 package java.nio.charset;
040
041 import gnu.classpath.ServiceFactory;
042 import gnu.classpath.SystemProperties;
043 import gnu.java.nio.charset.Provider;
044
045 import java.io.BufferedReader;
046 import java.io.InputStreamReader;
047 import java.net.URL;
048 import java.nio.ByteBuffer;
049 import java.nio.CharBuffer;
050 import java.nio.charset.spi.CharsetProvider;
051 import java.util.Collections;
052 import java.util.Enumeration;
053 import java.util.HashSet;
054 import java.util.Iterator;
055 import java.util.LinkedHashSet;
056 import java.util.Locale;
057 import java.util.Set;
058 import java.util.SortedMap;
059 import java.util.TreeMap;
060
061 /**
062 * @author Jesse Rosenstock
063 * @since 1.4
064 * @status updated to 1.5
065 */
066 public abstract class Charset implements Comparable<Charset>
067 {
068 private CharsetEncoder cachedEncoder;
069 private CharsetDecoder cachedDecoder;
070
071 /**
072 * Extra Charset providers.
073 */
074 private static CharsetProvider[] providers;
075
076 private final String canonicalName;
077 private final String[] aliases;
078
079 protected Charset (String canonicalName, String[] aliases)
080 {
081 checkName (canonicalName);
082 if (aliases != null)
083 {
084 int n = aliases.length;
085 for (int i = 0; i < n; ++i)
086 checkName (aliases[i]);
087 }
088
089 cachedEncoder = null;
090 cachedDecoder = null;
091 this.canonicalName = canonicalName;
092 this.aliases = aliases;
093 }
094
095 /**
096 * @throws IllegalCharsetNameException if the name is illegal
097 */
098 private static void checkName (String name)
099 {
100 int n = name.length ();
101
102 if (n == 0)
103 throw new IllegalCharsetNameException (name);
104
105 char ch = name.charAt (0);
106 if (!(('A' <= ch && ch <= 'Z')
107 || ('a' <= ch && ch <= 'z')
108 || ('0' <= ch && ch <= '9')))
109 throw new IllegalCharsetNameException (name);
110
111 for (int i = 1; i < n; ++i)
112 {
113 ch = name.charAt (i);
114 if (!(('A' <= ch && ch <= 'Z')
115 || ('a' <= ch && ch <= 'z')
116 || ('0' <= ch && ch <= '9')
117 || ch == '-' || ch == '.' || ch == ':' || ch == '_'))
118 throw new IllegalCharsetNameException (name);
119 }
120 }
121
122 /**
123 * Returns the system default charset.
124 *
125 * This may be set by the user or VM with the file.encoding
126 * property.
127 *
128 * @since 1.5
129 */
130 public static Charset defaultCharset()
131 {
132 String encoding;
133
134 try
135 {
136 encoding = SystemProperties.getProperty("file.encoding");
137 }
138 catch(SecurityException e)
139 {
140 // Use fallback.
141 encoding = "ISO-8859-1";
142 }
143 catch(IllegalArgumentException e)
144 {
145 // Use fallback.
146 encoding = "ISO-8859-1";
147 }
148
149 try
150 {
151 return forName(encoding);
152 }
153 catch(UnsupportedCharsetException e)
154 {
155 // Ignore.
156 }
157 catch(IllegalCharsetNameException e)
158 {
159 // Ignore.
160 }
161 catch(IllegalArgumentException e)
162 {
163 // Ignore.
164 }
165
166 throw new IllegalStateException("Can't get default charset!");
167 }
168
169 public static boolean isSupported (String charsetName)
170 {
171 return charsetForName (charsetName) != null;
172 }
173
174 /**
175 * Returns the Charset instance for the charset of the given name.
176 *
177 * @param charsetName
178 * @return the Charset instance for the indicated charset
179 * @throws UnsupportedCharsetException if this VM does not support
180 * the charset of the given name.
181 * @throws IllegalCharsetNameException if the given charset name is
182 * legal.
183 * @throws IllegalArgumentException if <code>charsetName</code> is null.
184 */
185 public static Charset forName (String charsetName)
186 {
187 // Throws IllegalArgumentException as the JDK does.
188 if(charsetName == null)
189 throw new IllegalArgumentException("Charset name must not be null.");
190
191 Charset cs = charsetForName (charsetName);
192 if (cs == null)
193 throw new UnsupportedCharsetException (charsetName);
194 return cs;
195 }
196
197 /**
198 * Retrieves a charset for the given charset name.
199 *
200 * @return A charset object for the charset with the specified name, or
201 * <code>null</code> if no such charset exists.
202 *
203 * @throws IllegalCharsetNameException if the name is illegal
204 */
205 private static Charset charsetForName(String charsetName)
206 {
207 checkName (charsetName);
208 // Try the default provider first
209 // (so we don't need to load external providers unless really necessary)
210 // if it is an exotic charset try loading the external providers.
211 Charset cs = provider().charsetForName(charsetName);
212 if (cs == null)
213 {
214 CharsetProvider[] providers = providers2();
215 for (int i = 0; i < providers.length; i++)
216 {
217 cs = providers[i].charsetForName(charsetName);
218 if (cs != null)
219 break;
220 }
221 }
222 return cs;
223 }
224
225 public static SortedMap<String, Charset> availableCharsets()
226 {
227 TreeMap<String, Charset> charsets
228 = new TreeMap(String.CASE_INSENSITIVE_ORDER);
229 for (Iterator<Charset> i = provider().charsets(); i.hasNext(); )
230 {
231 Charset cs = i.next();
232 charsets.put(cs.name(), cs);
233 }
234
235 CharsetProvider[] providers = providers2();
236 for (int j = 0; j < providers.length; j++)
237 {
238 for (Iterator<Charset> i = providers[j].charsets(); i.hasNext(); )
239 {
240 Charset cs = (Charset) i.next();
241 charsets.put(cs.name(), cs);
242 }
243 }
244
245 return Collections.unmodifiableSortedMap(charsets);
246 }
247
248 private static CharsetProvider provider()
249 {
250 try
251 {
252 String s = System.getProperty("charset.provider");
253 if (s != null)
254 {
255 CharsetProvider p =
256 (CharsetProvider) ((Class.forName(s)).newInstance());
257 return p;
258 }
259 }
260 catch (Exception e)
261 {
262 // Ignore.
263 }
264
265 return Provider.provider();
266 }
267
268 /**
269 * We need to support multiple providers, reading them from
270 * java.nio.charset.spi.CharsetProvider in the resource directory
271 * META-INF/services. This returns the "extra" charset providers.
272 */
273 private static CharsetProvider[] providers2()
274 {
275 if (providers == null)
276 {
277 try
278 {
279 Iterator i = ServiceFactory.lookupProviders(CharsetProvider.class);
280 LinkedHashSet set = new LinkedHashSet();
281 while (i.hasNext())
282 set.add(i.next());
283
284 providers = new CharsetProvider[set.size()];
285 set.toArray(providers);
286 }
287 catch (Exception e)
288 {
289 throw new RuntimeException(e);
290 }
291 }
292 return providers;
293 }
294
295 public final String name ()
296 {
297 return canonicalName;
298 }
299
300 public final Set<String> aliases ()
301 {
302 if (aliases == null)
303 return Collections.<String>emptySet();
304
305 // should we cache the aliasSet instead?
306 int n = aliases.length;
307 HashSet<String> aliasSet = new HashSet<String> (n);
308 for (int i = 0; i < n; ++i)
309 aliasSet.add (aliases[i]);
310 return Collections.unmodifiableSet (aliasSet);
311 }
312
313 public String displayName ()
314 {
315 return canonicalName;
316 }
317
318 public String displayName (Locale locale)
319 {
320 return canonicalName;
321 }
322
323 public final boolean isRegistered ()
324 {
325 return (!canonicalName.startsWith ("x-")
326 && !canonicalName.startsWith ("X-"));
327 }
328
329 public abstract boolean contains (Charset cs);
330
331 public abstract CharsetDecoder newDecoder ();
332
333 public abstract CharsetEncoder newEncoder ();
334
335 public boolean canEncode ()
336 {
337 return true;
338 }
339
340 // NB: This implementation serializes different threads calling
341 // Charset.encode(), a potential performance problem. It might
342 // be better to remove the cache, or use ThreadLocal to cache on
343 // a per-thread basis.
344 public final synchronized ByteBuffer encode (CharBuffer cb)
345 {
346 try
347 {
348 if (cachedEncoder == null)
349 {
350 cachedEncoder = newEncoder ()
351 .onMalformedInput (CodingErrorAction.REPLACE)
352 .onUnmappableCharacter (CodingErrorAction.REPLACE);
353 } else
354 cachedEncoder.reset();
355 return cachedEncoder.encode (cb);
356 }
357 catch (CharacterCodingException e)
358 {
359 throw new AssertionError (e);
360 }
361 }
362
363 public final ByteBuffer encode (String str)
364 {
365 return encode (CharBuffer.wrap (str));
366 }
367
368 // NB: This implementation serializes different threads calling
369 // Charset.decode(), a potential performance problem. It might
370 // be better to remove the cache, or use ThreadLocal to cache on
371 // a per-thread basis.
372 public final synchronized CharBuffer decode (ByteBuffer bb)
373 {
374 try
375 {
376 if (cachedDecoder == null)
377 {
378 cachedDecoder = newDecoder ()
379 .onMalformedInput (CodingErrorAction.REPLACE)
380 .onUnmappableCharacter (CodingErrorAction.REPLACE);
381 } else
382 cachedDecoder.reset();
383
384 return cachedDecoder.decode (bb);
385 }
386 catch (CharacterCodingException e)
387 {
388 throw new AssertionError (e);
389 }
390 }
391
392 public final int compareTo (Charset other)
393 {
394 return canonicalName.compareToIgnoreCase (other.canonicalName);
395 }
396
397 public final int hashCode ()
398 {
399 return canonicalName.hashCode ();
400 }
401
402 public final boolean equals (Object ob)
403 {
404 if (ob instanceof Charset)
405 return canonicalName.equalsIgnoreCase (((Charset) ob).canonicalName);
406 else
407 return false;
408 }
409
410 public final String toString ()
411 {
412 return canonicalName;
413 }
414 }