001 /* CharsetEncoder.java --
002 Copyright (C) 2002 Free Software Foundation, Inc.
003
004 This file is part of GNU Classpath.
005
006 GNU Classpath is free software; you can redistribute it and/or modify
007 it under the terms of the GNU General Public License as published by
008 the Free Software Foundation; either version 2, or (at your option)
009 any later version.
010
011 GNU Classpath is distributed in the hope that it will be useful, but
012 WITHOUT ANY WARRANTY; without even the implied warranty of
013 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
014 General Public License for more details.
015
016 You should have received a copy of the GNU General Public License
017 along with GNU Classpath; see the file COPYING. If not, write to the
018 Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
019 02110-1301 USA.
020
021 Linking this library statically or dynamically with other modules is
022 making a combined work based on this library. Thus, the terms and
023 conditions of the GNU General Public License cover the whole
024 combination.
025
026 As a special exception, the copyright holders of this library give you
027 permission to link this library with independent modules to produce an
028 executable, regardless of the license terms of these independent
029 modules, and to copy and distribute the resulting executable under
030 terms of your choice, provided that you also meet, for each linked
031 independent module, the terms and conditions of the license of that
032 module. An independent module is a module which is not derived from
033 or based on this library. If you modify this library, you may extend
034 this exception to your version of the library, but you are not
035 obligated to do so. If you do not wish to do so, delete this
036 exception statement from your version. */
037
038 package java.nio.charset;
039
040 import java.nio.ByteBuffer;
041 import java.nio.CharBuffer;
042
/**
 * Base class for engines that turn a sequence of characters into bytes
 * of a particular {@link Charset}.
 *
 * <p>An encoding operation moves through four internal states:
 * reset, coding (after {@link #encode(CharBuffer, ByteBuffer, boolean)}
 * with <code>endOfInput == false</code>), end (after the same call with
 * <code>endOfInput == true</code>) and flushed (after
 * {@link #flush(ByteBuffer)}).  Methods that are invoked in a state
 * they do not accept throw {@link IllegalStateException}.</p>
 *
 * <p>Subclasses supply the actual conversion in
 * {@link #encodeLoop(CharBuffer, ByteBuffer)} and may override the
 * <code>impl*</code> hooks, all of which do nothing by default.</p>
 *
 * @author Jesse Rosenstock
 * @since 1.4
 */
public abstract class CharsetEncoder
{
  // States of the current encoding operation.
  private static final int STATE_RESET = 0;
  private static final int STATE_CODING = 1;
  private static final int STATE_END = 2;
  private static final int STATE_FLUSHED = 3;

  // Replacement used by the three-argument constructor: a single '?'.
  private static final byte[] DEFAULT_REPLACEMENT = {(byte)'?'};

  private final Charset charset;
  private final float averageBytesPerChar;
  private final float maxBytesPerChar;
  private byte[] replacement;

  private int state = STATE_RESET;

  private CodingErrorAction malformedInputAction
    = CodingErrorAction.REPORT;
  private CodingErrorAction unmappableCharacterAction
    = CodingErrorAction.REPORT;

  /**
   * Creates an encoder whose replacement is the single byte
   * <code>'?'</code>.
   *
   * @param cs the charset this encoder belongs to
   * @param averageBytesPerChar expected bytes produced per input char
   * @param maxBytesPerChar maximum bytes produced per input char
   * @throws IllegalArgumentException if either float is not positive
   */
  protected CharsetEncoder (Charset cs, float averageBytesPerChar,
                            float maxBytesPerChar)
  {
    this (cs, averageBytesPerChar, maxBytesPerChar, DEFAULT_REPLACEMENT);
  }

  /**
   * Creates an encoder with an explicit replacement byte sequence.
   * The replacement is copied but, unlike in
   * {@link #replaceWith(byte[])}, it is not validated here.
   *
   * @param cs the charset this encoder belongs to
   * @param averageBytesPerChar expected bytes produced per input char
   * @param maxBytesPerChar maximum bytes produced per input char
   * @param replacement initial replacement byte sequence
   * @throws IllegalArgumentException if either float is not positive
   */
  protected CharsetEncoder (Charset cs, float averageBytesPerChar,
                            float maxBytesPerChar, byte[] replacement)
  {
    if (averageBytesPerChar <= 0.0f)
      throw new IllegalArgumentException ("Non-positive averageBytesPerChar");
    if (maxBytesPerChar <= 0.0f)
      throw new IllegalArgumentException ("Non-positive maxBytesPerChar");

    this.charset = cs;
    this.averageBytesPerChar = averageBytesPerChar;
    this.maxBytesPerChar = maxBytesPerChar;
    // Keep a private copy so that neither the caller's array nor the
    // shared DEFAULT_REPLACEMENT can be modified through this encoder.
    this.replacement = copyOf (replacement);
    implReplaceWith (this.replacement);
  }

  /**
   * Returns the average number of bytes produced per input character,
   * as given to the constructor.
   */
  public final float averageBytesPerChar ()
  {
    return averageBytesPerChar;
  }

  /**
   * Tells whether this encoder can encode the given character.
   *
   * @param c the character to test
   * @return <code>true</code> if a trial encoding of <code>c</code>
   *         succeeds
   * @throws IllegalStateException if an encoding operation is in
   *         progress
   */
  public boolean canEncode (char c)
  {
    CharBuffer cb = CharBuffer.allocate (1).put (c);
    cb.flip ();
    return canEncode (cb);
  }

  /**
   * Tells whether this encoder can encode the given character sequence.
   * A {@link CharBuffer} argument is duplicated first, so its position
   * and limit are left untouched.
   *
   * @param cs the characters to test
   * @return <code>true</code> if a trial encoding of <code>cs</code>
   *         succeeds
   * @throws IllegalStateException if an encoding operation is in
   *         progress
   */
  public boolean canEncode (CharSequence cs)
  {
    CharBuffer cb;
    if (cs instanceof CharBuffer)
      cb = ((CharBuffer) cs).duplicate ();
    else
      cb = CharBuffer.wrap (cs);
    return canEncode (cb);
  }

  /**
   * Performs a trial encoding of <code>cb</code> with both error
   * actions temporarily forced to REPORT.  The previous actions are
   * restored and the encoder is reset afterwards, whatever the outcome.
   */
  private boolean canEncode (CharBuffer cb)
  {
    // It is an error if a coding operation is "in progress"
    // I take that to mean the state is not reset or flushed.
    // XXX: check "in progress" everywhere
    if (state == STATE_FLUSHED)
      reset ();
    else if (state != STATE_RESET)
      throw new IllegalStateException ();

    CodingErrorAction savedMalformed = malformedInputAction;
    CodingErrorAction savedUnmappable = unmappableCharacterAction;

    try
      {
        if (savedMalformed != CodingErrorAction.REPORT)
          onMalformedInput (CodingErrorAction.REPORT);
        if (savedUnmappable != CodingErrorAction.REPORT)
          onUnmappableCharacter (CodingErrorAction.REPORT);
        // With REPORT in effect any malformed or unmappable input
        // surfaces as a CharacterCodingException.
        encode (cb);
      }
    catch (CharacterCodingException e)
      {
        return false;
      }
    finally
      {
        if (savedMalformed != CodingErrorAction.REPORT)
          onMalformedInput (savedMalformed);
        if (savedUnmappable != CodingErrorAction.REPORT)
          onUnmappableCharacter (savedUnmappable);
        // The trial run must not leave an operation behind.
        reset ();
      }

    return true;
  }

  /**
   * Returns the charset that was given to the constructor.
   */
  public final Charset charset ()
  {
    return charset;
  }

  /**
   * Convenience method that runs a complete encoding operation:
   * encodes all remaining characters of <code>in</code>, flushes, and
   * returns a freshly allocated buffer holding exactly the produced
   * bytes.
   *
   * <p>If the previous operation was completed (flushed) the encoder
   * is reset first.  If this method throws, the encoder is left as it
   * was at the point of failure; call {@link #reset()} before reusing
   * it.</p>
   *
   * @param in the characters to encode
   * @return a buffer containing the encoded bytes, positioned at zero
   * @throws IllegalStateException if an encoding operation is in
   *         progress
   * @throws CharacterCodingException if a malformed or unmappable
   *         result is reported
   */
  public final ByteBuffer encode (CharBuffer in)
    throws CharacterCodingException
  {
    // XXX: Sun's Javadoc seems to contradict itself saying an
    // IllegalStateException is thrown "if a decoding operation is already
    // in progress" and also that "it resets this Encoder".
    // A flushed encoder has no operation in progress, so it is reset
    // here (as canEncode does); coding/end states are still rejected.
    if (state == STATE_FLUSHED)
      reset ();
    else if (state != STATE_RESET)
      throw new IllegalStateException ();

    // REVIEW: Using max instead of average may allocate a very large
    // buffer.  Maybe we should do something more efficient?
    int remaining = in.remaining ();
    ByteBuffer out
      = ByteBuffer.allocate ((int) (remaining * maxBytesPerChar ()));

    if (remaining == 0)
      {
        state = STATE_FLUSHED;
        return out;
      }

    // The size estimate can be too small (float truncation, or an
    // encoder that emits more than it declared), so overflow is
    // answered by growing the buffer rather than dropping output.
    CoderResult cr = encode (in, out, true);
    while (cr.isOverflow ())
      {
        out = grow (out);
        cr = encode (in, out, true);
      }
    if (cr.isError ())
      cr.throwException ();

    cr = flush (out);
    while (cr.isOverflow ())
      {
        out = grow (out);
        cr = flush (out);
      }
    if (cr.isError ())
      cr.throwException ();

    out.flip ();

    // Unfortunately, resizing the actual bytebuffer array is required.
    byte[] resized = new byte[out.remaining ()];
    out.get (resized);
    return ByteBuffer.wrap (resized);
  }

  /**
   * Encodes as many characters as possible from <code>in</code> into
   * <code>out</code>, applying the configured error actions to
   * malformed and unmappable results.
   *
   * @param in source characters
   * @param out destination bytes
   * @param endOfInput <code>true</code> if no further input will follow
   * @return underflow, overflow, or an error result when the relevant
   *         action is REPORT
   * @throws IllegalStateException if the encoder is flushed, or is in
   *         the end state and <code>endOfInput</code> is false
   * @throws CoderMalfunctionError if {@link #encodeLoop} throws a
   *         runtime exception
   */
  public final CoderResult encode (CharBuffer in, ByteBuffer out,
                                   boolean endOfInput)
  {
    // XXX: Need to check for "previous step was an invocation [not] of
    // this method with a value of true for the endOfInput parameter but
    // a return value indicating an incomplete decoding operation"
    // XXX: We will not check the previous return value, just
    // that the previous call passed true for endOfInput
    boolean allowed = state == STATE_RESET
                      || state == STATE_CODING
                      || (endOfInput && state == STATE_END);
    if (!allowed)
      throw new IllegalStateException ();
    state = endOfInput ? STATE_END : STATE_CODING;

    for (;;)
      {
        CoderResult cr;
        try
          {
            cr = encodeLoop (in, out);
          }
        catch (RuntimeException e)
          {
            throw new CoderMalfunctionError (e);
          }

        if (cr.isOverflow ())
          return cr;

        if (cr.isUnderflow ())
          {
            // Characters left over at the very end of the input can
            // never be completed, so they are treated as malformed.
            if (endOfInput && in.hasRemaining ())
              cr = CoderResult.malformedForLength (in.remaining ());
            else
              return cr;
          }

        CodingErrorAction action = cr.isMalformed ()
                                   ? malformedInputAction
                                   : unmappableCharacterAction;

        if (action == CodingErrorAction.REPORT)
          return cr;

        if (action == CodingErrorAction.REPLACE)
          {
            if (out.remaining () < replacement.length)
              return CoderResult.OVERFLOW;
            out.put (replacement);
          }

        // IGNORE and REPLACE both skip the offending input.
        in.position (in.position () + cr.length ());
      }
  }

  /**
   * Encodes characters from <code>in</code> into <code>out</code>.
   * Implemented by concrete encoders; invoked repeatedly by
   * {@link #encode(CharBuffer, ByteBuffer, boolean)}.
   *
   * @param in source characters
   * @param out destination bytes
   * @return the result of this encoding step
   */
  protected abstract CoderResult encodeLoop (CharBuffer in, ByteBuffer out);

  /**
   * Flushes this encoder by delegating to {@link #implFlush}.
   *
   * <p>When the result is an overflow the encoder stays in its previous
   * state, so this method can be invoked again with a buffer that has
   * more room.  Any other result marks the encoder as flushed.</p>
   *
   * @param out destination bytes
   * @return the result returned by {@link #implFlush}
   * @throws IllegalStateException if the encoder is neither reset nor
   *         at end of input
   */
  public final CoderResult flush (ByteBuffer out)
  {
    // It seems weird that you can flush after reset, but Sun's javadoc
    // says an IllegalStateException is thrown "If the previous step of the
    // current decoding operation was an invocation neither of the reset
    // method nor ... of the three-argument encode method with a value of
    // true for the endOfInput parameter."
    // Further note that flush() only requires that there not be
    // an IllegalStateException if the previous step was a call to
    // encode with true as the last argument.  It does not require
    // that the call succeeded.  encode() does require that it succeeded.
    // XXX: test this to see if reality matches javadoc
    if (state != STATE_RESET && state != STATE_END)
      throw new IllegalStateException ();

    int previous = state;
    state = STATE_FLUSHED;
    CoderResult cr = implFlush (out);
    if (cr.isOverflow ())
      state = previous;   // not finished: allow a retry with more room
    return cr;
  }

  /**
   * Flush hook for subclasses.  The default implementation writes
   * nothing and returns {@link CoderResult#UNDERFLOW}.
   *
   * @param out destination bytes
   * @return the result of flushing
   */
  protected CoderResult implFlush (ByteBuffer out)
  {
    return CoderResult.UNDERFLOW;
  }

  /**
   * Hook invoked by {@link #onMalformedInput} after the action has been
   * stored.  The default implementation does nothing.
   *
   * @param newAction the new action
   */
  protected void implOnMalformedInput (CodingErrorAction newAction)
  {
    // default implementation does nothing
  }

  /**
   * Hook invoked by {@link #onUnmappableCharacter} after the action has
   * been stored.  The default implementation does nothing.
   *
   * @param newAction the new action
   */
  protected void implOnUnmappableCharacter (CodingErrorAction newAction)
  {
    // default implementation does nothing
  }

  /**
   * Hook invoked by the constructor and by {@link #replaceWith} after
   * the replacement has been stored.  The default implementation does
   * nothing.
   *
   * @param newReplacement the encoder's replacement bytes
   */
  protected void implReplaceWith (byte[] newReplacement)
  {
    // default implementation does nothing
  }

  /**
   * Hook invoked by {@link #reset}.  The default implementation does
   * nothing.
   */
  protected void implReset ()
  {
    // default implementation does nothing
  }

  /**
   * Tells whether the given bytes are acceptable as a replacement, by
   * decoding them with a new decoder of this encoder's charset and
   * checking that no error is reported.
   *
   * @param replacement candidate replacement bytes
   * @return <code>true</code> if decoding reports no error
   */
  public boolean isLegalReplacement (byte[] replacement)
  {
    // TODO: cache the decoder
    // error actions will be REPORT after construction
    CharsetDecoder decoder = charset.newDecoder ();
    ByteBuffer bytes = ByteBuffer.wrap (replacement);
    int capacity = (int) (replacement.length * decoder.maxCharsPerByte ());
    CharBuffer chars = CharBuffer.allocate (capacity);
    return !decoder.decode (bytes, chars, true).isError ();
  }

  /**
   * Returns the current action for malformed input.
   */
  public CodingErrorAction malformedInputAction ()
  {
    return malformedInputAction;
  }

  /**
   * Returns the maximum number of bytes produced per input character,
   * as given to the constructor.
   */
  public final float maxBytesPerChar ()
  {
    return maxBytesPerChar;
  }

  /**
   * Sets the action taken on malformed input and notifies
   * {@link #implOnMalformedInput}.
   *
   * @param newAction the new action
   * @return this encoder
   * @throws IllegalArgumentException if <code>newAction</code> is null
   */
  public final CharsetEncoder onMalformedInput (CodingErrorAction newAction)
  {
    if (newAction == null)
      throw new IllegalArgumentException ("Null action");

    malformedInputAction = newAction;
    implOnMalformedInput (newAction);
    return this;
  }

  /**
   * Returns the current action for unmappable characters.
   */
  public CodingErrorAction unmappableCharacterAction ()
  {
    return unmappableCharacterAction;
  }

  /**
   * Sets the action taken on unmappable characters and notifies
   * {@link #implOnUnmappableCharacter}.
   *
   * @param newAction the new action
   * @return this encoder
   * @throws IllegalArgumentException if <code>newAction</code> is null
   */
  public final CharsetEncoder onUnmappableCharacter
    (CodingErrorAction newAction)
  {
    if (newAction == null)
      throw new IllegalArgumentException ("Null action");

    unmappableCharacterAction = newAction;
    implOnUnmappableCharacter (newAction);
    return this;
  }

  /**
   * Returns a copy of the replacement byte sequence; modifying the
   * returned array does not affect this encoder.
   */
  public final byte[] replacement ()
  {
    return copyOf (replacement);
  }

  /**
   * Changes the replacement byte sequence.  The argument is copied, so
   * later changes to the caller's array have no effect.
   *
   * @param newReplacement the new replacement
   * @return this encoder
   * @throws IllegalArgumentException if the replacement is null, empty,
   *         longer than {@link #maxBytesPerChar()}, or rejected by
   *         {@link #isLegalReplacement}
   */
  public final CharsetEncoder replaceWith (byte[] newReplacement)
  {
    if (newReplacement == null)
      throw new IllegalArgumentException ("Null replacement");
    if (newReplacement.length == 0)
      throw new IllegalArgumentException ("Empty replacement");
    // A replacement stands in for one character, so it must respect
    // the same per-character bound as ordinary output.
    if (newReplacement.length > maxBytesPerChar)
      throw new IllegalArgumentException ("Replacement too long");

    byte[] copy = copyOf (newReplacement);
    if (!isLegalReplacement (copy))
      throw new IllegalArgumentException ("Illegal replacement");

    this.replacement = copy;
    implReplaceWith (copy);
    return this;
  }

  /**
   * Returns this encoder to the reset state and notifies
   * {@link #implReset}.
   *
   * @return this encoder
   */
  public final CharsetEncoder reset ()
  {
    state = STATE_RESET;
    implReset ();
    return this;
  }

  /** Returns a copy of <code>bytes</code>, or null if it is null. */
  private static byte[] copyOf (byte[] bytes)
  {
    return bytes == null ? null : (byte[]) bytes.clone ();
  }

  /**
   * Returns a larger buffer holding everything written to
   * <code>full</code> so far, positioned for further writing.
   */
  private static ByteBuffer grow (ByteBuffer full)
  {
    // "+ 1" guarantees progress even from a zero-capacity buffer.
    ByteBuffer bigger = ByteBuffer.allocate (full.capacity () * 2 + 1);
    full.flip ();
    bigger.put (full);
    return bigger;
  }
}