001 /* GZIPInputStream.java - Input filter for reading gzip file
002 Copyright (C) 1999, 2000, 2001, 2002, 2004 Free Software Foundation, Inc.
003
004 This file is part of GNU Classpath.
005
006 GNU Classpath is free software; you can redistribute it and/or modify
007 it under the terms of the GNU General Public License as published by
008 the Free Software Foundation; either version 2, or (at your option)
009 any later version.
010
011 GNU Classpath is distributed in the hope that it will be useful, but
012 WITHOUT ANY WARRANTY; without even the implied warranty of
013 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
014 General Public License for more details.
015
016 You should have received a copy of the GNU General Public License
017 along with GNU Classpath; see the file COPYING. If not, write to the
018 Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
019 02110-1301 USA.
020
021 Linking this library statically or dynamically with other modules is
022 making a combined work based on this library. Thus, the terms and
023 conditions of the GNU General Public License cover the whole
024 combination.
025
026 As a special exception, the copyright holders of this library give you
027 permission to link this library with independent modules to produce an
028 executable, regardless of the license terms of these independent
029 modules, and to copy and distribute the resulting executable under
030 terms of your choice, provided that you also meet, for each linked
031 independent module, the terms and conditions of the license of that
032 module. An independent module is a module which is not derived from
033 or based on this library. If you modify this library, you may extend
034 this exception to your version of the library, but you are not
035 obligated to do so. If you do not wish to do so, delete this
036 exception statement from your version. */
037
038
039 package java.util.zip;
040
041 import java.io.EOFException;
042 import java.io.IOException;
043 import java.io.InputStream;
044
/**
 * This filter stream is used to decompress a "GZIP" format stream.
 * The "GZIP" format is described in RFC 1952.
 *
 * <p>The header is parsed when the stream is constructed; the CRC-32 and
 * length stored in the trailer are verified once the compressed data has
 * been fully inflated. Only a single GZIP member is decoded: after its
 * trailer has been checked the stream reports end of stream.
 *
 * @author John Leuner
 * @author Tom Tromey
 * @since JDK 1.1
 */
public class GZIPInputStream
  extends InflaterInputStream
{
  /**
   * The magic number found at the start of a GZIP stream.
   */
  public static final int GZIP_MAGIC = 0x8b1f;

  /**
   * The mask for bit 0 of the flag byte.
   */
  static final int FTEXT = 0x1;

  /**
   * The mask for bit 1 of the flag byte.
   */
  static final int FHCRC = 0x2;

  /**
   * The mask for bit 2 of the flag byte.
   */
  static final int FEXTRA = 0x4;

  /**
   * The mask for bit 3 of the flag byte.
   */
  static final int FNAME = 0x8;

  /**
   * The mask for bit 4 of the flag byte.
   */
  static final int FCOMMENT = 0x10;

  /**
   * The mask for the reserved bits (5, 6 and 7) of the flag byte.
   */
  private static final int RESERVED_FLAGS = 0xe0;

  /**
   * The CRC-32 checksum value for uncompressed data.
   */
  protected CRC32 crc;

  /**
   * Indicates whether or not the end of the stream has been reached.
   */
  protected boolean eos;

  /**
   * Indicates whether or not the GZIP header has been read in.
   */
  private boolean readGZIPHeader;

  /**
   * Creates a GZIPInputStream with the default buffer size
   * of 4096 bytes.
   *
   * @param in The stream to read compressed data from
   *           (in GZIP format).
   *
   * @throws IOException if an error occurs during an I/O operation.
   */
  public GZIPInputStream(InputStream in)
    throws IOException
  {
    this(in, 4096);
  }

  /**
   * Creates a GZIPInputStream with the specified buffer size.
   * The GZIP header is read from <code>in</code> immediately.
   *
   * @param in The stream to read compressed data from
   *           (in GZIP format).
   * @param size The size of the buffer to use.
   *
   * @throws IOException if an error occurs during an I/O operation.
   * @throws IllegalArgumentException if <code>size</code>
   * is less than or equal to 0.
   */
  public GZIPInputStream(InputStream in, int size)
    throws IOException
  {
    // GZIP wraps a raw deflate stream, hence the "nowrap" inflater.
    super(in, new Inflater(true), size);
    crc = new CRC32();
    readHeader();
  }

  /**
   * Closes the input stream.
   *
   * @throws IOException if an error occurs during an I/O operation.
   */
  public void close()
    throws IOException
  {
    // Nothing GZIP-specific to release; the superclass does the work.
    super.close();
  }

  /**
   * Reads in GZIP-compressed data and stores it in uncompressed form
   * into an array of bytes.  The method will block until either
   * enough input data becomes available or the compressed stream
   * reaches its end.
   *
   * @param buf the buffer into which the uncompressed data will
   *            be stored.
   * @param offset the offset indicating where in <code>buf</code>
   *               the uncompressed data should be placed.
   * @param len the number of uncompressed bytes to be read.
   *
   * @return the number of bytes stored into <code>buf</code>, or -1
   *         once the end of the stream has been reached.
   *
   * @throws IOException if the header or trailer is malformed, the
   *         checksum or length does not match, or an I/O error occurs.
   */
  public int read(byte[] buf, int offset, int len) throws IOException
  {
    // The header is consumed first, then everything else is handed to
    // the superclass.  The CRC32 is updated with every chunk of output
    // and compared against the trailer once the inflater is done, so no
    // buffer of our own is needed.
    if (!readGZIPHeader)
      readHeader();

    if (eos)
      return -1;

    int numRead = super.read(buf, offset, len);
    if (numRead > 0)
      crc.update(buf, offset, numRead);

    // The deflate data is exhausted: validate the 8 byte trailer now.
    if (inf.finished())
      readFooter();
    return numRead;
  }

  /**
   * Reads in and validates the GZIP header as laid out in RFC 1952.
   * If the underlying stream is empty, <code>eos</code> is set and the
   * header stays marked as unread.
   *
   * @throws IOException if the header is malformed or truncated.
   */
  private void readHeader() throws IOException
  {
    final String headerEof = "Early EOF in GZIP header";
    CRC32 headCRC = new CRC32();

    /* 1. Check the two magic bytes */
    int magic = in.read();
    if (magic < 0)
      {
        // Nothing to read at all: treat it as end of stream, not an error.
        eos = true;
        return;
      }
    int magic2 = in.read();
    if ((magic + (magic2 << 8)) != GZIP_MAGIC)
      throw new IOException("Error in GZIP header, bad magic code");
    headCRC.update(magic);
    headCRC.update(magic2);

    /* 2. Check the compression type (must be 8) */
    int CM = in.read();
    if (CM != Deflater.DEFLATED)
      throw new IOException("Error in GZIP header, data not in deflate format");
    headCRC.update(CM);

    /* 3. Check the flags */
    int flags = readHeaderByte(headCRC, headerEof);

    /* This flag byte is divided into individual bits as follows:

       bit 0   FTEXT
       bit 1   FHCRC
       bit 2   FEXTRA
       bit 3   FNAME
       bit 4   FCOMMENT
       bit 5   reserved
       bit 6   reserved
       bit 7   reserved
    */

    /* 3.1 Check the reserved bits (5-7) are zero */
    if ((flags & RESERVED_FLAGS) != 0)
      throw new IOException("Reserved flag bits in GZIP header != 0");

    /* 4.-6. Skip the modification time, extra flags, and OS type */
    for (int i = 0; i < 6; i++)
      readHeaderByte(headCRC, headerEof);

    /* 7. Skip the extra field: a little-endian 16 bit XLEN followed by
          exactly XLEN bytes (the subfields live inside those bytes) */
    if ((flags & FEXTRA) != 0)
      {
        int lenLow = readHeaderByte(headCRC, headerEof);
        int lenHigh = readHeaderByte(headCRC, headerEof);
        int extraLen = lenLow | (lenHigh << 8);
        for (int i = 0; i < extraLen; i++)
          readHeaderByte(headCRC, headerEof);
      }

    /* 8. Skip the zero-terminated file name */
    if ((flags & FNAME) != 0)
      skipZeroTerminated(headCRC, "Early EOF in GZIP file name");

    /* 9. Skip the zero-terminated comment */
    if ((flags & FCOMMENT) != 0)
      skipZeroTerminated(headCRC, "Early EOF in GZIP comment");

    /* 10. Check the header CRC: the low 16 bits of the CRC-32 over all
           preceding header bytes, stored least significant byte first */
    if ((flags & FHCRC) != 0)
      {
        int crcLow = in.read();
        if (crcLow < 0)
          throw new EOFException(headerEof);

        int crcHigh = in.read();
        if (crcHigh < 0)
          throw new EOFException(headerEof);

        int crcval = crcLow | (crcHigh << 8);
        if (crcval != ((int) headCRC.getValue() & 0xffff))
          throw new IOException("Header CRC value mismatch");
      }

    readGZIPHeader = true;
  }

  /**
   * Reads one header byte from the underlying stream and feeds it to
   * the running header checksum.
   *
   * @param headCRC the checksum accumulating the header bytes.
   * @param eofMessage the message of the exception thrown on EOF.
   *
   * @return the byte read, in the range 0 to 255.
   *
   * @throws EOFException if the underlying stream is exhausted.
   * @throws IOException if an I/O error occurs.
   */
  private int readHeaderByte(CRC32 headCRC, String eofMessage)
    throws IOException
  {
    int value = in.read();
    if (value < 0)
      throw new EOFException(eofMessage);
    headCRC.update(value);
    return value;
  }

  /**
   * Skips a zero-terminated header string (including its terminator),
   * feeding every byte to the running header checksum.
   *
   * @param headCRC the checksum accumulating the header bytes.
   * @param eofMessage the message of the exception thrown on EOF.
   *
   * @throws EOFException if the stream ends before the terminator.
   * @throws IOException if an I/O error occurs.
   */
  private void skipZeroTerminated(CRC32 headCRC, String eofMessage)
    throws IOException
  {
    int value;
    do
      value = readHeaderByte(headCRC, eofMessage);
    while (value != 0);
  }

  /**
   * Reads the 8 byte GZIP trailer and checks the stored CRC-32 and
   * uncompressed length against the data that was inflated.  On
   * success <code>eos</code> is set.
   *
   * @throws EOFException if the trailer is truncated.
   * @throws IOException if the checksum or the length does not match.
   */
  private void readFooter() throws IOException
  {
    byte[] footer = new byte[8];

    // Part (or all) of the trailer may already sit in the inflater's
    // input buffer, unconsumed; take those bytes from there first.
    int remaining = inf.getRemaining();
    int avail = remaining > 8 ? 8 : remaining;
    System.arraycopy(buf, len - remaining, footer, 0, avail);

    // Fetch whatever is still missing directly from the stream.
    int needed = 8 - avail;
    while (needed > 0)
      {
        int count = in.read(footer, 8 - needed, needed);
        if (count <= 0)
          throw new EOFException("Early EOF in GZIP footer");
        needed -= count;
      }

    // Both trailer fields are little-endian 32 bit values.
    int crcval = (footer[0] & 0xff) | ((footer[1] & 0xff) << 8)
      | ((footer[2] & 0xff) << 16) | (footer[3] << 24);
    if (crcval != (int) crc.getValue())
      throw new IOException("GZIP crc sum mismatch, theirs \""
                            + Integer.toHexString(crcval)
                            + "\" and ours \""
                            + Integer.toHexString( (int) crc.getValue()));

    int total = (footer[4] & 0xff) | ((footer[5] & 0xff) << 8)
      | ((footer[6] & 0xff) << 16) | (footer[7] << 24);
    if (total != inf.getTotalOut())
      throw new IOException("Number of bytes mismatch");

    /* FIXME: Should we support multiple members?
     * Difficult, since there may be some bytes still in buf.
     */
    eos = true;
  }
}