001: /*
002: * Licensed to the Apache Software Foundation (ASF) under one or more
003: * contributor license agreements. See the NOTICE file distributed with
004: * this work for additional information regarding copyright ownership.
005: * The ASF licenses this file to You under the Apache License, Version 2.0
006: * (the "License"); you may not use this file except in compliance with
007: * the License. You may obtain a copy of the License at
008: *
009: * http://www.apache.org/licenses/LICENSE-2.0
010: *
011: * Unless required by applicable law or agreed to in writing, software
012: * distributed under the License is distributed on an "AS IS" BASIS,
013: * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014: * See the License for the specific language governing permissions and
015: * limitations under the License.
016: *
017: */
018:
019: package org.apache.tools.zip;
020:
021: import java.io.File;
022: import java.io.IOException;
023: import java.io.InputStream;
024: import java.io.RandomAccessFile;
025: import java.io.UnsupportedEncodingException;
026: import java.util.Calendar;
027: import java.util.Date;
028: import java.util.Enumeration;
029: import java.util.Hashtable;
030: import java.util.zip.Inflater;
031: import java.util.zip.InflaterInputStream;
032: import java.util.zip.ZipException;
033:
034: /**
035: * Replacement for <code>java.util.ZipFile</code>.
036: *
037: * <p>This class adds support for file name encodings other than UTF-8
038: * (which is required to work on ZIP files created by native zip tools
039: * and is able to skip a preamble like the one found in self
040: * extracting archives. Furthermore it returns instances of
041: * <code>org.apache.tools.zip.ZipEntry</code> instead of
042: * <code>java.util.zip.ZipEntry</code>.</p>
043: *
044: * <p>It doesn't extend <code>java.util.zip.ZipFile</code> as it would
045: * have to reimplement all methods anyway. Like
046: * <code>java.util.ZipFile</code>, it uses RandomAccessFile under the
047: * covers and supports compressed and uncompressed entries.</p>
048: *
049: * <p>The method signatures mimic the ones of
050: * <code>java.util.zip.ZipFile</code>, with a couple of exceptions:
051: *
052: * <ul>
053: * <li>There is no getName method.</li>
054: * <li>entries has been renamed to getEntries.</li>
055: * <li>getEntries and getEntry return
056: * <code>org.apache.tools.zip.ZipEntry</code> instances.</li>
057: * <li>close is allowed to throw IOException.</li>
058: * </ul>
059: *
060: */
061: public class ZipFile {
062:
063: /**
064: * Maps ZipEntrys to Longs, recording the offsets of the local
065: * file headers.
066: */
067: private Hashtable entries = new Hashtable(509);
068:
069: /**
070: * Maps String to ZipEntrys, name -> actual entry.
071: */
072: private Hashtable nameMap = new Hashtable(509);
073:
074: private static final class OffsetEntry {
075: private long headerOffset = -1;
076: private long dataOffset = -1;
077: }
078:
079: /**
080: * The encoding to use for filenames and the file comment.
081: *
082: * <p>For a list of possible values see <a
083: * href="http://java.sun.com/j2se/1.5.0/docs/guide/intl/encoding.doc.html">http://java.sun.com/j2se/1.5.0/docs/guide/intl/encoding.doc.html</a>.
084: * Defaults to the platform's default character encoding.</p>
085: */
086: private String encoding = null;
087:
088: /**
089: * The actual data source.
090: */
091: private RandomAccessFile archive;
092:
093: /**
094: * Opens the given file for reading, assuming the platform's
095: * native encoding for file names.
096: *
097: * @param f the archive.
098: *
099: * @throws IOException if an error occurs while reading the file.
100: */
101: public ZipFile(File f) throws IOException {
102: this (f, null);
103: }
104:
105: /**
106: * Opens the given file for reading, assuming the platform's
107: * native encoding for file names.
108: *
109: * @param name name of the archive.
110: *
111: * @throws IOException if an error occurs while reading the file.
112: */
113: public ZipFile(String name) throws IOException {
114: this (new File(name), null);
115: }
116:
117: /**
118: * Opens the given file for reading, assuming the specified
119: * encoding for file names.
120: *
121: * @param name name of the archive.
122: * @param encoding the encoding to use for file names
123: *
124: * @throws IOException if an error occurs while reading the file.
125: */
126: public ZipFile(String name, String encoding) throws IOException {
127: this (new File(name), encoding);
128: }
129:
130: /**
131: * Opens the given file for reading, assuming the specified
132: * encoding for file names.
133: *
134: * @param f the archive.
135: * @param encoding the encoding to use for file names
136: *
137: * @throws IOException if an error occurs while reading the file.
138: */
139: public ZipFile(File f, String encoding) throws IOException {
140: this .encoding = encoding;
141: archive = new RandomAccessFile(f, "r");
142: try {
143: populateFromCentralDirectory();
144: resolveLocalFileHeaderData();
145: } catch (IOException e) {
146: try {
147: archive.close();
148: } catch (IOException e2) {
149: // swallow, throw the original exception instead
150: }
151: throw e;
152: }
153: }
154:
155: /**
156: * The encoding to use for filenames and the file comment.
157: *
158: * @return null if using the platform's default character encoding.
159: */
160: public String getEncoding() {
161: return encoding;
162: }
163:
164: /**
165: * Closes the archive.
166: * @throws IOException if an error occurs closing the archive.
167: */
168: public void close() throws IOException {
169: archive.close();
170: }
171:
172: /**
173: * close a zipfile quietly; throw no io fault, do nothing
174: * on a null parameter
175: * @param zipfile file to close, can be null
176: */
177: public static void closeQuietly(ZipFile zipfile) {
178: if (zipfile != null) {
179: try {
180: zipfile.close();
181: } catch (IOException e) {
182: //ignore
183: }
184: }
185: }
186:
187: /**
188: * Returns all entries.
189: * @return all entries as {@link ZipEntry} instances
190: */
191: public Enumeration getEntries() {
192: return entries.keys();
193: }
194:
195: /**
196: * Returns a named entry - or <code>null</code> if no entry by
197: * that name exists.
198: * @param name name of the entry.
199: * @return the ZipEntry corresponding to the given name - or
200: * <code>null</code> if not present.
201: */
202: public ZipEntry getEntry(String name) {
203: return (ZipEntry) nameMap.get(name);
204: }
205:
206: /**
207: * Returns an InputStream for reading the contents of the given entry.
208: * @param ze the entry to get the stream for.
209: * @return a stream to read the entry from.
210: * @throws IOException if unable to create an input stream from the zipenty
211: * @throws ZipException if the zipentry has an unsupported compression method
212: */
213: public InputStream getInputStream(ZipEntry ze) throws IOException,
214: ZipException {
215: OffsetEntry offsetEntry = (OffsetEntry) entries.get(ze);
216: if (offsetEntry == null) {
217: return null;
218: }
219: long start = offsetEntry.dataOffset;
220: BoundedInputStream bis = new BoundedInputStream(start, ze
221: .getCompressedSize());
222: switch (ze.getMethod()) {
223: case ZipEntry.STORED:
224: return bis;
225: case ZipEntry.DEFLATED:
226: bis.addDummy();
227: return new InflaterInputStream(bis, new Inflater(true));
228: default:
229: throw new ZipException(
230: "Found unsupported compression method "
231: + ze.getMethod());
232: }
233: }
234:
235: private static final int CFH_LEN =
236: /* version made by */2
237: /* version needed to extract */+ 2
238: /* general purpose bit flag */+ 2
239: /* compression method */+ 2
240: /* last mod file time */+ 2
241: /* last mod file date */+ 2
242: /* crc-32 */+ 4
243: /* compressed size */+ 4
244: /* uncompressed size */+ 4
245: /* filename length */+ 2
246: /* extra field length */+ 2
247: /* file comment length */+ 2
248: /* disk number start */+ 2
249: /* internal file attributes */+ 2
250: /* external file attributes */+ 4
251: /* relative offset of local header */+ 4;
252:
253: /**
254: * Reads the central directory of the given archive and populates
255: * the internal tables with ZipEntry instances.
256: *
257: * <p>The ZipEntrys will know all data that can be obtained from
258: * the central directory alone, but not the data that requires the
259: * local file header or additional data to be read.</p>
260: */
261: private void populateFromCentralDirectory() throws IOException {
262: positionAtCentralDirectory();
263:
264: byte[] cfh = new byte[CFH_LEN];
265:
266: byte[] signatureBytes = new byte[4];
267: archive.readFully(signatureBytes);
268: long sig = ZipLong.getValue(signatureBytes);
269: final long cfhSig = ZipLong.getValue(ZipOutputStream.CFH_SIG);
270: while (sig == cfhSig) {
271: archive.readFully(cfh);
272: int off = 0;
273: ZipEntry ze = new ZipEntry();
274:
275: int versionMadeBy = ZipShort.getValue(cfh, off);
276: off += 2;
277: ze.setPlatform((versionMadeBy >> 8) & 0x0F);
278:
279: off += 4; // skip version info and general purpose byte
280:
281: ze.setMethod(ZipShort.getValue(cfh, off));
282: off += 2;
283:
284: // FIXME this is actually not very cpu cycles friendly as we are converting from
285: // dos to java while the underlying Sun implementation will convert
286: // from java to dos time for internal storage...
287: long time = dosToJavaTime(ZipLong.getValue(cfh, off));
288: ze.setTime(time);
289: off += 4;
290:
291: ze.setCrc(ZipLong.getValue(cfh, off));
292: off += 4;
293:
294: ze.setCompressedSize(ZipLong.getValue(cfh, off));
295: off += 4;
296:
297: ze.setSize(ZipLong.getValue(cfh, off));
298: off += 4;
299:
300: int fileNameLen = ZipShort.getValue(cfh, off);
301: off += 2;
302:
303: int extraLen = ZipShort.getValue(cfh, off);
304: off += 2;
305:
306: int commentLen = ZipShort.getValue(cfh, off);
307: off += 2;
308:
309: off += 2; // disk number
310:
311: ze.setInternalAttributes(ZipShort.getValue(cfh, off));
312: off += 2;
313:
314: ze.setExternalAttributes(ZipLong.getValue(cfh, off));
315: off += 4;
316:
317: byte[] fileName = new byte[fileNameLen];
318: archive.readFully(fileName);
319: ze.setName(getString(fileName));
320:
321: // LFH offset,
322: OffsetEntry offset = new OffsetEntry();
323: offset.headerOffset = ZipLong.getValue(cfh, off);
324: // data offset will be filled later
325: entries.put(ze, offset);
326:
327: nameMap.put(ze.getName(), ze);
328:
329: archive.skipBytes(extraLen);
330:
331: byte[] comment = new byte[commentLen];
332: archive.readFully(comment);
333: ze.setComment(getString(comment));
334:
335: archive.readFully(signatureBytes);
336: sig = ZipLong.getValue(signatureBytes);
337: }
338: }
339:
340: private static final int MIN_EOCD_SIZE =
341: /* end of central dir signature */4
342: /* number of this disk */+ 2
343: /* number of the disk with the */
344: /* start of the central directory */+ 2
345: /* total number of entries in */
346: /* the central dir on this disk */+ 2
347: /* total number of entries in */
348: /* the central dir */+ 2
349: /* size of the central directory */+ 4
350: /* offset of start of central */
351: /* directory with respect to */
352: /* the starting disk number */+ 4
353: /* zipfile comment length */+ 2;
354:
355: private static final int CFD_LOCATOR_OFFSET =
356: /* end of central dir signature */4
357: /* number of this disk */+ 2
358: /* number of the disk with the */
359: /* start of the central directory */+ 2
360: /* total number of entries in */
361: /* the central dir on this disk */+ 2
362: /* total number of entries in */
363: /* the central dir */+ 2
364: /* size of the central directory */+ 4;
365:
366: /**
367: * Searches for the "End of central dir record", parses
368: * it and positions the stream at the first central directory
369: * record.
370: */
371: private void positionAtCentralDirectory() throws IOException {
372: boolean found = false;
373: long off = archive.length() - MIN_EOCD_SIZE;
374: if (off >= 0) {
375: archive.seek(off);
376: byte[] sig = ZipOutputStream.EOCD_SIG;
377: int curr = archive.read();
378: while (curr != -1) {
379: if (curr == sig[0]) {
380: curr = archive.read();
381: if (curr == sig[1]) {
382: curr = archive.read();
383: if (curr == sig[2]) {
384: curr = archive.read();
385: if (curr == sig[3]) {
386: found = true;
387: break;
388: }
389: }
390: }
391: }
392: archive.seek(--off);
393: curr = archive.read();
394: }
395: }
396: if (!found) {
397: throw new ZipException("archive is not a ZIP archive");
398: }
399: archive.seek(off + CFD_LOCATOR_OFFSET);
400: byte[] cfdOffset = new byte[4];
401: archive.readFully(cfdOffset);
402: archive.seek(ZipLong.getValue(cfdOffset));
403: }
404:
405: /**
406: * Number of bytes in local file header up to the "length of
407: * filename" entry.
408: */
409: private static final long LFH_OFFSET_FOR_FILENAME_LENGTH =
410: /* local file header signature */4
411: /* version needed to extract */+ 2
412: /* general purpose bit flag */+ 2
413: /* compression method */+ 2
414: /* last mod file time */+ 2
415: /* last mod file date */+ 2
416: /* crc-32 */+ 4
417: /* compressed size */+ 4
418: /* uncompressed size */+ 4;
419:
420: /**
421: * Walks through all recorded entries and adds the data available
422: * from the local file header.
423: *
424: * <p>Also records the offsets for the data to read from the
425: * entries.</p>
426: */
427: private void resolveLocalFileHeaderData() throws IOException {
428: Enumeration e = getEntries();
429: while (e.hasMoreElements()) {
430: ZipEntry ze = (ZipEntry) e.nextElement();
431: OffsetEntry offsetEntry = (OffsetEntry) entries.get(ze);
432: long offset = offsetEntry.headerOffset;
433: archive.seek(offset + LFH_OFFSET_FOR_FILENAME_LENGTH);
434: byte[] b = new byte[2];
435: archive.readFully(b);
436: int fileNameLen = ZipShort.getValue(b);
437: archive.readFully(b);
438: int extraFieldLen = ZipShort.getValue(b);
439: archive.skipBytes(fileNameLen);
440: byte[] localExtraData = new byte[extraFieldLen];
441: archive.readFully(localExtraData);
442: ze.setExtra(localExtraData);
443: /*dataOffsets.put(ze,
444: new Long(offset + LFH_OFFSET_FOR_FILENAME_LENGTH
445: + 2 + 2 + fileNameLen + extraFieldLen));
446: */
447: offsetEntry.dataOffset = offset
448: + LFH_OFFSET_FOR_FILENAME_LENGTH + 2 + 2
449: + fileNameLen + extraFieldLen;
450: }
451: }
452:
453: /**
454: * Convert a DOS date/time field to a Date object.
455: *
456: * @param zipDosTime contains the stored DOS time.
457: * @return a Date instance corresponding to the given time.
458: */
459: protected static Date fromDosTime(ZipLong zipDosTime) {
460: long dosTime = zipDosTime.getValue();
461: return new Date(dosToJavaTime(dosTime));
462: }
463:
464: /*
465: * Converts DOS time to Java time (number of milliseconds since epoch).
466: */
467: private static long dosToJavaTime(long dosTime) {
468: Calendar cal = Calendar.getInstance();
469: cal.set(Calendar.YEAR, (int) ((dosTime >> 25) & 0x7f) + 1980);
470: cal.set(Calendar.MONTH, (int) ((dosTime >> 21) & 0x0f) - 1);
471: cal.set(Calendar.DATE, (int) (dosTime >> 16) & 0x1f);
472: cal.set(Calendar.HOUR_OF_DAY, (int) (dosTime >> 11) & 0x1f);
473: cal.set(Calendar.MINUTE, (int) (dosTime >> 5) & 0x3f);
474: cal.set(Calendar.SECOND, (int) (dosTime << 1) & 0x3e);
475: return cal.getTime().getTime();
476: }
477:
478: /**
479: * Retrieve a String from the given bytes using the encoding set
480: * for this ZipFile.
481: *
482: * @param bytes the byte array to transform
483: * @return String obtained by using the given encoding
484: * @throws ZipException if the encoding cannot be recognized.
485: */
486: protected String getString(byte[] bytes) throws ZipException {
487: if (encoding == null) {
488: return new String(bytes);
489: } else {
490: try {
491: return new String(bytes, encoding);
492: } catch (UnsupportedEncodingException uee) {
493: throw new ZipException(uee.getMessage());
494: }
495: }
496: }
497:
498: /**
499: * InputStream that delegates requests to the underlying
500: * RandomAccessFile, making sure that only bytes from a certain
501: * range can be read.
502: */
503: private class BoundedInputStream extends InputStream {
504: private long remaining;
505: private long loc;
506: private boolean addDummyByte = false;
507:
508: BoundedInputStream(long start, long remaining) {
509: this .remaining = remaining;
510: loc = start;
511: }
512:
513: public int read() throws IOException {
514: if (remaining-- <= 0) {
515: if (addDummyByte) {
516: addDummyByte = false;
517: return 0;
518: }
519: return -1;
520: }
521: synchronized (archive) {
522: archive.seek(loc++);
523: return archive.read();
524: }
525: }
526:
527: public int read(byte[] b, int off, int len) throws IOException {
528: if (remaining <= 0) {
529: if (addDummyByte) {
530: addDummyByte = false;
531: b[off] = 0;
532: return 1;
533: }
534: return -1;
535: }
536:
537: if (len <= 0) {
538: return 0;
539: }
540:
541: if (len > remaining) {
542: len = (int) remaining;
543: }
544: int ret = -1;
545: synchronized (archive) {
546: archive.seek(loc);
547: ret = archive.read(b, off, len);
548: }
549: if (ret > 0) {
550: loc += ret;
551: remaining -= ret;
552: }
553: return ret;
554: }
555:
556: /**
557: * Inflater needs an extra dummy byte for nowrap - see
558: * Inflater's javadocs.
559: */
560: void addDummy() {
561: addDummyByte = true;
562: }
563: }
564:
565: }
|