001: /*
002: * Copyright 2003-2004 The Apache Software Foundation
003: *
004: * Licensed under the Apache License, Version 2.0 (the "License");
005: * you may not use this file except in compliance with the License.
006: * You may obtain a copy of the License at
007: *
008: * http://www.apache.org/licenses/LICENSE-2.0
009: *
010: * Unless required by applicable law or agreed to in writing, software
011: * distributed under the License is distributed on an "AS IS" BASIS,
012: * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
013: * See the License for the specific language governing permissions and
014: * limitations under the License.
015: *
016: */
017:
018: package org.apache.tools.zip;
019:
020: import java.io.File;
021: import java.io.IOException;
022: import java.io.InputStream;
023: import java.io.RandomAccessFile;
024: import java.io.UnsupportedEncodingException;
025: import java.util.Calendar;
026: import java.util.Date;
027: import java.util.Enumeration;
028: import java.util.Hashtable;
029: import java.util.zip.Inflater;
030: import java.util.zip.InflaterInputStream;
031: import java.util.zip.ZipException;
032:
033: /**
034: * Replacement for <code>java.util.ZipFile</code>.
035: *
036: * <p>This class adds support for file name encodings other than UTF-8
037: * (which is required to work on ZIP files created by native zip tools
038: * and is able to skip a preamble like the one found in self
039: * extracting archives. Furthermore it returns instances of
040: * <code>org.apache.tools.zip.ZipEntry</code> instead of
041: * <code>java.util.zip.ZipEntry</code>.</p>
042: *
043: * <p>It doesn't extend <code>java.util.zip.ZipFile</code> as it would
044: * have to reimplement all methods anyway. Like
045: * <code>java.util.ZipFile</code>, it uses RandomAccessFile under the
046: * covers and supports compressed and uncompressed entries.</p>
047: *
048: * <p>The method signatures mimic the ones of
049: * <code>java.util.zip.ZipFile</code>, with a couple of exceptions:
050: *
051: * <ul>
052: * <li>There is no getName method.</li>
053: * <li>entries has been renamed to getEntries.</li>
054: * <li>getEntries and getEntry return
055: * <code>org.apache.tools.zip.ZipEntry</code> instances.</li>
056: * <li>close is allowed to throw IOException.</li>
057: * </ul>
058: *
059: * @author Stefan Bodewig
060: * @version $Revision: 1.8.2.3 $
061: */
062: public class ZipFile {
063:
064: /**
065: * Maps ZipEntrys to Longs, recording the offsets of the local
066: * file headers.
067: */
068: private Hashtable<ZipEntry, Long> entries = new Hashtable<ZipEntry, Long>();
069:
070: /**
071: * Maps String to ZipEntrys, name -> actual entry.
072: */
073: private Hashtable<String, ZipEntry> nameMap = new Hashtable<String, ZipEntry>();
074:
075: /**
076: * Maps ZipEntrys to Longs, recording the offsets of the actual file data.
077: */
078: private Hashtable<ZipEntry, Long> dataOffsets = new Hashtable<ZipEntry, Long>();
079:
080: /**
081: * The encoding to use for filenames and the file comment.
082: *
083: * <p>For a list of possible values see <a
084: * href="http://java.sun.com/products/jdk/1.2/docs/guide/internat/encoding.doc.html">http://java.sun.com/products/jdk/1.2/docs/guide/internat/encoding.doc.html</a>.
085: * Defaults to the platform's default character encoding.</p>
086: */
087: private String encoding = null;
088:
089: /**
090: * The actual data source.
091: */
092: private RandomAccessFile archive;
093:
094: /**
095: * Opens the given file for reading, assuming the platform's
096: * native encoding for file names.
097: *
098: * @param f the archive.
099: *
100: * @throws IOException if an error occurs while reading the file.
101: */
102: public ZipFile(File f) throws IOException {
103: this (f, null);
104: }
105:
106: /**
107: * Opens the given file for reading, assuming the platform's
108: * native encoding for file names.
109: *
110: * @param name name of the archive.
111: *
112: * @throws IOException if an error occurs while reading the file.
113: */
114: public ZipFile(String name) throws IOException {
115: this (new File(name), null);
116: }
117:
118: /**
119: * Opens the given file for reading, assuming the specified
120: * encoding for file names.
121: *
122: * @param name name of the archive.
123: * @param encoding the encoding to use for file names
124: *
125: * @throws IOException if an error occurs while reading the file.
126: */
127: public ZipFile(String name, String encoding) throws IOException {
128: this (new File(name), encoding);
129: }
130:
131: /**
132: * Opens the given file for reading, assuming the specified
133: * encoding for file names.
134: *
135: * @param f the archive.
136: * @param encoding the encoding to use for file names
137: *
138: * @throws IOException if an error occurs while reading the file.
139: */
140: public ZipFile(File f, String encoding) throws IOException {
141: this .encoding = encoding;
142: archive = new RandomAccessFile(f, "r");
143: populateFromCentralDirectory();
144: resolveLocalFileHeaderData();
145: }
146:
147: /**
148: * The encoding to use for filenames and the file comment.
149: *
150: * @return null if using the platform's default character encoding.
151: */
152: public String getEncoding() {
153: return encoding;
154: }
155:
156: /**
157: * Closes the archive.
158: * @throws IOException if an error occurs closing the archive.
159: */
160: public void close() throws IOException {
161: archive.close();
162: }
163:
164: /**
165: * Returns all entries as {@link org.apache.tools.zip} instances.
166: * @return all entries as ZipEntry instances.
167: */
168: public Enumeration<ZipEntry> getEntries() {
169: return entries.keys();
170: }
171:
172: /**
173: * Returns a named entry - or <code>null</code> if no entry by
174: * that name exists.
175: * @param name name of the entry.
176: * @return the ZipEntry corresponding to the given name - or
177: * <code>null</code> if not present.
178: */
179: public ZipEntry getEntry(String name) {
180: return nameMap.get(name);
181: }
182:
183: /**
184: * Returns an InputStream for reading the contents of the given entry.
185: * @param ze the entry to get the stream for.
186: * @return a stream to read the entry from.
187: */
188: public InputStream getInputStream(ZipEntry ze) throws IOException,
189: ZipException {
190: Long start = dataOffsets.get(ze);
191: if (start == null) {
192: return null;
193: }
194: BoundedInputStream bis = new BoundedInputStream(start
195: .longValue(), ze.getCompressedSize());
196: switch (ze.getMethod()) {
197: case ZipEntry.STORED:
198: return bis;
199: case ZipEntry.DEFLATED:
200: bis.addDummy();
201: return new InflaterInputStream(bis, new Inflater(true));
202: default:
203: throw new ZipException(
204: "Found unsupported compression method "
205: + ze.getMethod());
206: }
207: }
208:
209: private static final int CFH_LEN =
210: /* version made by */2 +
211: /* version needed to extract */2 +
212: /* general purpose bit flag */2 +
213: /* compression method */2 +
214: /* last mod file time */2 +
215: /* last mod file date */2 +
216: /* crc-32 */4 +
217: /* compressed size */4 +
218: /* uncompressed size */4 +
219: /* filename length */2 +
220: /* extra field length */2 +
221: /* file comment length */2 +
222: /* disk number start */2 +
223: /* internal file attributes */2 +
224: /* external file attributes */4 +
225: /* relative offset of local header */4;
226:
227: /**
228: * Reads the central directory of the given archive and populates
229: * the internal tables with ZipEntry instances.
230: *
231: * <p>The ZipEntrys will know all data that can be obtained from
232: * the central directory alone, but not the data that requires the
233: * local file header or additional data to be read.</p>
234: */
235: private void populateFromCentralDirectory() throws IOException {
236: positionAtCentralDirectory();
237:
238: byte[] cfh = new byte[CFH_LEN];
239:
240: byte[] signatureBytes = new byte[4];
241: archive.readFully(signatureBytes);
242: ZipLong sig = new ZipLong(signatureBytes);
243: while (sig.equals(ZipOutputStream.CFH_SIG)) {
244: archive.readFully(cfh);
245: int off = 0;
246: ZipEntry ze = new ZipEntry();
247:
248: ZipShort versionMadeBy = new ZipShort(cfh, off);
249: off += 2;
250: ze.setPlatform((versionMadeBy.getValue() >> 8) & 0x0F);
251:
252: off += 4; // skip version info and general purpose byte
253:
254: ze.setMethod((new ZipShort(cfh, off)).getValue());
255: off += 2;
256:
257: ze.setTime(fromDosTime(new ZipLong(cfh, off)).getTime());
258: off += 4;
259:
260: ze.setCrc((new ZipLong(cfh, off)).getValue());
261: off += 4;
262:
263: ze.setCompressedSize((new ZipLong(cfh, off)).getValue());
264: off += 4;
265:
266: ze.setSize((new ZipLong(cfh, off)).getValue());
267: off += 4;
268:
269: int fileNameLen = (new ZipShort(cfh, off)).getValue();
270: off += 2;
271:
272: int extraLen = (new ZipShort(cfh, off)).getValue();
273: off += 2;
274:
275: int commentLen = (new ZipShort(cfh, off)).getValue();
276: off += 2;
277:
278: off += 2; // disk number
279:
280: ze.setInternalAttributes((new ZipShort(cfh, off))
281: .getValue());
282: off += 2;
283:
284: ze
285: .setExternalAttributes((new ZipLong(cfh, off))
286: .getValue());
287: off += 4;
288:
289: // LFH offset
290: entries.put(ze,
291: new Long((new ZipLong(cfh, off)).getValue()));
292:
293: byte[] fileName = new byte[fileNameLen];
294: archive.readFully(fileName);
295: ze.setName(getString(fileName));
296:
297: nameMap.put(ze.getName(), ze);
298:
299: archive.skipBytes(extraLen);
300:
301: byte[] comment = new byte[commentLen];
302: archive.readFully(comment);
303: ze.setComment(getString(comment));
304:
305: archive.readFully(signatureBytes);
306: sig = new ZipLong(signatureBytes);
307: }
308: }
309:
310: private static final int MIN_EOCD_SIZE =
311: /* end of central dir signature */4 +
312: /* number of this disk */2 +
313: /* number of the disk with the */+
314: /* start of the central directory */2 +
315: /* total number of entries in */+
316: /* the central dir on this disk */2 +
317: /* total number of entries in */+
318: /* the central dir */2 +
319: /* size of the central directory */4 +
320: /* offset of start of central */+
321: /* directory with respect to */+
322: /* the starting disk number */4 +
323: /* zipfile comment length */2;
324:
325: private static final int CFD_LOCATOR_OFFSET =
326: /* end of central dir signature */4 +
327: /* number of this disk */2 +
328: /* number of the disk with the */+
329: /* start of the central directory */2 +
330: /* total number of entries in */+
331: /* the central dir on this disk */2 +
332: /* total number of entries in */+
333: /* the central dir */2 +
334: /* size of the central directory */4;
335:
336: /**
337: * Searches for the "End of central dir record", parses
338: * it and positions the stream at the first central directory
339: * record.
340: */
341: private void positionAtCentralDirectory() throws IOException {
342: long off = archive.length() - MIN_EOCD_SIZE;
343: archive.seek(off);
344: byte[] sig = ZipOutputStream.EOCD_SIG.getBytes();
345: int curr = archive.read();
346: boolean found = false;
347: while (curr != -1) {
348: if (curr == sig[0]) {
349: curr = archive.read();
350: if (curr == sig[1]) {
351: curr = archive.read();
352: if (curr == sig[2]) {
353: curr = archive.read();
354: if (curr == sig[3]) {
355: found = true;
356: break;
357: }
358: }
359: }
360: }
361: archive.seek(--off);
362: curr = archive.read();
363: }
364: if (!found) {
365: throw new ZipException("archive is not a ZIP archive");
366: }
367: archive.seek(off + CFD_LOCATOR_OFFSET);
368: byte[] cfdOffset = new byte[4];
369: archive.readFully(cfdOffset);
370: archive.seek((new ZipLong(cfdOffset)).getValue());
371: }
372:
373: /**
374: * Number of bytes in local file header up to the "length of
375: * filename" entry.
376: */
377: private static final long LFH_OFFSET_FOR_FILENAME_LENGTH =
378: /* local file header signature */4 +
379: /* version needed to extract */2 +
380: /* general purpose bit flag */2 +
381: /* compression method */2 +
382: /* last mod file time */2 +
383: /* last mod file date */2 +
384: /* crc-32 */4 +
385: /* compressed size */4 +
386: /* uncompressed size */4;
387:
388: /**
389: * Walks through all recorded entries and adds the data available
390: * from the local file header.
391: *
392: * <p>Also records the offsets for the data to read from the
393: * entries.</p>
394: */
395: private void resolveLocalFileHeaderData() throws IOException {
396: Enumeration<ZipEntry> e = getEntries();
397: while (e.hasMoreElements()) {
398: ZipEntry ze = e.nextElement();
399: long offset = entries.get(ze).longValue();
400: archive.seek(offset + LFH_OFFSET_FOR_FILENAME_LENGTH);
401: byte[] b = new byte[2];
402: archive.readFully(b);
403: int fileNameLen = (new ZipShort(b)).getValue();
404: archive.readFully(b);
405: int extraFieldLen = (new ZipShort(b)).getValue();
406: archive.skipBytes(fileNameLen);
407: byte[] localExtraData = new byte[extraFieldLen];
408: archive.readFully(localExtraData);
409: ze.setExtra(localExtraData);
410: dataOffsets.put(ze, new Long(offset
411: + LFH_OFFSET_FOR_FILENAME_LENGTH + 2 + 2
412: + fileNameLen + extraFieldLen));
413: }
414: }
415:
416: /**
417: * Convert a DOS date/time field to a Date object.
418: *
419: * @param l contains the stored DOS time.
420: * @return a Date instance corresponding to the given time.
421: */
422: protected static Date fromDosTime(ZipLong l) {
423: long dosTime = l.getValue();
424: Calendar cal = Calendar.getInstance();
425: cal.set(Calendar.YEAR, (int) ((dosTime >> 25) & 0x7f) + 1980);
426: cal.set(Calendar.MONTH, (int) ((dosTime >> 21) & 0x0f) - 1);
427: cal.set(Calendar.DATE, (int) (dosTime >> 16) & 0x1f);
428: cal.set(Calendar.HOUR_OF_DAY, (int) (dosTime >> 11) & 0x1f);
429: cal.set(Calendar.MINUTE, (int) (dosTime >> 5) & 0x3f);
430: cal.set(Calendar.SECOND, (int) (dosTime << 1) & 0x3e);
431: return cal.getTime();
432: }
433:
434: /**
435: * Retrieve a String from the given bytes using the encoding set
436: * for this ZipFile.
437: *
438: * @param bytes the byte array to transform
439: * @return String obtained by using the given encoding
440: * @throws ZipException if the encoding cannot be recognized.
441: */
442: protected String getString(byte[] bytes) throws ZipException {
443: if (encoding == null) {
444: return new String(bytes);
445: } else {
446: try {
447: return new String(bytes, encoding);
448: } catch (UnsupportedEncodingException uee) {
449: throw new ZipException("Unsupported char encoding: "
450: + uee.getMessage());
451: }
452: }
453: }
454:
455: /**
456: * InputStream that delegates requests to the underlying
457: * RandomAccessFile, making sure that only bytes from a certain
458: * range can be read.
459: */
460: private class BoundedInputStream extends InputStream {
461: private long remaining;
462: private long loc;
463: private boolean addDummyByte = false;
464:
465: BoundedInputStream(long start, long remaining) {
466: this .remaining = remaining;
467: loc = start;
468: }
469:
470: @Override
471: public int read() throws IOException {
472: if (remaining-- <= 0) {
473: if (addDummyByte) {
474: addDummyByte = false;
475: return 0;
476: }
477: return -1;
478: }
479: synchronized (archive) {
480: archive.seek(loc++);
481: return archive.read();
482: }
483: }
484:
485: @Override
486: public int read(byte[] b, int off, int len) throws IOException {
487: if (remaining <= 0) {
488: if (addDummyByte) {
489: addDummyByte = false;
490: b[off] = 0;
491: return 1;
492: }
493: return -1;
494: }
495:
496: if (len <= 0) {
497: return 0;
498: }
499:
500: if (len > remaining) {
501: len = (int) remaining;
502: }
503: int ret = -1;
504: synchronized (archive) {
505: archive.seek(loc);
506: ret = archive.read(b, off, len);
507: }
508: if (ret > 0) {
509: loc += ret;
510: remaining -= ret;
511: }
512: return ret;
513: }
514:
515: /**
516: * Inflater needs an extra dummy byte for nowrap - see
517: * Inflater's javadocs.
518: */
519: void addDummy() {
520: addDummyByte = true;
521: }
522: }
523:
524: }
|