001: /*
002: * Copyright (c) 2007, intarsys consulting GmbH
003: *
004: * Redistribution and use in source and binary forms, with or without
005: * modification, are permitted provided that the following conditions are met:
006: *
007: * - Redistributions of source code must retain the above copyright notice,
008: * this list of conditions and the following disclaimer.
009: *
010: * - Redistributions in binary form must reproduce the above copyright notice,
011: * this list of conditions and the following disclaimer in the documentation
012: * and/or other materials provided with the distribution.
013: *
014: * - Neither the name of intarsys nor the names of its contributors may be used
015: * to endorse or promote products derived from this software without specific
016: * prior written permission.
017: *
018: * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
019: * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
020: * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
021: * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
022: * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
023: * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
024: * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
025: * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
026: * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
027: * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
028: * POSSIBILITY OF SUCH DAMAGE.
029: */
030: package de.intarsys.tools.file;
031:
032: import java.io.UnsupportedEncodingException;
033: import java.util.HashMap;
034: import java.util.Iterator;
035: import java.util.Map;
036:
037: import de.intarsys.tools.hex.HexTools;
038:
039: public class MagicFileNumberTest {
040: public static class Type {
041: private String suffix;
042:
043: private byte[] magicBytes;
044:
045: public Type(String suffix, byte[] magic) {
046: this .suffix = suffix;
047: this .magicBytes = magic;
048: }
049:
050: public String getKey() {
051: return HexTools.bytesToHexString(magicBytes);
052: }
053:
054: public String getSuffix() {
055: return suffix;
056: }
057:
058: public byte[] getMagicBytes() {
059: return magicBytes;
060: }
061:
062: public boolean matches(byte[] data) {
063: for (int i = 0; i < magicBytes.length; i++) {
064: if (magicBytes[i] != data[i]) {
065: return false;
066: }
067: }
068: return true;
069: }
070:
071: public String toString() {
072: return suffix;
073: }
074: }
075:
076: private Map types;
077:
078: public MagicFileNumberTest() {
079: types = new HashMap();
080: addTypes();
081: }
082:
083: private void addTypes() {
084: /*
085: * known types, found on
086: * http://www.astro.keele.ac.uk/oldusers/rno/Computing/File_magic.html
087: */
088: /* image files */
089: /* Bitmap format */
090: addType("bmp", new byte[] { 0x42, 0x4d }); //$NON-NLS-1$
091: /* FITS format */
092: addType(
093: "fits", new byte[] { 0x53, 0x49, 0x4d, 0x50, 0x4c, 0x45 }); //$NON-NLS-1$
094: /* GIF format */
095: addType("gif", new byte[] { 0x47, 0x49, 0x46, 0x38 }); //$NON-NLS-1$
096: /* Graphics Kernel System */
097: addType("gks", new byte[] { 0x47, 0x4b, 0x53, 0x4d }); //$NON-NLS-1$
098: /* IRIS rgb format */
099: addType("rgb", new byte[] { 0x01, (byte) 0xda }); //$NON-NLS-1$
100: /* ITC (CMU WM) format */
101: addType(
102: "itc", new byte[] { (byte) 0xf1, 0x00, 0x40, (byte) 0xbb }); //$NON-NLS-1$
103: /* JPEG File Interchange Format */
104: addType(
105: "jpg", new byte[] { (byte) 0xff, (byte) 0xD8, (byte) 0xff }); //$NON-NLS-1$
106: /* NIFF (Navy TIFF) */
107: addType("nif", new byte[] { 0x49, 0x49, 0x4e, 0x31 }); //$NON-NLS-1$
108: /* PM format */
109: addType("pm", new byte[] { 0x56, 0x49, 0x45, 0x57 }); //$NON-NLS-1$
110: /* PNG format */
111: addType("png", new byte[] { (byte) 0x89, 0x50, 0x4e, 0x47 }); //$NON-NLS-1$
112: /* Postscript format */
113: addType("ps", new byte[] { 0x25, 0x21 }); //$NON-NLS-1$
114: /* Sun Rasterfile */
115: addType(
116: "ras", new byte[] { 0x59, (byte) 0xa6, 0x6a, (byte) 0x95 }); //$NON-NLS-1$
117: /* TIFF format (Motorola - big endian) */
118: addType("tif", new byte[] { 0x4d, 0x4d, 0x00, 0x2a }); //$NON-NLS-1$
119: /* TIFF format (Intel - little endian) */
120: addType("tif", new byte[] { 0x49, 0x49, 0x2a, 0x00 }); //$NON-NLS-1$
121: /* XCF Gimp file structure */
122: addType(
123: "xcf", new byte[] { 0x67, 0x69, 0x6d, 0x70, 0x20, 0x78, 0x63, //$NON-NLS-1$
124: 0x66, 0x20, 0x76 });
125: /* Xfig format */
126: addType("fig", new byte[] { 0x23, 0x46, 0x49, 0x47 }); //$NON-NLS-1$
127: /* XPM format */
128: addType(
129: "xpm", new byte[] { 0x2f, 0x2a, 0x20, 0x58, 0x50, 0x4d, 0x20, //$NON-NLS-1$
130: 0x2a, 0x2f });
131:
132: /* compressed files */
133: /* Bzip */
134: addType("bz", new byte[] { 0x42, 0x5a }); //$NON-NLS-1$
135: /* Compress */
136: addType("Z", new byte[] { 0x1f, (byte) 0x9d }); //$NON-NLS-1$
137: /* gzip format */
138: addType("gz", new byte[] { 0x1f, (byte) 0x8b }); //$NON-NLS-1$
139: /* pkzip format */
140: addType("zip", new byte[] { 0x50, 0x4b, 0x03, 0x04 }); //$NON-NLS-1$
141:
142: /* archive files */
143: /* TAR */
144: addType("tar", new byte[] { 0x75, 0x73, 0x74, 0x61, 0x72 }); //$NON-NLS-1$
145:
146: /* excecutable files */
147: /* MS-DOS, OS/2 or MS Windows */
148: addType("exe", new byte[] { 0x4d, 0x5a }); //$NON-NLS-1$
149: /* Unix elf */
150: addType("unix elf", new byte[] { 0x7f, 0x45, 0x4c, 0x46 }); //$NON-NLS-1$
151:
152: /* pgp */
153: /* pgp public ring */
154: addType("pgp public ring", new byte[] { (byte) 0x99, 0x00 }); //$NON-NLS-1$
155: /* pgp security ring */
156: addType("pgp security ring", new byte[] { (byte) 0x95, 0x01 }); //$NON-NLS-1$
157: /* pgp security ring */
158: addType("pgp security ring", new byte[] { (byte) 0x95, 0x00 }); //$NON-NLS-1$
159: /* pgp encrypted data */
160: addType("pgp encrypted data", new byte[] { (byte) 0xA6, 0x00 }); //$NON-NLS-1$
161:
162: /* other */
163: try {
164: addType("pdf", "%PDF".getBytes("ASCII")); //$NON-NLS-1$ //$NON-NLS-2$ //$NON-NLS-3$
165: } catch (UnsupportedEncodingException e) {
166: // will not happen
167: }
168: }
169:
170: /**
171: * @param fileSuffix
172: * just the suffix without any starting suffix delimiters (e.g.:
173: * 'pdf')
174: * @param magicBytes
175: * any bytes, not <code>null</code> or zero size
176: */
177: public void addType(String fileSuffix, byte[] magicBytes) {
178: if ((fileSuffix == null) || (magicBytes == null)
179: || (magicBytes.length == 0)) {
180: return;
181: }
182: Type type = new Type(fileSuffix, magicBytes);
183: if (!types.containsKey(type.getKey())) {
184: types.put(type.getKey(), type);
185: }
186: }
187:
188: /**
189: * @param data
190: * any not null or zero size data
191: * @return a file suffix without a delimiter (e.g.: 'pdf') or
192: * <code>null</code>
193: */
194: public String guessFileSuffix(byte[] data) {
195: if (data == null) {
196: return null;
197: }
198: for (Iterator i = types.entrySet().iterator(); i.hasNext();) {
199: Type type = (Type) ((Map.Entry) i.next()).getValue();
200: if (type.matches(data)) {
201: return type.getSuffix();
202: }
203: }
204: return null;
205: }
206:
207: /**
208: * does the data contain only ISO-8819-x printable characters ?
209: */
210: public boolean isText(byte[] data) {
211: if (data == null) {
212: return false;
213: }
214: int num = data.length;
215: if (num > 1024) {
216: num = 1024;
217: }
218: for (int i = 0; i < num; i++) {
219: int c = data[i] & 0xFF;
220: if (c >= 0x20 && c <= 0x7E) {
221: // ASCII printable
222: continue;
223: }
224: if (c >= 0xA0) {
225: // ISO 8819 extension
226: continue;
227: }
228: switch (c) {
229: case 0x09: // HORIZONTAL TABULATION
230: case 0x0A: // LINE FEED
231: case 0x0B: // VERTICAL TABULATION
232: case 0x0C: // FORM FEED
233: case 0x0D: // CARRIAGE RETURN
234: continue;
235: default:
236: }
237: return false;
238: }
239: return true;
240: }
241:
242: }
|