Source Code Cross Referenced for Detector.java in » Database-ORM » MMBase » org » mmbase » util » magicfile » Java Source Code / Java DocumentationJava Source Code and Java Documentation

Java Source Code / Java Documentation
1.	6.0 JDK Core
2.	6.0 JDK Modules
3.	6.0 JDK Modules com.sun
4.	6.0 JDK Modules com.sun.java
5.	6.0 JDK Modules sun
6.	6.0 JDK Platform
7.	Ajax
8.	Apache Harmony Java SE
9.	Aspect oriented
10.	Authentication Authorization
11.	Blogger System
12.	Build
13.	Byte Code
14.	Cache
15.	Chart
16.	Chat
17.	Code Analyzer
18.	Collaboration
19.	Content Management System
20.	Database Client
21.	Database DBMS
22.	Database JDBC Connection Pool
23.	Database ORM
24.	Development
25.	EJB Server geronimo
26.	EJB Server GlassFish
27.	EJB Server JBoss 4.2.1
28.	EJB Server resin 3.1.5
29.	ERP CRM Financial
30.	ESB
31.	Forum
32.	GIS
33.	Graphic Library
34.	Groupware
35.	HTML Parser
36.	IDE
37.	IDE Eclipse
38.	IDE Netbeans
39.	Installer
40.	Internationalization Localization
41.	Inversion of Control
42.	Issue Tracking
43.	J2EE
44.	JBoss
45.	JMS
46.	JMX
47.	Library
48.	Mail Clients
49.	Net
50.	Parser
51.	PDF
52.	Portal
53.	Profiler
54.	Project Management
55.	Report
56.	RSS RDF
57.	Rule Engine
58.	Science
59.	Scripting
60.	Search Engine
61.	Security
62.	Sevlet Container
63.	Source Control
64.	Swing Library
65.	Template Engine
66.	Test Coverage
67.	Testing
68.	UML
69.	Web Crawler
70.	Web Framework
71.	Web Mail
72.	Web Server
73.	Web Services
74.	Web Services apache cxf 2.0.1
75.	Web Services AXIS2
76.	Wiki Engine
77.	Workflow Engines
78.	XML
79.	XML UI
Java
Java Tutorial
Illustrator Tutorials
GIMP Tutorials
C# / C Sharp
C# / CSharp Tutorial
C# / CSharp Open Source
SQL Server / T-SQL Tutorial
Oracle PL / SQL
Oracle PL/SQL Tutorial
Flash / Flex / ActionScript
VBA / Excel / Access / Word
XML
XML Tutorial
Microsoft Office PowerPoint 2007 Tutorial
Microsoft Office Excel 2007 Tutorial
Microsoft Office Word 2007 Tutorial
Java Source Code / Java Documentation » Database ORM » MMBase » org.mmbase.util.magicfile
Source Cross Referenced Class Diagram Java Document (Java Doc)
001:        /*
002:
003:        This software is OSI Certified Open Source Software.
004:        OSI Certified is a certification mark of the Open Source Initiative.
005:
006:        The license (Mozilla version 1.0) can be read at the MMBase site.
007:        See http://www.MMBase.org/license
008:
009:         */
010:
011:        package org.mmbase.util.magicfile;
012:
013:        import java.util.*;
014:        import java.io.*;
015:        import org.mmbase.util.logging.*;
016:
017:        /**
018:         * A Detector stores one entry from the magic.xml file, and contains
019:         * the functionality to determine whether a certain byte[] satisfies it.
020:         *
021:         * Implementation made on the basis of actual magic file and its manual.<br />
022:         *
023:         * TODO:<br />
024:         * - link the info with mimetypes<br />
025:         * - add test modifiers<br />
026:         * - add commandline switches for warning, error and debugging messages<br />
027:         *<br />
028:         * Ignored features of magic:<br />
029:         * - date types<br />
030:         * - indirect offsets (prefix of '&' in sublevel match, or (address+bytes) where offset = value of address plus bytes)<br />
031:         * - AND'ing of type<br />
032:         *<br />
033:         * BUGS:<br />
034:         * - test string isn't read when end of line is reached in absence of a message string<br />
035:         * <br />
036:         *
037:         * Tested:<br />
038:         * - .doc<br />
039:         * - .rtf<br />
040:         * - .pdf<br />
041:         * - .sh<br />
042:         * - .gz<br />
043:         * - .bz2<br />
044:         * - .html<br />
045:         * - .rpm<br />
046:         * - .wav<br />
047:         *<br />
048:         * Not supported by magic file:<br />
049:         * - StarOffice<br />
050:         * @version $Id: Detector.java,v 1.13 2007/02/24 21:57:50 nklasens Exp $
051:         */
052:
053:        public class Detector {
054:            private static final Logger log = Logging
055:                    .getLoggerInstance(Detector.class);
056:
057:            // No configuration below
058:            private static final int BIG_ENDIAN = 0;
059:            private static final int LITTLE_ENDIAN = 1;
060:            private static final String[] label = new String[] { "big endian",
061:                    "little endian" };
062:
063:            private String rawinput; // Original input line
064:            private int offset = -1;
065:            private String type;
066:            // types: byte, short, long, string, date, beshort, belong, bedate, leshort, lelong, ledate
067:            private String typeAND;
068:            // Some types are defined as e.g. "belong&0x0000ff70", then typeAND=0x0000ff70 (NOT IMPLEMENTED!)
069:            private String test; // Test value
070:            private char testComparator; // What the test is like,
071:            private String message; // Designation for this type in 'magic' file
072:            private List<String> extensions; // Possible file extensions for this type
073:            private String mimetype; // MimeType for this type
074:
075:            // What are these?
076:            private String xString;
077:            private int xInt;
078:            private char xChar;
079:
080:            private List<Detector> childList;
081:
082:            private boolean valid; // Set this if parsing of magic file fails
083:            private boolean hasX; // Is set when an 'x' value is matched
084:
085:            /**
086:             * Add an embedded detector object that searches for more details after an initial match.
087:             */
088:            public void addChild(Detector detector, int level) {
089:                if (level == 1) {
090:                    childList.add(detector);
091:                } else if (level > 1) {
092:                    if (childList.size() == 0) {
093:                        log.debug("Hm. level = " + level
094:                                + ", but childList is empty");
095:                    } else {
096:                        (childList.get(childList.size() - 1)).addChild(
097:                                detector, level - 1);
098:                    }
099:                }
100:            }
101:
102:            /**
103:             * Detectors are instanciated by MagicXMLReader, and by Parser.
104:             */
105:            Detector() {
106:                childList = new ArrayList<Detector>();
107:                extensions = new ArrayList<String>();
108:                mimetype = "application/octet-stream";
109:                message = "Unknown";
110:                valid = true;
111:            }
112:
113:            /**
114:             * Adds a possible extension. The last added one is the default (returned by 'getExtension').
115:             */
116:            public void setExtension(String extension) {
117:                extensions.add(0, extension);
118:            }
119:
120:            public String getExtension() {
121:                if (extensions.size() == 0) {
122:                    return "";
123:                }
124:                return extensions.get(0);
125:            }
126:
127:            public List<String> getExtensions() {
128:                return extensions;
129:            }
130:
131:            public void setMimeType(String mimetype) {
132:                this .mimetype = mimetype;
133:            }
134:
135:            public String getMimeType() {
136:                if (mimetype.equals("???")) {
137:                    return "application/octet-stream";
138:                } else {
139:                    return mimetype;
140:                }
141:            }
142:
143:            public void setDesignation(String designation) {
144:                this .message = designation;
145:            }
146:
147:            public void setOffset(String offset) {
148:                this .offset = Integer.parseInt(offset);
149:            }
150:
151:            public int getOffset() {
152:                return offset;
153:            }
154:
155:            public void setType(String type) {
156:                this .type = type;
157:            }
158:
159:            public String getType() {
160:                return type;
161:            }
162:
163:            public void setTest(String test) {
164:                this .test = test;
165:            }
166:
167:            public String getTest() {
168:                return test;
169:            }
170:
171:            public void setComparator(char comparator) {
172:                this .testComparator = comparator;
173:            }
174:
175:            public char getComparator() {
176:                return testComparator;
177:            }
178:
179:            /**
180:             * @return Whether detector matches the prefix/lithmus of the file
181:             */
182:            public boolean test(byte[] lithmus) {
183:                if (lithmus == null || lithmus.length == 0 || offset == -1) {
184:                    return false;
185:                }
186:                boolean hit;
187:                //log.debug("TESTING "+rawinput);
188:                if (type.equals("string")) {
189:                    hit = testString(lithmus);
190:                } else if (type.equals("beshort")) {
191:                    hit = testShort(lithmus, BIG_ENDIAN);
192:                } else if (type.equals("belong")) {
193:                    hit = testLong(lithmus, BIG_ENDIAN);
194:                } else if (type.equals("leshort")) {
195:                    hit = testShort(lithmus, LITTLE_ENDIAN);
196:                } else if (type.equals("lelong")) {
197:                    hit = testLong(lithmus, LITTLE_ENDIAN);
198:                } else if (type.equals("byte")) {
199:                    hit = testByte(lithmus);
200:                } else {
201:                    // Date types are not supported
202:                    hit = false;
203:                }
204:                if (hit) {
205:                    log.debug("Detector " + this  + " hit");
206:                    for (int i = 0; i < childList.size(); i++) {
207:                        Detector child = childList.get(i);
208:                        if (child.test(lithmus)) {
209:                            String s = child.getDesignation();
210:                            if (s.startsWith("\\b")) {
211:                                s = s.substring(2);
212:                            }
213:                            this .message = this .message + " " + s;
214:                        }
215:                    }
216:                }
217:                return hit;
218:            }
219:
220:            /**
221:             * todo: I noticed there is also a %5.5s variation in magic...
222:             */
223:            public String getDesignation() {
224:                if (hasX) {
225:                    int n = message.indexOf("%d");
226:                    if (n >= 0) {
227:                        return message.substring(0, n) + xInt
228:                                + message.substring(n + 2);
229:                    }
230:
231:                    n = message.indexOf("%s");
232:                    if (n >= 0) {
233:                        return message.substring(0, n) + xString
234:                                + message.substring(n + 2);
235:                    }
236:
237:                    n = message.indexOf("%c");
238:                    if (n >= 0) {
239:                        return message.substring(0, n) + xChar
240:                                + message.substring(n + 2);
241:                    }
242:                }
243:                return message;
244:            }
245:
246:            public void setInvalid() {
247:                valid = false;
248:            }
249:
250:            /**
251:             * @return Whether parsing of magic line for this detector succeeded
252:             */
253:            public boolean valid() {
254:                return valid;
255:            }
256:
257:            /**
258:             * @return Conversion of 2 byte array to integer
259:             */
260:            private int byteArrayToInt(byte[] ar) {
261:                StringBuffer buf = new StringBuffer();
262:                for (byte element : ar) {
263:                    buf.append(Integer.toHexString(element & 0x000000ff));
264:                }
265:                return Integer.decode("0x" + buf.toString()).intValue();
266:            }
267:
268:            /**
269:             * @return Conversion of 4 byte array to long
270:             */
271:            private long byteArrayToLong(byte[] ar) {
272:                StringBuffer buf = new StringBuffer();
273:                for (byte element : ar) {
274:                    buf.append(Integer.toHexString(element & 0x000000ff));
275:                }
276:                return Long.decode("0x" + buf.toString()).longValue();
277:            }
278:
279:            /**
280:             * Test whether a string matches
281:             */
282:            protected boolean testString(byte[] lithmus) {
283:
284:                if (test.length() == 0) {
285:                    log.warn("TEST STRING LENGTH ZERO FOR [" + rawinput + "]");
286:                    return false;
287:                }
288:
289:                int maxNeeded = offset + test.length();
290:
291:                if (maxNeeded > lithmus.length) {
292:                    return false;
293:                }
294:
295:                try {
296:                    xString = new String(lithmus, offset, test.length(),
297:                            "US-ASCII");
298:                    // US-ASCII: fixate the charset, do not depend on platform default:
299:                    //           US-ASCCII: one byte = one char, so length can be predicted
300:                } catch (java.io.UnsupportedEncodingException usee) { // could not happen: US-ASCII is supported
301:                }
302:
303:                log.debug("test string = '" + test + "' (" + message
304:                        + ") comparing with '" + xString + "'");
305:                int n = xString.compareTo(test);
306:                switch (testComparator) {
307:                case '=':
308:                    return n == 0;
309:                case '>':
310:                    hasX = true;
311:                    return n > 0;
312:                case '<':
313:                    hasX = true;
314:                    return n < 0;
315:                default:
316:                    return false;
317:                }
318:            }
319:
320:            /**
321:             * Test whether a short matches
322:             */
323:            protected boolean testShort(byte[] lithmus, int endian) {
324:                log
325:                        .debug("testing " + label[endian] + " short for "
326:                                + rawinput);
327:                int found = 0;
328:                if (endian == BIG_ENDIAN) {
329:                    found = byteArrayToInt(new byte[] { lithmus[offset],
330:                            lithmus[offset + 1] });
331:                } else if (endian == LITTLE_ENDIAN) {
332:                    found = byteArrayToInt(new byte[] { lithmus[offset + 1],
333:                            lithmus[offset] });
334:                }
335:                xInt = found;
336:
337:                if (test.equals("x")) {
338:                    hasX = true;
339:                    return true;
340:                } else if (test.equals("")) {
341:                    return false;
342:                } else {
343:                    int v = Integer.decode(test).intValue();
344:                    // Hm. How did that binary arithmatic go?
345:                    log
346:                            .debug("dumb string conversion: 0x"
347:                                    + Integer
348:                                            .toHexString(lithmus[offset] & 0x000000ff)
349:                                    + Integer
350:                                            .toHexString(lithmus[offset + 1] & 0x000000ff));
351:
352:                    switch (testComparator) {
353:                    case '=':
354:                        log.debug(Integer.toHexString(v) + " = "
355:                                + Integer.toHexString(found));
356:                        return v == found;
357:                    case '>':
358:                        hasX = true;
359:                        return found > v;
360:                    case '<':
361:                        hasX = true;
362:                        return found < v;
363:                    default:
364:                        return false;
365:                    }
366:                }
367:            }
368:
369:            /**
370:             * Test whether a long matches
371:             */
372:            protected boolean testLong(byte[] lithmus, int endian) {
373:                log.debug("testing " + label[endian] + " long for " + rawinput);
374:                long found = 0;
375:                try {
376:                    if (endian == BIG_ENDIAN) {
377:                        found = byteArrayToLong(new byte[] { lithmus[offset],
378:                                lithmus[offset + 1], lithmus[offset + 2],
379:                                lithmus[offset + 3] });
380:                    } else if (endian == LITTLE_ENDIAN) {
381:                        found = byteArrayToLong(new byte[] {
382:                                lithmus[offset + 3], lithmus[offset + 2],
383:                                lithmus[offset + 1], lithmus[offset] });
384:                    }
385:                } catch (ArrayIndexOutOfBoundsException e) {
386:                    if (!message.equals("")) {
387:                        log.error("Failed to test " + label[endian]
388:                                + " long for " + message);
389:                    } else {
390:                        log.error("Failed to test " + label[endian] + " long:");
391:                    }
392:                    log.error("Offset out of bounds: " + offset
393:                            + " while max is " /*+BUFSIZE*/);
394:                    return false;
395:                }
396:                xInt = (int) found;
397:                // If it really is a long, we wouldn't want to know about it
398:
399:                if (test.equals("x")) {
400:                    hasX = true;
401:                    return true;
402:                } else if (test.equals("")) {
403:                    return false;
404:                } else {
405:                    long v = Long.decode(test).longValue();
406:
407:                    // Hm. How did that binary arithmatic go?
408:
409:                    switch (testComparator) {
410:                    case '=':
411:                        log.debug("checking " + label[endian] + " long: "
412:                                + Long.toHexString(v) + " = "
413:                                + Long.toHexString(found));
414:                        return v == found;
415:                    case '>':
416:                        hasX = true;
417:                        return found > v;
418:                    case '<':
419:                        hasX = true;
420:                        return found < v;
421:                    default:
422:                        return false;
423:                    }
424:                }
425:            }
426:
427:            /**
428:             * Test whether a byte matches
429:             */
430:            protected boolean testByte(byte[] lithmus) {
431:                log.debug("testing byte for " + rawinput);
432:                if (test.equals("x")) {
433:                    hasX = true;
434:                    xInt = lithmus[offset];
435:                    xChar = (char) lithmus[offset];
436:                    xString = "" + xChar;
437:                    return true;
438:                } else if (test.equals("")) {
439:                    return false;
440:                } else {
441:                    byte b = (byte) Integer.decode(test).intValue();
442:                    switch (testComparator) {
443:                    // DOES THIS MAKE ANY SENSE AT ALL!!
444:                    case '=':
445:                        return b == lithmus[offset];
446:                    case '&':
447:                        // All bits in the test byte should be set in the found byte
448:                        //log.debug("byte test as string = '"+test+"'");
449:                        byte filter = (byte) (lithmus[offset] & b);
450:                        //log.debug("lithmus = "+lithmus[offset]+"; test = "+b+"; filter = "+filter);
451:                        return filter == b;
452:                    default:
453:                        return false;
454:                    }
455:                }
456:            }
457:
458:            /**
459:             * @return Original unprocessed input line
460:             * @since MMBase-1.7
461:             */
462:            public String getRawInput() {
463:                return rawinput;
464:            }
465:
466:            protected String xmlEntities(String s) {
467:                StringBuffer res = new StringBuffer();
468:                for (int i = 0; i < s.length(); i++) {
469:                    char c = s.charAt(i);
470:                    switch (c) {
471:                    case '>':
472:                        res.append("&gt;");
473:                        break;
474:                    case '<':
475:                        res.append("&lt;");
476:                        break;
477:                    case '&':
478:                        res.append("&amp;");
479:                        break;
480:                    default:
481:                        // Convert all characters not in the allowed XML character set
482:                        int n = c;
483:                        /* -- below is actual xml standard definition of allowed characters
484:                           if (n == 0x9 || n == 0xA || n == 0xD || (n >= 0x20 && n <= 0xD7FF) || (n >= 0xE000 && n <= 0xFFFD) ||
485:                           (n >= 0x10000 && n <= 0x10FFFF)) {
486:                         */
487:                        if (n == 0x9 || n == 0xA || n == 0xD
488:                                || (n >= 0x20 && n < 128)) {
489:                            res.append(c);
490:                        } else {
491:                            // octal representation of number; pad with zeros
492:                            String oct = Integer.toOctalString(n);
493:                            res.append("\\");
494:                            for (int j = 3; j > oct.length(); j--) {
495:                                res.append("0");
496:                            }
497:                            res.append(oct);
498:                        }
499:                    }
500:                }
501:                return res.toString();
502:            }
503:
504:            /**
505:             * XML notatie:
506:             * <detector>
507:             *   <mimetype>foo/bar</mimetype>
508:             *   <extension>bar</extension>
509:             *   <designation>blablabla</designation>
510:             *   <test offset="bla" type="bla" comparator="=">test string</test>
511:             *   <childlist>
512:             *     <detector>etc</detector>
513:             *   </childlist>
514:             * </detector>
515:             *
516:             */
517:            public void toXML(FileWriter f) throws IOException {
518:                toXML(f, 0);
519:            }
520:
521:            /**
522:             * @param level Indicates depth of (child) element
523:             */
524:            public void toXML(FileWriter f, int level) throws IOException {
525:                StringBuffer s = new StringBuffer();
526:                String comparatorEntity;
527:
528:                char[] pad;
529:                if (level > 0) {
530:                    pad = new char[level * 4];
531:                    for (int i = 0; i < level * 4; i++) {
532:                        pad[i] = ' ';
533:                    }
534:                } else {
535:                    pad = new char[] {};
536:                }
537:                String padStr = new String(pad);
538:
539:                if (testComparator == '>') {
540:                    comparatorEntity = "&gt;";
541:                } else if (testComparator == '<') {
542:                    comparatorEntity = "&lt;";
543:                } else if (testComparator == '&') {
544:                    comparatorEntity = "&amp;";
545:                } else {
546:                    comparatorEntity = "" + testComparator;
547:                }
548:                s.append(padStr + "<detector>\n" + padStr + "  <mimetype>"
549:                        + getMimeType() + "</mimetype>\n" + padStr
550:                        + "  <extension>" + getExtension() + "</extension>\n"
551:                        + padStr + "  <designation>" + xmlEntities(message)
552:                        + "</designation>\n" + padStr + "  <test offset=\""
553:                        + offset + "\" type=\"" + type + "\" comparator=\""
554:                        + comparatorEntity + "\">" + xmlEntities(test)
555:                        + "</test>\n");
556:                f.write(s.toString());
557:                if (childList.size() > 0) {
558:                    f.write(padStr + "  <childlist>\n");
559:                    for (Detector detector : childList) {
560:                        detector.toXML(f, level + 1);
561:                    }
562:                    f.write(padStr + "  </childlist>\n");
563:                }
564:                f.write(padStr + "</detector>\n");
565:
566:            }
567:
568:            /**
569:             * @return String representation of Detector object.
570:             */
571:            public String toString() {
572:                if (!valid) {
573:                    return "parse error";
574:                } else {
575:                    StringBuffer res = new StringBuffer("[" + offset + "] {"
576:                            + type);
577:                    if (typeAND != null) {
578:                        res.append("[" + typeAND + "]");
579:                    }
580:                    res.append("} " + testComparator + "(" + test + ") "
581:                            + message);
582:                    if (childList.size() > 0) {
583:                        res.append("\n");
584:                        for (int i = 0; i < childList.size(); i++) {
585:                            res.append("> ")
586:                                    .append(childList.get(i).toString());
587:                        }
588:                    }
589:                    return res.toString();
590:                }
591:            }
592:        }
www.java2java.com | Contact Us
All other trademarks are property of their respective owners.