Source Code Cross Referenced for MagicParser.java in  » Database-ORM » MMBase » org » mmbase » util » magicfile » Java Source Code / Java DocumentationJava Source Code and Java Documentation

Java Source Code / Java Documentation
1. 6.0 JDK Core
2. 6.0 JDK Modules
3. 6.0 JDK Modules com.sun
4. 6.0 JDK Modules com.sun.java
5. 6.0 JDK Modules sun
6. 6.0 JDK Platform
7. Ajax
8. Apache Harmony Java SE
9. Aspect oriented
10. Authentication Authorization
11. Blogger System
12. Build
13. Byte Code
14. Cache
15. Chart
16. Chat
17. Code Analyzer
18. Collaboration
19. Content Management System
20. Database Client
21. Database DBMS
22. Database JDBC Connection Pool
23. Database ORM
24. Development
25. EJB Server geronimo
26. EJB Server GlassFish
27. EJB Server JBoss 4.2.1
28. EJB Server resin 3.1.5
29. ERP CRM Financial
30. ESB
31. Forum
32. GIS
33. Graphic Library
34. Groupware
35. HTML Parser
36. IDE
37. IDE Eclipse
38. IDE Netbeans
39. Installer
40. Internationalization Localization
41. Inversion of Control
42. Issue Tracking
43. J2EE
44. JBoss
45. JMS
46. JMX
47. Library
48. Mail Clients
49. Net
50. Parser
51. PDF
52. Portal
53. Profiler
54. Project Management
55. Report
56. RSS RDF
57. Rule Engine
58. Science
59. Scripting
60. Search Engine
61. Security
62. Servlet Container
63. Source Control
64. Swing Library
65. Template Engine
66. Test Coverage
67. Testing
68. UML
69. Web Crawler
70. Web Framework
71. Web Mail
72. Web Server
73. Web Services
74. Web Services apache cxf 2.0.1
75. Web Services AXIS2
76. Wiki Engine
77. Workflow Engines
78. XML
79. XML UI
Java
Java Tutorial
Java Open Source
Jar File Download
Java Articles
Java Products
Java by API
Photoshop Tutorials
Maya Tutorials
Flash Tutorials
3ds-Max Tutorials
Illustrator Tutorials
GIMP Tutorials
C# / C Sharp
C# / CSharp Tutorial
C# / CSharp Open Source
ASP.Net
ASP.NET Tutorial
JavaScript DHTML
JavaScript Tutorial
JavaScript Reference
HTML / CSS
HTML CSS Reference
C / ANSI-C
C Tutorial
C++
C++ Tutorial
Ruby
PHP
Python
Python Tutorial
Python Open Source
SQL Server / T-SQL
SQL Server / T-SQL Tutorial
Oracle PL / SQL
Oracle PL/SQL Tutorial
PostgreSQL
SQL / MySQL
MySQL Tutorial
VB.Net
VB.Net Tutorial
Flash / Flex / ActionScript
VBA / Excel / Access / Word
XML
XML Tutorial
Microsoft Office PowerPoint 2007 Tutorial
Microsoft Office Excel 2007 Tutorial
Microsoft Office Word 2007 Tutorial
Java Source Code / Java Documentation » Database ORM » MMBase » org.mmbase.util.magicfile 
Source Cross Referenced  Class Diagram Java Document (Java Doc) 


001:        /*
002:
003:        This software is OSI Certified Open Source Software.  OSI Certified is
004:        a certification mark of the Open Source Initiative.
005:
006:        The license (Mozilla version 1.0) can be read at the MMBase site.
007:        See http://www.MMBase.org/license
008:
009:         */
010:
011:        package org.mmbase.util.magicfile;
012:
013:        import java.io.BufferedReader;
014:        import java.io.File;
015:        import java.io.FileReader;
016:        import java.io.FileWriter;
017:        import java.io.IOException;
018:        import java.util.List;
019:        import java.util.Vector;
020:
021:        import org.mmbase.util.logging.Logger;
022:        import org.mmbase.util.logging.Logging;
023:
024:        /**
025:         * This Parser translates the configuration file of UNIX's file to a
026:         * list of Detectors (and to a magic.xml) Perhaps it's easier to
027:         * rewrite this stuff to perl or something like that.
028:         *
029:         * @version $Id: MagicParser.java,v 1.12 2007/02/24 21:57:50 nklasens Exp $
030:         * @todo NOT TESTED YET
031:         */
032:
033:        public class MagicParser implements  DetectorProvider {
034:            /**
035:             * the default files used to create the Detectors
036:             * DEFAULT_MAGIC_FILE = "/etc/mime-magic"
037:             */
038:            public final static String DEFAULT_MAGIC_FILE = "/etc/mime-magic";
039:
040:            private static final Logger log = Logging
041:                    .getLoggerInstance(MagicParser.class);
042:            private List<Detector> detectors;
043:
044:            private int offset;
045:            private String type;
046:            private String typeAND;
047:            private String test;
048:            private String message;
049:            private char testComparator;
050:
051:            public MagicParser() {
052:                this (DEFAULT_MAGIC_FILE);
053:            }
054:
055:            /**
056:             * Construct a new MagicParser with configuration file
057:             * @since MMBase-1.7
058:             */
059:            public MagicParser(String fileName) {
060:                log.info("creating a new MagicParser with configuration"
061:                        + DEFAULT_MAGIC_FILE);
062:                try {
063:                    BufferedReader br = new BufferedReader(new FileReader(
064:                            new File(fileName)));
065:                    String line;
066:                    detectors = new Vector<Detector>();
067:
068:                    while ((line = br.readLine()) != null) {
069:                        Detector d = createDetector(line);
070:                        if (d != null) {
071:                            //System.out.println(d.toString());
072:                            detectors.add(d);
073:                        }
074:                    }
075:                } catch (Exception e) {
076:                    log.error("" + e.getMessage() + "}", e);
077:                }
078:                ;
079:            }
080:
081:            public List<Detector> getDetectors() {
082:                return detectors;
083:            }
084:
085:            // --------------------------------------------------------------------------------
086:            // some utitily functions
087:
088:            protected int nextWhiteSpace(String s) {
089:                return nextWhiteSpace(s, 0);
090:            }
091:
092:            protected int nextWhiteSpace(String s, int startIndex) {
093:                for (int j = startIndex; j < s.length(); j++) {
094:                    if (s.charAt(j) == ' ' || s.charAt(j) == '\t'
095:                            || s.charAt(j) == '\n') {
096:                        return j;
097:                    }
098:                }
099:                return s.length();
100:            }
101:
102:            protected int nextNonWhiteSpace(String s, int startIndex) {
103:                for (int j = startIndex; j < s.length(); j++) {
104:                    if (s.charAt(j) != ' ' && s.charAt(j) != '\t') {
105:                        return j;
106:                    }
107:                }
108:                return -1;
109:            }
110:
111:            /**
112:             * Separate command from offset
113:             * @exception Throws an exception when parsing failed
114:             */
115:            private int parseOffsetString(String s, int startIndex)
116:                    throws Exception {
117:                try {
118:                    int m = nextWhiteSpace(s, startIndex);
119:
120:                    // Bail out when encountering an indirect offset
121:                    char c = s.charAt(startIndex);
122:                    // '&': In sublevel we can start relatively to where the previous match ended
123:                    // '(': Read value at first address, and add that at second to it
124:                    if (c == '&') {
125:                        throw new UnsupportedOperationException(
126:                                "parseOffsetString: >& offset feature not implemented\n(Tt is used only for HP Printer Job Language type)");
127:                    } else if (c == '(') {
128:                        throw new UnsupportedOperationException(
129:                                "parseOffsetString: indirect offsets not implemented");
130:                    }
131:                    offset = Integer.decode(s.substring(startIndex, m))
132:                            .intValue();
133:                    return nextNonWhiteSpace(s, m + 1);
134:                } catch (NumberFormatException e) {
135:                    // log.error("string->integer conversion failure for '"+s+"'");
136:                    throw new Exception(
137:                            "parseOffetString: string->integer conversion failure for '"
138:                                    + s + "'");
139:                }
140:            }
141:
142:            /**
143:             * Parse the type string from the magic file
144:             *
145:             *   -- nothing to be done: the found string is already atomic :-)
146:             */
147:            private int parseTypeString(String s, int startIndex)
148:                    throws Exception {
149:                int m = nextWhiteSpace(s, startIndex);
150:                if (m <= startIndex) {
151:                    throw new Exception(
152:                            "parseTypeString: failed to delimit type string");
153:                }
154:                int n = s.indexOf('&', startIndex);
155:                if (n > -1 && n < m - 2) {
156:                    type = s.substring(startIndex, n);
157:                    typeAND = s.substring(n + 1, m);
158:                } else {
159:                    type = s.substring(startIndex, m);
160:                    typeAND = "0";
161:                }
162:                return nextNonWhiteSpace(s, m + 1);
163:            }
164:
165:            /**
166:             * Parse the test string from the magic file
167:             *   -- determine: a.) the test comparator, and b.) the test value
168:             */
169:            private int parseTestString(String s, int startIndex)
170:                    throws Exception {
171:                int start = 0;
172:                //int m = nextWhiteSpace(s,startIndex); // XXX need a better algorithm to account for '\' syntax
173:                // Can't use nextWhiteSpace here, we need harder parsing...
174:                boolean backslashmode = false;
175:                boolean octalmode = false;
176:                boolean hexmode = false;
177:                //int l = s.length();
178:                char c;
179:                StringBuffer numbuf = new StringBuffer();
180:
181:                test = "";
182:
183:                c = s.charAt(startIndex);
184:                switch (c) {
185:                case '>':
186:                case '<':
187:                case '&':
188:                case '^':
189:                case '=':
190:                    testComparator = c;
191:                    start = 1;
192:                    break;
193:                default:
194:                    testComparator = '=';
195:                    break;
196:                }
197:                if (s.charAt(startIndex + start) == '~'
198:                        || s.charAt(startIndex + start) == '!') {
199:                    // XXX do nothing with these, but remove them to get rid of decode errors
200:                    start++;
201:                }
202:                int i = startIndex + start;
203:
204:                if (!type.equals("string")) {
205:                    int m = nextWhiteSpace(s, i);
206:                    String t = s.substring(i, m);
207:                    if (t.equals("x")) {
208:                        test = "x";
209:                    } else if (type.equals("beshort") || type.equals("leshort")) {
210:                        try {
211:                            test = "0x"
212:                                    + Integer.toHexString(Integer.decode(
213:                                            s.substring(i, m)).intValue());
214:                            //test.addElement(Integer.decode(s.substring(i,m)));
215:                        } catch (NumberFormatException e) {
216:                            throw new Exception("decode(" + s.substring(i, m)
217:                                    + ")");
218:                        }
219:                    } else if (type.equals("belong") || type.equals("lelong")) {
220:                        // Values possibly too long for Integer, while Long type won't parse :-(
221:                        int endIndex = m;
222:                        try {
223:                            //test.addElement(Long.decode(s.substring(i,m)));
224:                            if (s.charAt(m - 1) == 'L'
225:                                    || s.charAt(m - 1) == 'l') {
226:                                endIndex = m - 1;
227:                            }
228:                            test = "0x"
229:                                    + Long.toHexString(Long.decode(
230:                                            s.substring(i, endIndex))
231:                                            .longValue());
232:                        } catch (NumberFormatException e) {
233:                            log.error(e.getMessage());
234:                            log.error(Logging.stackTrace(e));
235:                            throw new Exception("parseLong("
236:                                    + s.substring(i, endIndex) + ") ");
237:                        }
238:                    } else if (type.equals("byte")) {
239:                        try {
240:                            test = "0x"
241:                                    + Integer.toHexString(Integer.decode(
242:                                            s.substring(i, m)).intValue());
243:                            //test.addElement(Integer.decode(s.substring(i,m)));
244:                        } catch (NumberFormatException e) {
245:                            throw new Exception("decode(" + s.substring(i, m)
246:                                    + ")");
247:                        }
248:                    }
249:                    i = m;
250:                } else {
251:                    StringBuffer buf = new StringBuffer();
252:
253:                    int m = s.length();
254:                    while (i < m) {
255:                        c = s.charAt(i);
256:                        if (backslashmode) {
257:                            switch (c) {
258:                            case 'n':
259:                                backslashmode = false;
260:                                buf.append('\n');
261:                                break;
262:                            case 'r':
263:                                backslashmode = false;
264:                                buf.append('\r');
265:                                break;
266:                            case 't':
267:                                backslashmode = false;
268:                                buf.append('\t');
269:                                break;
270:                            case '\\':
271:                                if (hexmode) {
272:                                    try {
273:                                        //test.addElement(Integer.decode("0x"+numbuf.toString()));
274:                                        test = test
275:                                                + (char) Integer
276:                                                        .decode(
277:                                                                "0x"
278:                                                                        + numbuf
279:                                                                                .toString())
280:                                                        .intValue();
281:                                    } catch (NumberFormatException e) {
282:                                        throw new Exception("decode(0x"
283:                                                + numbuf.toString()
284:                                                + ") faalde");
285:                                    }
286:                                    hexmode = false;
287:                                } else if (octalmode) {
288:                                    try {
289:                                        //test.addElement(Integer.decode("0"+numbuf.toString()));
290:                                        test = test
291:                                                + (char) Integer
292:                                                        .decode(
293:                                                                "0"
294:                                                                        + numbuf
295:                                                                                .toString())
296:                                                        .intValue();
297:                                    } catch (NumberFormatException e) {
298:                                        throw new Exception("decode(0"
299:                                                + numbuf.toString()
300:                                                + ") faalde");
301:                                    }
302:                                    octalmode = false;
303:                                } else {
304:                                    backslashmode = false;
305:                                    buf.append('\\');
306:                                }
307:                                break;
308:                            case 'x':
309:                                if (octalmode && numbuf.length() == 3) {
310:                                    try {
311:                                        //test.addElement(Integer.decode("0"+numbuf.toString()));
312:                                        test = test
313:                                                + (char) Integer
314:                                                        .decode(
315:                                                                "0"
316:                                                                        + numbuf
317:                                                                                .toString())
318:                                                        .intValue();
319:                                    } catch (NumberFormatException e) {
320:                                        throw new Exception("decode(0"
321:                                                + numbuf.toString()
322:                                                + ") faalde");
323:                                    }
324:                                    octalmode = false;
325:                                    backslashmode = false;
326:                                    buf = new StringBuffer();
327:                                    buf.append('x');
328:                                } else {
329:                                    hexmode = true;
330:                                    numbuf = new StringBuffer();
331:                                    if (buf.length() > 0) {
332:                                        test = test + buf.toString();
333:                                        buf = new StringBuffer();
334:                                    }
335:                                }
336:                                break;
337:                            case '0':
338:                            case '1':
339:                            case '2':
340:                            case '3':
341:                            case '4':
342:                            case '5':
343:                            case '6':
344:                            case '7':
345:                            case '8':
346:                            case '9':
347:                                // We should be in octalmode or hexmode here!!
348:                                if (!octalmode && !hexmode) {
349:                                    if (buf.length() > 0) {
350:                                        //test.addElement(buf.toString());
351:                                        test = test + buf.toString();
352:                                        buf = new StringBuffer();
353:                                    }
354:                                    octalmode = true;
355:                                    numbuf = new StringBuffer();
356:                                }
357:                                numbuf.append(c);
358:                                break;
359:                            case ' ':
360:                                if (octalmode) {
361:                                    try {
362:                                        //test.addElement(Integer.decode("0"+numbuf.toString()));
363:                                        test = test
364:                                                + (char) Integer
365:                                                        .decode(
366:                                                                "0"
367:                                                                        + numbuf
368:                                                                                .toString())
369:                                                        .intValue();
370:                                    } catch (NumberFormatException e) {
371:                                        throw new Exception("decode(0"
372:                                                + numbuf.toString()
373:                                                + ") faalde");
374:                                    }
375:                                    octalmode = false;
376:                                } else if (hexmode) {
377:                                    try {
378:                                        //test.addElement(Integer.decode("0x"+numbuf.toString()));
379:                                        test = test
380:                                                + (char) Integer
381:                                                        .decode(
382:                                                                "0x"
383:                                                                        + numbuf
384:                                                                                .toString())
385:                                                        .intValue();
386:                                    } catch (NumberFormatException e) {
387:                                        throw new Exception("decode(0x"
388:                                                + numbuf.toString()
389:                                                + ") faalde");
390:                                    }
391:                                    hexmode = false;
392:                                } else {
393:                                    buf.append(' ');
394:                                }
395:                                backslashmode = false;
396:                                break;
397:                            default:
398:                                if (hexmode) {
399:                                    if (c == 'a' || c == 'A' || c == 'b'
400:                                            || c == 'B' || c == 'c' || c == 'C'
401:                                            || c == 'd' || c == 'D' || c == 'e'
402:                                            || c == 'E' || c == 'f' || c == 'F') {
403:                                        numbuf.append(c);
404:                                    } else {
405:                                        try {
406:                                            //test.addElement(Integer.decode("0x"+numbuf.toString()));
407:                                            test = test
408:                                                    + (char) Integer
409:                                                            .decode(
410:                                                                    "0x"
411:                                                                            + numbuf
412:                                                                                    .toString())
413:                                                            .intValue();
414:                                        } catch (NumberFormatException e) {
415:                                            throw new Exception("decode(0x"
416:                                                    + numbuf.toString()
417:                                                    + ") faalde");
418:                                        }
419:                                        hexmode = false;
420:                                        backslashmode = false;
421:                                    }
422:                                } else if (octalmode) {
423:                                    try {
424:                                        //test.addElement(Integer.decode("0"+numbuf.toString()));
425:                                        test = test
426:                                                + (char) Integer
427:                                                        .decode(
428:                                                                "0"
429:                                                                        + numbuf
430:                                                                                .toString())
431:                                                        .intValue();
432:                                    } catch (NumberFormatException e) {
433:                                        throw new Exception("decode(0"
434:                                                + numbuf.toString()
435:                                                + ") faalde");
436:                                    }
437:                                    octalmode = false;
438:                                    backslashmode = false;
439:                                } else {
440:                                    backslashmode = false;
441:                                    //tmp[testIndex++] = charToByte(c);
442:                                    buf.append(c);
443:                                }
444:                            }
445:                        } else if (c == '\\') {
446:                            if (buf.length() > 0) {
447:                                //test.addElement(buf.toString());
448:                                test = test + buf.toString();
449:                                buf = new StringBuffer();
450:                            }
451:                            backslashmode = true;
452:                        } else if (c == ' ' || c == '\t' || c == '\n'
453:                                || i == m - 1) { // Don't forget to set values on end of string
454:                            if (buf.length() > 0) {
455:                                //test.addElement(buf.toString());
456:                                test = test + buf.toString();
457:                                buf = new StringBuffer();
458:                            }
459:                            if (numbuf.length() > 0) {
460:                                if (octalmode) {
461:                                    try {
462:                                        //test.addElement(Integer.decode("0"+numbuf.toString()));
463:                                        test = test
464:                                                + (char) Integer
465:                                                        .decode(
466:                                                                "0"
467:                                                                        + numbuf
468:                                                                                .toString())
469:                                                        .intValue();
470:                                    } catch (NumberFormatException e) {
471:                                        throw new Exception("decode(0"
472:                                                + numbuf.toString()
473:                                                + ") faalde");
474:                                    }
475:                                    octalmode = false;
476:                                    backslashmode = false;
477:                                } else if (hexmode) {
478:                                    try {
479:                                        //test.addElement(Integer.decode("0x"+numbuf.toString()));
480:                                        test = test
481:                                                + (char) Integer
482:                                                        .decode(
483:                                                                "0x"
484:                                                                        + numbuf
485:                                                                                .toString())
486:                                                        .intValue();
487:                                    } catch (NumberFormatException e) {
488:                                        throw new Exception("decode(0x"
489:                                                + numbuf.toString()
490:                                                + ") faalde");
491:                                    }
492:                                    hexmode = false;
493:                                    backslashmode = false;
494:                                }
495:                            }
496:                            break;
497:                        } else {
498:                            buf.append(c);
499:                        }
500:                        i++;
501:                    }
502:                }
503:                //log.debug("test size = "+test.size());
504:                //log.debug("test = "+vectorToString(test));
505:                return nextNonWhiteSpace(s, i + 1);
506:            }
507:
508:            /**
509:             * Parse the message string from the magic file
510:             *
511:             *   -- nothing to be done: the found string is already atomic :-)
512:             */
513:            private int parseMessageString(String s, int startIndex)
514:                    throws Exception {
515:                if (false)
516:                    throw new Exception(
517:                            "dummy exception to stop jikes from complaining");
518:                message = s.substring(startIndex);
519:                return s.length() - 1;
520:
521:            }
522:
523:            private Detector createDetector(String line) {
524:                Detector detector = new Detector();
525:                // rawinput = line;
526:
527:                // hasX = false;
528:                //xInt = -99;
529:                //xString = "default";
530:                //xChar = 'x';
531:
532:                // parse line
533:                log.debug("parse: " + line);
534:                int n;
535:                String level = "start";
536:                try {
537:                    level = "parseOffsetString";
538:                    n = parseOffsetString(line, 0);
539:                    level = "parseTypeString";
540:                    n = parseTypeString(line, n);
541:                    level = "parseTestString";
542:                    n = parseTestString(line, n);
543:                    // If there are multiple test level, an upper one doesn't have to have a message string
544:                    if (n > 0) {
545:                        level = "parseMessageString";
546:                        parseMessageString(line, n);
547:                    } else {
548:                        message = "";
549:                    }
550:                    level = "end";
551:                } catch (UnsupportedOperationException e) {
552:                    log.warn(e.getMessage());
553:                } catch (Exception e) {
554:                    log.error("parse failure at " + level + ": "
555:                            + e.getMessage() + " for [" + line + "]");
556:                }
557:                detector.setType(type);
558:                detector.setOffset("" + offset);
559:                detector.setTest(test);
560:                detector.setComparator(testComparator);
561:                detector.setMimeType(message);
562:                detector.setDesignation(message);
563:                return detector;
564:            }
565:
566:            public boolean toXML(String path) throws IOException {
567:                File f = new File(path);
568:                return toXML(f);
569:            }
570:
571:            /**
572:             * Write the current datastructure to an XML file
573:             */
574:            public boolean toXML(File f) throws IOException {
575:                FileWriter writer = new FileWriter(f);
576:
577:                writer
578:                        .write("<!DOCTYPE magic PUBLIC \"-//MMBase//DTD magic config 1.0//EN\" \"http://www.mmbase.org/dtd/magic_1_0.dtd\">\n<magic>\n<info>\n<version>0.1</version>\n<author>cjr@dds.nl</author>\n<description>Conversion of the UNIX 'magic' file with added mime types and extensions.</description>\n</info>\n<detectorlist>\n");
579:                for (Detector detector : getDetectors()) {
580:                    detector.toXML(writer);
581:                }
582:                writer.write("</detectorlist>\n</magic>\n");
583:                writer.close();
584:                return true;
585:            }
586:
587:            public static void main(String[] argv) throws IOException {
588:                if (argv.length != 2) {
589:                    System.err
590:                            .println(MagicParser.class.getName()
591:                                    + " can be used to convert from mime files to mmbase magic.xml file format");
592:                    System.err.println("Usage:" + MagicParser.class.getName()
593:                            + " inpurtFileName outputfile.xml");
594:                    System.err.println("Example:" + MagicParser.class.getName()
595:                            + " /etc/mime-magic outputfile.xml");
596:                    System.exit(1);
597:                }
598:                System.out.println("reading the mime file");
599:                MagicParser parser = new MagicParser(argv[0]);
600:                System.out.println("writing the xml file");
601:                parser.toXML(new File(argv[1]));
602:                System.out.println("finished");
603:            }
604:        }
www.java2java.com | Contact Us
Copyright 2009 - 12 Demo Source and Support. All rights reserved.
All other trademarks are property of their respective owners.