001: /*
002:
003: This software is OSI Certified Open Source Software. OSI Certified is
004: a certification mark of the Open Source Initiative.
005:
006: The license (Mozilla version 1.0) can be read at the MMBase site.
007: See http://www.MMBase.org/license
008:
009: */
010:
011: package org.mmbase.util.magicfile;
012:
013: import java.io.BufferedReader;
014: import java.io.File;
015: import java.io.FileReader;
016: import java.io.FileWriter;
017: import java.io.IOException;
018: import java.util.List;
019: import java.util.Vector;
020:
021: import org.mmbase.util.logging.Logger;
022: import org.mmbase.util.logging.Logging;
023:
/**
 * This parser translates the configuration file of the UNIX <code>file</code>
 * command into a list of Detectors (and into a magic.xml). Perhaps it would be
 * easier to rewrite this in Perl or something similar.
 *
 * @version $Id: MagicParser.java,v 1.12 2007/02/24 21:57:50 nklasens Exp $
 * @todo NOT TESTED YET
 */
032:
033: public class MagicParser implements DetectorProvider {
034: /**
035: * the default files used to create the Detectors
036: * DEFAULT_MAGIC_FILE = "/etc/mime-magic"
037: */
038: public final static String DEFAULT_MAGIC_FILE = "/etc/mime-magic";
039:
040: private static final Logger log = Logging
041: .getLoggerInstance(MagicParser.class);
042: private List<Detector> detectors;
043:
044: private int offset;
045: private String type;
046: private String typeAND;
047: private String test;
048: private String message;
049: private char testComparator;
050:
051: public MagicParser() {
052: this (DEFAULT_MAGIC_FILE);
053: }
054:
055: /**
056: * Construct a new MagicParser with configuration file
057: * @since MMBase-1.7
058: */
059: public MagicParser(String fileName) {
060: log.info("creating a new MagicParser with configuration"
061: + DEFAULT_MAGIC_FILE);
062: try {
063: BufferedReader br = new BufferedReader(new FileReader(
064: new File(fileName)));
065: String line;
066: detectors = new Vector<Detector>();
067:
068: while ((line = br.readLine()) != null) {
069: Detector d = createDetector(line);
070: if (d != null) {
071: //System.out.println(d.toString());
072: detectors.add(d);
073: }
074: }
075: } catch (Exception e) {
076: log.error("" + e.getMessage() + "}", e);
077: }
078: ;
079: }
080:
081: public List<Detector> getDetectors() {
082: return detectors;
083: }
084:
085: // --------------------------------------------------------------------------------
086: // some utitily functions
087:
088: protected int nextWhiteSpace(String s) {
089: return nextWhiteSpace(s, 0);
090: }
091:
092: protected int nextWhiteSpace(String s, int startIndex) {
093: for (int j = startIndex; j < s.length(); j++) {
094: if (s.charAt(j) == ' ' || s.charAt(j) == '\t'
095: || s.charAt(j) == '\n') {
096: return j;
097: }
098: }
099: return s.length();
100: }
101:
102: protected int nextNonWhiteSpace(String s, int startIndex) {
103: for (int j = startIndex; j < s.length(); j++) {
104: if (s.charAt(j) != ' ' && s.charAt(j) != '\t') {
105: return j;
106: }
107: }
108: return -1;
109: }
110:
111: /**
112: * Separate command from offset
113: * @exception Throws an exception when parsing failed
114: */
115: private int parseOffsetString(String s, int startIndex)
116: throws Exception {
117: try {
118: int m = nextWhiteSpace(s, startIndex);
119:
120: // Bail out when encountering an indirect offset
121: char c = s.charAt(startIndex);
122: // '&': In sublevel we can start relatively to where the previous match ended
123: // '(': Read value at first address, and add that at second to it
124: if (c == '&') {
125: throw new UnsupportedOperationException(
126: "parseOffsetString: >& offset feature not implemented\n(Tt is used only for HP Printer Job Language type)");
127: } else if (c == '(') {
128: throw new UnsupportedOperationException(
129: "parseOffsetString: indirect offsets not implemented");
130: }
131: offset = Integer.decode(s.substring(startIndex, m))
132: .intValue();
133: return nextNonWhiteSpace(s, m + 1);
134: } catch (NumberFormatException e) {
135: // log.error("string->integer conversion failure for '"+s+"'");
136: throw new Exception(
137: "parseOffetString: string->integer conversion failure for '"
138: + s + "'");
139: }
140: }
141:
142: /**
143: * Parse the type string from the magic file
144: *
145: * -- nothing to be done: the found string is already atomic :-)
146: */
147: private int parseTypeString(String s, int startIndex)
148: throws Exception {
149: int m = nextWhiteSpace(s, startIndex);
150: if (m <= startIndex) {
151: throw new Exception(
152: "parseTypeString: failed to delimit type string");
153: }
154: int n = s.indexOf('&', startIndex);
155: if (n > -1 && n < m - 2) {
156: type = s.substring(startIndex, n);
157: typeAND = s.substring(n + 1, m);
158: } else {
159: type = s.substring(startIndex, m);
160: typeAND = "0";
161: }
162: return nextNonWhiteSpace(s, m + 1);
163: }
164:
165: /**
166: * Parse the test string from the magic file
167: * -- determine: a.) the test comparator, and b.) the test value
168: */
169: private int parseTestString(String s, int startIndex)
170: throws Exception {
171: int start = 0;
172: //int m = nextWhiteSpace(s,startIndex); // XXX need a better algorithm to account for '\' syntax
173: // Can't use nextWhiteSpace here, we need harder parsing...
174: boolean backslashmode = false;
175: boolean octalmode = false;
176: boolean hexmode = false;
177: //int l = s.length();
178: char c;
179: StringBuffer numbuf = new StringBuffer();
180:
181: test = "";
182:
183: c = s.charAt(startIndex);
184: switch (c) {
185: case '>':
186: case '<':
187: case '&':
188: case '^':
189: case '=':
190: testComparator = c;
191: start = 1;
192: break;
193: default:
194: testComparator = '=';
195: break;
196: }
197: if (s.charAt(startIndex + start) == '~'
198: || s.charAt(startIndex + start) == '!') {
199: // XXX do nothing with these, but remove them to get rid of decode errors
200: start++;
201: }
202: int i = startIndex + start;
203:
204: if (!type.equals("string")) {
205: int m = nextWhiteSpace(s, i);
206: String t = s.substring(i, m);
207: if (t.equals("x")) {
208: test = "x";
209: } else if (type.equals("beshort") || type.equals("leshort")) {
210: try {
211: test = "0x"
212: + Integer.toHexString(Integer.decode(
213: s.substring(i, m)).intValue());
214: //test.addElement(Integer.decode(s.substring(i,m)));
215: } catch (NumberFormatException e) {
216: throw new Exception("decode(" + s.substring(i, m)
217: + ")");
218: }
219: } else if (type.equals("belong") || type.equals("lelong")) {
220: // Values possibly too long for Integer, while Long type won't parse :-(
221: int endIndex = m;
222: try {
223: //test.addElement(Long.decode(s.substring(i,m)));
224: if (s.charAt(m - 1) == 'L'
225: || s.charAt(m - 1) == 'l') {
226: endIndex = m - 1;
227: }
228: test = "0x"
229: + Long.toHexString(Long.decode(
230: s.substring(i, endIndex))
231: .longValue());
232: } catch (NumberFormatException e) {
233: log.error(e.getMessage());
234: log.error(Logging.stackTrace(e));
235: throw new Exception("parseLong("
236: + s.substring(i, endIndex) + ") ");
237: }
238: } else if (type.equals("byte")) {
239: try {
240: test = "0x"
241: + Integer.toHexString(Integer.decode(
242: s.substring(i, m)).intValue());
243: //test.addElement(Integer.decode(s.substring(i,m)));
244: } catch (NumberFormatException e) {
245: throw new Exception("decode(" + s.substring(i, m)
246: + ")");
247: }
248: }
249: i = m;
250: } else {
251: StringBuffer buf = new StringBuffer();
252:
253: int m = s.length();
254: while (i < m) {
255: c = s.charAt(i);
256: if (backslashmode) {
257: switch (c) {
258: case 'n':
259: backslashmode = false;
260: buf.append('\n');
261: break;
262: case 'r':
263: backslashmode = false;
264: buf.append('\r');
265: break;
266: case 't':
267: backslashmode = false;
268: buf.append('\t');
269: break;
270: case '\\':
271: if (hexmode) {
272: try {
273: //test.addElement(Integer.decode("0x"+numbuf.toString()));
274: test = test
275: + (char) Integer
276: .decode(
277: "0x"
278: + numbuf
279: .toString())
280: .intValue();
281: } catch (NumberFormatException e) {
282: throw new Exception("decode(0x"
283: + numbuf.toString()
284: + ") faalde");
285: }
286: hexmode = false;
287: } else if (octalmode) {
288: try {
289: //test.addElement(Integer.decode("0"+numbuf.toString()));
290: test = test
291: + (char) Integer
292: .decode(
293: "0"
294: + numbuf
295: .toString())
296: .intValue();
297: } catch (NumberFormatException e) {
298: throw new Exception("decode(0"
299: + numbuf.toString()
300: + ") faalde");
301: }
302: octalmode = false;
303: } else {
304: backslashmode = false;
305: buf.append('\\');
306: }
307: break;
308: case 'x':
309: if (octalmode && numbuf.length() == 3) {
310: try {
311: //test.addElement(Integer.decode("0"+numbuf.toString()));
312: test = test
313: + (char) Integer
314: .decode(
315: "0"
316: + numbuf
317: .toString())
318: .intValue();
319: } catch (NumberFormatException e) {
320: throw new Exception("decode(0"
321: + numbuf.toString()
322: + ") faalde");
323: }
324: octalmode = false;
325: backslashmode = false;
326: buf = new StringBuffer();
327: buf.append('x');
328: } else {
329: hexmode = true;
330: numbuf = new StringBuffer();
331: if (buf.length() > 0) {
332: test = test + buf.toString();
333: buf = new StringBuffer();
334: }
335: }
336: break;
337: case '0':
338: case '1':
339: case '2':
340: case '3':
341: case '4':
342: case '5':
343: case '6':
344: case '7':
345: case '8':
346: case '9':
347: // We should be in octalmode or hexmode here!!
348: if (!octalmode && !hexmode) {
349: if (buf.length() > 0) {
350: //test.addElement(buf.toString());
351: test = test + buf.toString();
352: buf = new StringBuffer();
353: }
354: octalmode = true;
355: numbuf = new StringBuffer();
356: }
357: numbuf.append(c);
358: break;
359: case ' ':
360: if (octalmode) {
361: try {
362: //test.addElement(Integer.decode("0"+numbuf.toString()));
363: test = test
364: + (char) Integer
365: .decode(
366: "0"
367: + numbuf
368: .toString())
369: .intValue();
370: } catch (NumberFormatException e) {
371: throw new Exception("decode(0"
372: + numbuf.toString()
373: + ") faalde");
374: }
375: octalmode = false;
376: } else if (hexmode) {
377: try {
378: //test.addElement(Integer.decode("0x"+numbuf.toString()));
379: test = test
380: + (char) Integer
381: .decode(
382: "0x"
383: + numbuf
384: .toString())
385: .intValue();
386: } catch (NumberFormatException e) {
387: throw new Exception("decode(0x"
388: + numbuf.toString()
389: + ") faalde");
390: }
391: hexmode = false;
392: } else {
393: buf.append(' ');
394: }
395: backslashmode = false;
396: break;
397: default:
398: if (hexmode) {
399: if (c == 'a' || c == 'A' || c == 'b'
400: || c == 'B' || c == 'c' || c == 'C'
401: || c == 'd' || c == 'D' || c == 'e'
402: || c == 'E' || c == 'f' || c == 'F') {
403: numbuf.append(c);
404: } else {
405: try {
406: //test.addElement(Integer.decode("0x"+numbuf.toString()));
407: test = test
408: + (char) Integer
409: .decode(
410: "0x"
411: + numbuf
412: .toString())
413: .intValue();
414: } catch (NumberFormatException e) {
415: throw new Exception("decode(0x"
416: + numbuf.toString()
417: + ") faalde");
418: }
419: hexmode = false;
420: backslashmode = false;
421: }
422: } else if (octalmode) {
423: try {
424: //test.addElement(Integer.decode("0"+numbuf.toString()));
425: test = test
426: + (char) Integer
427: .decode(
428: "0"
429: + numbuf
430: .toString())
431: .intValue();
432: } catch (NumberFormatException e) {
433: throw new Exception("decode(0"
434: + numbuf.toString()
435: + ") faalde");
436: }
437: octalmode = false;
438: backslashmode = false;
439: } else {
440: backslashmode = false;
441: //tmp[testIndex++] = charToByte(c);
442: buf.append(c);
443: }
444: }
445: } else if (c == '\\') {
446: if (buf.length() > 0) {
447: //test.addElement(buf.toString());
448: test = test + buf.toString();
449: buf = new StringBuffer();
450: }
451: backslashmode = true;
452: } else if (c == ' ' || c == '\t' || c == '\n'
453: || i == m - 1) { // Don't forget to set values on end of string
454: if (buf.length() > 0) {
455: //test.addElement(buf.toString());
456: test = test + buf.toString();
457: buf = new StringBuffer();
458: }
459: if (numbuf.length() > 0) {
460: if (octalmode) {
461: try {
462: //test.addElement(Integer.decode("0"+numbuf.toString()));
463: test = test
464: + (char) Integer
465: .decode(
466: "0"
467: + numbuf
468: .toString())
469: .intValue();
470: } catch (NumberFormatException e) {
471: throw new Exception("decode(0"
472: + numbuf.toString()
473: + ") faalde");
474: }
475: octalmode = false;
476: backslashmode = false;
477: } else if (hexmode) {
478: try {
479: //test.addElement(Integer.decode("0x"+numbuf.toString()));
480: test = test
481: + (char) Integer
482: .decode(
483: "0x"
484: + numbuf
485: .toString())
486: .intValue();
487: } catch (NumberFormatException e) {
488: throw new Exception("decode(0x"
489: + numbuf.toString()
490: + ") faalde");
491: }
492: hexmode = false;
493: backslashmode = false;
494: }
495: }
496: break;
497: } else {
498: buf.append(c);
499: }
500: i++;
501: }
502: }
503: //log.debug("test size = "+test.size());
504: //log.debug("test = "+vectorToString(test));
505: return nextNonWhiteSpace(s, i + 1);
506: }
507:
508: /**
509: * Parse the message string from the magic file
510: *
511: * -- nothing to be done: the found string is already atomic :-)
512: */
513: private int parseMessageString(String s, int startIndex)
514: throws Exception {
515: if (false)
516: throw new Exception(
517: "dummy exception to stop jikes from complaining");
518: message = s.substring(startIndex);
519: return s.length() - 1;
520:
521: }
522:
523: private Detector createDetector(String line) {
524: Detector detector = new Detector();
525: // rawinput = line;
526:
527: // hasX = false;
528: //xInt = -99;
529: //xString = "default";
530: //xChar = 'x';
531:
532: // parse line
533: log.debug("parse: " + line);
534: int n;
535: String level = "start";
536: try {
537: level = "parseOffsetString";
538: n = parseOffsetString(line, 0);
539: level = "parseTypeString";
540: n = parseTypeString(line, n);
541: level = "parseTestString";
542: n = parseTestString(line, n);
543: // If there are multiple test level, an upper one doesn't have to have a message string
544: if (n > 0) {
545: level = "parseMessageString";
546: parseMessageString(line, n);
547: } else {
548: message = "";
549: }
550: level = "end";
551: } catch (UnsupportedOperationException e) {
552: log.warn(e.getMessage());
553: } catch (Exception e) {
554: log.error("parse failure at " + level + ": "
555: + e.getMessage() + " for [" + line + "]");
556: }
557: detector.setType(type);
558: detector.setOffset("" + offset);
559: detector.setTest(test);
560: detector.setComparator(testComparator);
561: detector.setMimeType(message);
562: detector.setDesignation(message);
563: return detector;
564: }
565:
566: public boolean toXML(String path) throws IOException {
567: File f = new File(path);
568: return toXML(f);
569: }
570:
571: /**
572: * Write the current datastructure to an XML file
573: */
574: public boolean toXML(File f) throws IOException {
575: FileWriter writer = new FileWriter(f);
576:
577: writer
578: .write("<!DOCTYPE magic PUBLIC \"-//MMBase//DTD magic config 1.0//EN\" \"http://www.mmbase.org/dtd/magic_1_0.dtd\">\n<magic>\n<info>\n<version>0.1</version>\n<author>cjr@dds.nl</author>\n<description>Conversion of the UNIX 'magic' file with added mime types and extensions.</description>\n</info>\n<detectorlist>\n");
579: for (Detector detector : getDetectors()) {
580: detector.toXML(writer);
581: }
582: writer.write("</detectorlist>\n</magic>\n");
583: writer.close();
584: return true;
585: }
586:
587: public static void main(String[] argv) throws IOException {
588: if (argv.length != 2) {
589: System.err
590: .println(MagicParser.class.getName()
591: + " can be used to convert from mime files to mmbase magic.xml file format");
592: System.err.println("Usage:" + MagicParser.class.getName()
593: + " inpurtFileName outputfile.xml");
594: System.err.println("Example:" + MagicParser.class.getName()
595: + " /etc/mime-magic outputfile.xml");
596: System.exit(1);
597: }
598: System.out.println("reading the mime file");
599: MagicParser parser = new MagicParser(argv[0]);
600: System.out.println("writing the xml file");
601: parser.toXML(new File(argv[1]));
602: System.out.println("finished");
603: }
604: }
|