001: /*
002: * Project: Lius
003: * Package: de.teamskill.lius.index.application
004: *
005: * Copyright (c) 2004 by Jens Fendler <jf@teamskill.de>
006: *
007: * This program is a free software; you can redistribute it and/or modify it
008: * under the terms of the GNU General Public License as published by the Free
009: * Software Foundation; either version 2 of the License, or (at your option) any
010: * later version. This program is distributed in the hope that it will be
011: * useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
012: * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
013: * Public License for more details. You should have received a copy of the GNU
014: * General Public License along with this program; if not, write to the Free
015: * Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
016: * USA
017: */
018:
019: package de.teamskill.util.parser;
020:
021: import java.io.BufferedReader;
022: import java.io.File;
023: import java.io.FileInputStream;
024: import java.io.IOException;
025: import java.io.InputStream;
026: import java.io.InputStreamReader;
027:
/**
 * Class: TexParser <br>
 *
 * Reads some interesting meta-data (document class, author, title, abstract
 * and document body) from a LaTeX source file. The complete source is read
 * and scanned once, inside the constructor; the getters only hand back what
 * was found then, or <code>null</code> for a field that is not present.
 *
 * Changelog:
 * <ul>
 * <li>01.06.2005: Initial implementation. Does not take any tex-options into
 * account yet. Just a quick hack so far..</li>
 * </ul>
 *
 * @author <a href="mailto:jf@teamskill.de">Jens Fendler </a>
 */
public class TexParser {

    private static final String PATTERN_DOCUMENTCLASS_START = "\\documentclass{";

    private static final String PATTERN_DOCUMENTCLASS_END = "}";

    private static final String PATTERN_AUTHOR_START = "\\author{";

    private static final String PATTERN_AUTHOR_END = "}";

    private static final String PATTERN_TITLE_START = "\\title{";

    private static final String PATTERN_TITLE_END = "}";

    private static final String PATTERN_ABSTRACT_START = "\\begin{abstract}";

    private static final String PATTERN_ABSTRACT_END = "\\end{abstract}";

    private static final String PATTERN_CONTENT_START = "\\begin{document}";

    private static final String PATTERN_CONTENT_END = "\\end{document}";

    /** End pattern that marks a field as a brace-delimited TeX argument. */
    private static final String CLOSING_BRACE = "}";

    private InputStream stream;

    private String texDocumentclass = null;

    private String texAuthor = null;

    private String texTitle = null;

    private String texAbstract = null;

    private String texContent = null;

    /**
     * Parses the LaTeX source supplied by the given stream. The stream is read
     * to its end but is NOT closed; closing it stays the caller's job.
     *
     * @param texStream stream delivering the LaTeX source
     * @throws IOException if reading from the stream fails
     * @throws IllegalStateException if <code>texStream</code> is
     *         <code>null</code>
     */
    public TexParser(InputStream texStream) throws IOException {
        this.stream = texStream;
        parse();
    }

    /**
     * Parses the LaTeX file with the given name. The file is closed again
     * before the constructor returns.
     *
     * @param texFilename path of the LaTeX file
     * @throws IOException if the file cannot be opened or read
     */
    public TexParser(String texFilename) throws IOException {
        parseAndClose(new FileInputStream(texFilename));
    }

    /**
     * Parses the given LaTeX file. The file is closed again before the
     * constructor returns.
     *
     * @param texFile the LaTeX file
     * @throws IOException if the file cannot be opened or read
     */
    public TexParser(File texFile) throws IOException {
        parseAndClose(new FileInputStream(texFile));
    }

    /**
     * Parses a stream that this class opened itself and always closes it
     * afterwards, so the file constructors do not leak a file handle.
     */
    private void parseAndClose(InputStream ownedStream) throws IOException {
        this.stream = ownedStream;
        try {
            parse();
        } finally {
            ownedStream.close();
        }
    }

    /**
     * Extracts the text between the first occurrence of
     * <code>startPattern</code> and the end of that field.
     *
     * When <code>endPattern</code> is a single closing brace the field is
     * treated as a TeX argument: nested <code>{...}</code> groups are skipped
     * and the brace that balances the opening one ends the field. For any
     * other end pattern the first occurrence after the start pattern is used.
     *
     * @return the field text, or <code>null</code> if the start pattern or a
     *         matching end cannot be found
     */
    private String getField(String text, String startPattern,
            String endPattern) {
        int startIndex = text.indexOf(startPattern);
        if (startIndex == -1) {
            return null;
        }
        int contentStart = startIndex + startPattern.length();
        int endIndex;
        if (CLOSING_BRACE.equals(endPattern)) {
            endIndex = findClosingBrace(text, contentStart);
        } else {
            endIndex = text.indexOf(endPattern, contentStart);
        }
        if (endIndex == -1) {
            return null;
        }
        return text.substring(contentStart, endIndex);
    }

    /**
     * Finds the brace closing a group whose opening brace sits directly before
     * <code>from</code>.
     *
     * @return index of the balancing <code>}</code>, or -1 if the group is
     *         never closed
     */
    private int findClosingBrace(String text, int from) {
        int depth = 1;
        for (int i = from; i < text.length(); i++) {
            char c = text.charAt(i);
            if (c == '\\') {
                // A backslash escapes the next character, so "\{" and "\}"
                // are literal braces and must not change the nesting depth.
                i++;
            } else if (c == '{') {
                depth++;
            } else if (c == '}') {
                depth--;
                if (depth == 0) {
                    return i;
                }
            }
        }
        return -1;
    }

    /**
     * Reads the whole stream into memory and fills the five tex* fields.
     *
     * @throws IOException if reading from the stream fails
     * @throws IllegalStateException if no stream has been set
     */
    private void parse() throws IOException {
        if (stream == null) {
            throw new IllegalStateException("No InputStream available.");
        }
        // NOTE(review): the bytes are decoded with the platform default
        // charset; confirm whether the callers need a fixed encoding.
        BufferedReader br = new BufferedReader(new InputStreamReader(stream));
        StringBuffer texBuffer = new StringBuffer();
        String line;
        // read the whole tex document in a buffer
        while ((line = br.readLine()) != null) {
            // readLine() drops the line terminator; put one back so that the
            // last word of a line is not fused with the first of the next.
            texBuffer.append(line).append('\n');
        }
        String texSource = texBuffer.toString();

        texTitle = getField(texSource, PATTERN_TITLE_START,
                PATTERN_TITLE_END);
        texAuthor = getField(texSource, PATTERN_AUTHOR_START,
                PATTERN_AUTHOR_END);
        texDocumentclass = getField(texSource, PATTERN_DOCUMENTCLASS_START,
                PATTERN_DOCUMENTCLASS_END);
        texAbstract = getField(texSource, PATTERN_ABSTRACT_START,
                PATTERN_ABSTRACT_END);
        texContent = getField(texSource, PATTERN_CONTENT_START,
                PATTERN_CONTENT_END);
    }

    /**
     * @return the argument of the first <code>\title{...}</code>, or
     *         <code>null</code> if none was found
     */
    public String getTitle() {
        return texTitle;
    }

    /**
     * @return the argument of the first <code>\author{...}</code>, or
     *         <code>null</code> if none was found
     */
    public String getAuthor() {
        return texAuthor;
    }

    /**
     * @return the argument of the first <code>\documentclass{...}</code>, or
     *         <code>null</code> if none was found
     */
    public String getDocumentclass() {
        return texDocumentclass;
    }

    /**
     * @return the text inside the first abstract environment, or
     *         <code>null</code> if none was found
     */
    public String getAbstract() {
        return texAbstract;
    }

    /**
     * @return the text inside the document environment, or <code>null</code>
     *         if none was found
     */
    public String getContent() {
        return texContent;
    }

    /**
     * Command line entry point: parses the file named by the first argument
     * and prints the extracted fields to standard output.
     *
     * @param args expects the path of a LaTeX file as first element
     */
    public static void main(String[] args) {

        if (args.length == 0) {
            System.out.println("Usage: TexParser <tex file>");
            System.exit(1);
        }
        try {
            TexParser tp = new TexParser(args[0]);
            System.out.println("LaTeX Document Class : "
                    + tp.getDocumentclass());
            System.out.println("Author : " + tp.getAuthor());
            System.out.println("Title : " + tp.getTitle());
            System.out.println("Abstract : " + tp.getAbstract());
            if (tp.getContent() != null) {
                System.out.println("Content : ");
                // print the document body under its own heading (the title
                // was printed here before)
                System.out.println(tp.getContent());
            }
        } catch (IOException e) {
            e.printStackTrace(System.err);
        }
    }

}
|