001: package br.com.gfp.ofc.tools;
002:
003: import java.io.BufferedReader;
004: import java.io.File;
005: import java.io.FileNotFoundException;
006: import java.io.FileReader;
007: import java.io.FileWriter;
008: import java.io.IOException;
009: import java.util.HashMap;
010: import java.util.Map;
011: import java.util.StringTokenizer;
012:
/**
 * Converts an SGML OFC file into an XML file in which every opening tag has
 * a matching closing tag.
 * <p>
 * To use it, instantiate the class and call
 * {@link #execute(File, File)}.
 *
 * @author Marcelo Adamatti
 * @since 06/09/2007
 */
public class OFCConversor {
    /** Input path used by {@link #main(String[])} when no argument is given. */
    private static final String DEFAULT_INPUT = "/home/igor/Desktop/extrato.ofc";

    /** Output path used by {@link #main(String[])} when no argument is given. */
    private static final String DEFAULT_OUTPUT = "/home/igor/Desktop/extrato.xml";

    /**
     * For test only. Converts one file.
     *
     * @param args optional: {@code args[0]} is the input OFC file and
     *             {@code args[1]} the output XML file; when fewer than two
     *             arguments are given the built-in default paths are used
     * @throws Exception if the conversion fails
     */
    public static void main(String[] args) throws Exception {
        boolean hasPaths = args != null && args.length >= 2;
        File fileIn = new File(hasPaths ? args[0] : DEFAULT_INPUT);
        File fileOut = new File(hasPaths ? args[1] : DEFAULT_OUTPUT);
        OFCConversor c = new OFCConversor();
        c.execute(fileIn, fileOut);
    }

    /**
     * Reads the input file as SGML and writes it to the output file as XML
     * with closing tags.
     *
     * @param fileIn  the SGML OFC file to read
     * @param fileOut the XML file to write (created or overwritten)
     * @throws FileNotFoundException if the input file cannot be opened
     * @throws IOException           if reading or writing fails
     */
    public void execute(File fileIn, File fileOut)
            throws FileNotFoundException, IOException {
        String text = getText(fileIn);
        Map<String, String> tags = getClosedTags(text);
        writeOut(fileOut, text, tags);
    }

    /**
     * Writes the XML output, one input line per output line, each preceded by
     * a line feed.
     * <p>
     * A line that starts with an opening tag whose name is not in
     * {@code tags} gets {@code </name>} appended. Closing-tag lines, lines
     * whose tag is already closed somewhere in the text, and lines that do
     * not start with a complete {@code <...>} tag are copied unchanged.
     *
     * @param file the file to write
     * @param text the whole input text, lines separated by {@code '\n'}
     * @param tags names of the tags that already have a closing tag
     * @throws IOException if the file cannot be opened or written
     */
    private void writeOut(File file, String text,
            Map<String, String> tags) throws IOException {
        FileWriter writer = new FileWriter(file);
        try {
            StringTokenizer st = new StringTokenizer(text, "\n");
            while (st.hasMoreTokens()) {
                writer.write("\n" + closeLine(st.nextToken(), tags));
            }
        } finally {
            // Release the file handle even when a write fails.
            writer.close();
        }
    }

    /**
     * Returns the line with a closing tag appended when it needs one,
     * otherwise the line unchanged.
     */
    private String closeLine(String line, Map<String, String> tags) {
        String trimmed = line.trim();
        // Blank lines, plain text and closing tags carry no tag to close.
        if (!trimmed.startsWith("<") || trimmed.startsWith("</")) {
            return line;
        }
        int end = trimmed.indexOf('>');
        if (end < 0) {
            // "<" without ">" on this line: not a complete tag, copy as-is.
            return line;
        }
        String tag = trimmed.substring(1, end);
        if (tags.containsKey(tag)) {
            return line;
        }
        return line + "</" + tag + ">";
    }

    /**
     * Loads the OFC file as a single string.
     * <p>
     * Every line is terminated with {@code '\n'} regardless of the line
     * separator used in the file, and every {@code '&'} is replaced by the
     * letter {@code 'e'} because a bare ampersand is not valid in XML text.
     * The replacement is lossy; it is presumably meant as the Portuguese word
     * for "and". The file is decoded with the platform default charset.
     *
     * @param file the file to read
     * @return the file content with normalized line ends and no ampersands
     * @throws FileNotFoundException if the file cannot be opened
     * @throws IOException           if reading fails
     */
    protected String getText(File file) throws FileNotFoundException,
            IOException {
        BufferedReader buffer = new BufferedReader(new FileReader(file));
        StringBuilder sb = new StringBuilder();
        try {
            String line;
            while ((line = buffer.readLine()) != null) {
                sb.append(line).append('\n');
            }
        } finally {
            // Closing the BufferedReader also closes the wrapped FileReader.
            buffer.close();
        }
        return sb.toString().replace('&', 'e');
    }

    /**
     * Finds the names of all tags that have an explicit closing tag
     * ({@code </name>}) anywhere in the text.
     *
     * @param text the whole input text
     * @return a map whose keys (and values) are the names of the closed tags
     */
    private Map<String, String> getClosedTags(String text) {
        Map<String, String> tags = new HashMap<String, String>();
        int open = text.indexOf('<');
        while (open >= 0) {
            int close = text.indexOf('>', open + 1);
            if (close < 0) {
                // Unterminated "<" at the end of the text: nothing more to scan.
                break;
            }
            String tag = text.substring(open + 1, close);
            if (tag.startsWith("/")) {
                String name = tag.substring(1);
                tags.put(name, name);
            }
            open = text.indexOf('<', close + 1);
        }
        return tags;
    }
}
|