001: /*
002: * Piscator: a small SQL/XML search engine
003: * Copyright (C) 2007 Luk Morbee
004: *
005: * This program is free software; you can redistribute it and/or modify
006: * it under the terms of the GNU General Public License as published by
007: * the Free Software Foundation; either version 2 of the License, or
008: * (at your option) any later version.
009: *
010: * This program is distributed in the hope that it will be useful,
011: * but WITHOUT ANY WARRANTY; without even the implied warranty of
012: * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
013: * GNU General Public License for more details.
014: *
015: * You should have received a copy of the GNU General Public License
016: * along with this program; if not, write to the Free Software
017: * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
018: */
019: package piscator.service;
020:
021: import java.io.File;
022: import java.io.IOException;
023: import java.io.StringWriter;
024: import java.io.Writer;
025:
026: import javax.xml.parsers.ParserConfigurationException;
027: import javax.xml.parsers.SAXParser;
028: import javax.xml.parsers.SAXParserFactory;
029:
030: import org.apache.commons.logging.Log;
031: import org.apache.commons.logging.LogFactory;
032: import org.w3c.dom.Element;
033: import org.xml.sax.Attributes;
034: import org.xml.sax.SAXException;
035: import org.xml.sax.helpers.DefaultHandler;
036:
037: import piscator.dao.DocumentTableDao;
038: import piscator.util.DomUtils;
039:
040: import com.sun.org.apache.xml.internal.serialize.OutputFormat;
041: import com.sun.org.apache.xml.internal.serialize.XMLSerializer;
042:
043: /**
044: * The StoreServer splits an incomming xml and stores the seperate documents in a document table.
045: */
046: public class StoreServer {
047:
048: private static Log LOG = LogFactory.getLog(StoreServer.class);
049:
050: private DocumentTableDao documentTableDao = null;
051:
052: public DocumentTableDao getDocumentTableDao() {
053: return documentTableDao;
054: }
055:
056: public void setDocumentTableDao(DocumentTableDao documentTableDao) {
057: this .documentTableDao = documentTableDao;
058: }
059:
060: private SAXParser getParser() throws ParserConfigurationException,
061: SAXException {
062: SAXParserFactory factory = SAXParserFactory.newInstance();
063: return factory.newSAXParser();
064: }
065:
066: /**
067: * Splits and stores the documents in a document table according to configuration settings.
068: */
069: public void storeDocuments(File documentsFile, Element config)
070: throws SAXException, IOException,
071: ParserConfigurationException {
072: if (config != null) {
073: SplitHandler handler = new SplitHandler(config);
074: LOG.info("Storing ...");
075: getParser().parse(documentsFile, handler);
076: }
077: }
078:
079: /**
080: * It's this ContentHandler that does the actual splitting and storing
081: * according to configuration settings.
082: */
083: public class SplitHandler extends DefaultHandler {
084:
085: private String documentTable = null;
086: private String documentTag = null;
087: private Writer writer = null;
088: private XMLSerializer serializer = null;
089:
090: public SplitHandler(Element config) {
091: documentTable = DomUtils.getValue(config,
092: "./document-table/attribute::name");
093: documentTag = DomUtils.getValue(config,
094: "./document-table/attribute::document-tag");
095: }
096:
097: /* (non-Javadoc)
098: * @see org.xml.sax.ContentHandler#startDocument()
099: */
100: public void startDocument() throws SAXException {
101: documentTableDao.create(documentTable);
102: }
103:
104: /* (non-Javadoc)
105: * @see org.xml.sax.ContentHandler#endDocument()
106: */
107: public void endDocument() throws SAXException {
108: }
109:
110: /*
111: * (non-Javadoc)
112: * @see org.xml.sax.ContentHandler#startElement(java.lang.String, java.lang.String, java.lang.String, org.xml.sax.Attributes)
113: */
114: public void startElement(String namespaceURI, String localName,
115: String qName, Attributes atts) throws SAXException {
116: if (documentTag.equals(qName)) {
117: OutputFormat outputFormat = new OutputFormat();
118: outputFormat.setEncoding("UTF-8");
119: outputFormat.setIndenting(false);
120: outputFormat.setOmitXMLDeclaration(true);
121: writer = new StringWriter();
122: serializer = new XMLSerializer(writer, outputFormat);
123: serializer.startDocument();
124: }
125: if (serializer != null) {
126: serializer.startElement(namespaceURI, localName, qName,
127: atts);
128: }
129: }
130:
131: /*
132: * (non-Javadoc)
133: * @see org.xml.sax.ContentHandler#endElement(java.lang.String, java.lang.String, java.lang.String)
134: */
135: public void endElement(String namespaceURI, String localName,
136: String qName) throws SAXException {
137: if (serializer != null) {
138: serializer.endElement(namespaceURI, localName, qName);
139: }
140: if (documentTag.equals(qName)) {
141: serializer.endDocument();
142: String xml = writer.toString();
143: xml = xml.replaceAll(">\n\\s+<", "><");
144: documentTableDao.insert(documentTable, xml);
145: serializer = null;
146: writer = null;
147: }
148: }
149:
150: /*
151: * (non-Javadoc)
152: * @see org.xml.sax.ContentHandler#characters(char[], int, int)
153: */
154: public void characters(char[] ch, int start, int length)
155: throws SAXException {
156: if (serializer != null) {
157: serializer.characters(ch, start, length);
158: }
159: }
160:
161: /*
162: * (non-Javadoc)
163: * @see org.xml.sax.ContentHandler#ignorableWhitespace(char[], int, int)
164: */
165: public void ignorableWhitespace(char[] ch, int start, int length)
166: throws SAXException {
167: if (serializer != null) {
168: serializer.ignorableWhitespace(ch, start, length);
169: }
170: }
171:
172: /*
173: * (non-Javadoc)
174: * @see org.xml.sax.ContentHandler#processingInstruction(java.lang.String, java.lang.String)
175: */
176: public void processingInstruction(String target, String data)
177: throws SAXException {
178: if (serializer != null) {
179: serializer.processingInstruction(target, data);
180: }
181: }
182:
183: }
184: }
|