001: /*
002: * Licensed to the Apache Software Foundation (ASF) under one or more
003: * contributor license agreements. See the NOTICE file distributed with
004: * this work for additional information regarding copyright ownership.
005: * The ASF licenses this file to You under the Apache License, Version 2.0
006: * (the "License"); you may not use this file except in compliance with
007: * the License. You may obtain a copy of the License at
008: *
009: * http://www.apache.org/licenses/LICENSE-2.0
010: *
011: * Unless required by applicable law or agreed to in writing, software
012: * distributed under the License is distributed on an "AS IS" BASIS,
013: * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014: * See the License for the specific language governing permissions and
015: * limitations under the License.
016: */
017:
018: package org.apache.cocoon.transformation;
019:
020: import net.sourceforge.chaperon.build.LexicalAutomatonBuilder;
021: import net.sourceforge.chaperon.model.lexicon.Lexicon;
022: import net.sourceforge.chaperon.model.lexicon.LexiconFactory;
023: import net.sourceforge.chaperon.process.LexicalAutomaton;
024: import net.sourceforge.chaperon.process.LexicalProcessor;
025:
026: import org.apache.avalon.excalibur.pool.Recyclable;
027: import org.apache.avalon.framework.activity.Disposable;
028: import org.apache.avalon.framework.logger.LogEnabled;
029: import org.apache.avalon.framework.logger.Logger;
030: import org.apache.avalon.framework.parameters.ParameterException;
031: import org.apache.avalon.framework.parameters.Parameterizable;
032: import org.apache.avalon.framework.parameters.Parameters;
033: import org.apache.avalon.framework.service.ServiceException;
034: import org.apache.avalon.framework.service.ServiceManager;
035: import org.apache.avalon.framework.service.Serviceable;
036:
037: import org.apache.cocoon.ProcessingException;
038: import org.apache.cocoon.caching.CacheableProcessingComponent;
039: import org.apache.cocoon.components.source.SourceUtil;
040: import org.apache.cocoon.environment.SourceResolver;
041: import org.apache.cocoon.xml.XMLConsumer;
042:
043: //import org.apache.commons.logging.impl.AvalonLogger;
044:
045: import org.apache.excalibur.source.Source;
046: import org.apache.excalibur.source.SourceException;
047: import org.apache.excalibur.source.SourceValidity;
048: import org.apache.excalibur.store.Store;
049:
050: import org.xml.sax.SAXException;
051:
052: import java.io.IOException;
053: import java.io.Serializable;
054:
055: import java.util.Map;
056:
057: /**
058: * This transfomer transforms special mark text part of a XML file into lexemes by using a lexicon
059: * file.
060: *
061: * <p>
062: * Input:
063: * </p>
064: * <pre>
065: * <text xmlns="http://chaperon.sourceforge.net/schema/text/1.0">
066: * Text 123 bla
067: * </text>
068: * </pre>
069: *
070: * <p>
071: * were transform into the following output:
072: * </p>
073: * <pre>
074: * <lexemes xmlns="http://chaperon.sourceforge.net/schema/lexemes/1.0">
075: * <lexeme symbol="word" text="Text"/>
076: * <lexeme symbol="number" text="123"/>
077: * <lexeme symbol="word" text="bla"/>
078: * </lexemes>
079: * </pre>
080: *
081: * @author <a href="mailto:stephan@apache.org">Stephan Michels </a>
082: * @version CVS $Id: LexicalTransformer.java 433543 2006-08-22 06:22:54Z crossley $
083: */
084: public class LexicalTransformer extends LexicalProcessor implements
085: Transformer, LogEnabled, Serviceable, Recyclable, Disposable,
086: Parameterizable, CacheableProcessingComponent {
087: private String lexicon = null;
088: private Source lexiconSource = null;
089: private Logger logger = null;
090: private ServiceManager manager = null;
091: private SourceResolver resolver = null;
092:
093: /**
094: * Provide component with a logger.
095: *
096: * @param logger the logger
097: */
098: public void enableLogging(Logger logger) {
099: this .logger = logger;
100:
101: // TODO: check if the loglevel is correct LogKitLogger -> Logger
102: // setLog(new AvalonLogger(logger));
103: }
104:
105: /**
106: * Pass the ServiceManager to the object. The Serviceable implementation
107: * should use the specified ServiceManager to acquire the services it needs
108: * for execution.
109: *
110: * @param manager The ServiceManager which this Serviceable uses.
111: */
112: public void service(ServiceManager manager) {
113: this .manager = manager;
114: }
115:
116: /**
117: * Provide component with parameters.
118: *
119: * @param parameters the parameters
120: *
121: * @throws ParameterException if parameters are invalid
122: */
123: public void parameterize(Parameters parameters)
124: throws ParameterException {
125: //setRecovery(parameters.getParameterAsBoolean("recovery", false));
126: setLocalizable(parameters.getParameterAsBoolean("localizable",
127: false));
128: }
129:
130: /**
131: * Set the <code>XMLConsumer</code> that will receive XML data.
132: *
133: * @param consumer
134: */
135: public void setConsumer(XMLConsumer consumer) {
136: setContentHandler(consumer);
137: setLexicalHandler(consumer);
138: }
139:
140: /**
141: * Set the SourceResolver, objectModel Map, the source and sitemap Parameters used to process the
142: * request.
143: *
144: * @param resolver Source resolver
145: * @param objectmodel Object model
146: * @param src Source
147: * @param parameters Parameters
148: *
149: * @throws IOException
150: * @throws ProcessingException
151: * @throws SAXException
152: */
153: public void setup(SourceResolver resolver, Map objectmodel,
154: String src, Parameters parameters)
155: throws ProcessingException, SAXException, IOException {
156: this .resolver = resolver;
157:
158: Store store = null;
159:
160: try {
161: this .lexicon = src;
162:
163: this .lexiconSource = resolver.resolveURI(this .lexicon);
164:
165: // Retrieve the parser table from the transient store
166: store = (Store) this .manager.lookup(Store.TRANSIENT_STORE);
167:
168: LexicalAutomatonEntry entry = (LexicalAutomatonEntry) store
169: .get(this .lexiconSource.getURI());
170:
171: // If the parser table has changed, rebuild the parser table
172: if ((entry == null)
173: || (entry.getValidity() == null)
174: || (entry.getValidity().isValid(
175: this .lexiconSource.getValidity()) <= 0)) {
176: this .logger.info("(Re)building the automaton from '"
177: + this .lexiconSource.getURI() + "'");
178:
179: if (this .lexiconSource.getInputStream() == null)
180: throw new ProcessingException("Source '"
181: + this .lexiconSource.getURI()
182: + "' not found");
183:
184: LexiconFactory factory = new LexiconFactory();
185: SourceUtil.toSAX(this .manager, this .lexiconSource,
186: null, factory);
187:
188: Lexicon lexicon = factory.getLexicon();
189:
190: LexicalAutomatonBuilder builder = new LexicalAutomatonBuilder(
191: lexicon/*, new AvalonLogger(this.logger)*/);
192:
193: LexicalAutomaton automaton = builder
194: .getLexicalAutomaton();
195: setLexicalAutomaton(automaton);
196:
197: this .logger.info("Store automaton into store for '"
198: + this .lexiconSource.getURI() + "'");
199:
200: store.store(this .lexiconSource.getURI(),
201: new LexicalAutomatonEntry(automaton,
202: this .lexiconSource.getValidity()));
203: } else {
204: this .logger.info("Getting automaton from store for '"
205: + this .lexiconSource.getURI() + "'");
206: setLexicalAutomaton(entry.getLexicalAutomaton());
207: }
208: } catch (SourceException se) {
209: throw new ProcessingException("Error during resolving of '"
210: + src + "'.", se);
211: } catch (ServiceException se) {
212: throw new ProcessingException(
213: "Could not lookup for service", se);
214: } finally {
215: if (store != null)
216: this .manager.release(store);
217: }
218: }
219:
220: /**
221: * Generate the unique key. This key must be unique inside the space of this component.
222: *
223: * @return The generated key hashes the src
224: */
225: public Serializable getKey() {
226: return this .lexiconSource.getURI();
227: }
228:
229: /**
230: * Generate the validity object.
231: *
232: * @return The generated validity object or <code>null</code> if the component is currently not
233: * cacheable.
234: */
235: public SourceValidity getValidity() {
236: return this .lexiconSource.getValidity();
237: }
238:
239: /**
240: * Recycle this component. All instance variables are set to <code>null</code>.
241: */
242: public void recycle() {
243: if ((this .resolver != null) && (this .lexiconSource != null)) {
244: this .resolver.release(this .lexiconSource);
245: this .lexiconSource = null;
246: }
247: }
248:
249: /**
250: * The dispose operation is called at the end of a components lifecycle.
251: */
252: public void dispose() {
253: if ((this .resolver != null) && (this .lexiconSource != null)) {
254: this .resolver.release(this .lexiconSource);
255: this .lexiconSource = null;
256: }
257:
258: this .manager = null;
259: }
260:
261: /**
262: * This class represent a entry in a store to cache the lexical automaton.
263: */
264: public static class LexicalAutomatonEntry implements Serializable {
265: private SourceValidity validity = null;
266: private LexicalAutomaton automaton = null;
267:
268: /**
269: * Create a new entry.
270: *
271: * @param automaton Lexical automaton.
272: * @param validity Validity of the lexicon file.
273: */
274: public LexicalAutomatonEntry(LexicalAutomaton automaton,
275: SourceValidity validity) {
276: this .automaton = automaton;
277: this .validity = validity;
278: }
279:
280: /**
281: * Return the validity of the lexicon file.
282: *
283: * @return Validity of the lexicon file.
284: */
285: public SourceValidity getValidity() {
286: return this .validity;
287: }
288:
289: /**
290: * Return the lexical automaton.
291: *
292: * @return Lexical automaton.
293: */
294: public LexicalAutomaton getLexicalAutomaton() {
295: return this .automaton;
296: }
297:
298: private void writeObject(java.io.ObjectOutputStream out)
299: throws IOException {
300: out.writeObject(validity);
301: out.writeObject(automaton);
302: }
303:
304: private void readObject(java.io.ObjectInputStream in)
305: throws IOException, ClassNotFoundException {
306: validity = (SourceValidity) in.readObject();
307: automaton = (LexicalAutomaton) in.readObject();
308: }
309: }
310: }
|