001: package it.unimi.dsi.mg4j.util.parser.callback;
002:
003: /*
004: * MG4J: Managing Gigabytes for Java
005: *
006: * Copyright (C) 2005-2007 Sebastiano Vigna
007: *
008: * This library is free software; you can redistribute it and/or modify it
009: * under the terms of the GNU Lesser General Public License as published by the Free
010: * Software Foundation; either version 2.1 of the License, or (at your option)
011: * any later version.
012: *
013: * This library is distributed in the hope that it will be useful, but
014: * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
015: * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License
016: * for more details.
017: *
018: * You should have received a copy of the GNU Lesser General Public License
019: * along with this program; if not, write to the Free Software
020: * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
021: *
022: */
023:
024: import it.unimi.dsi.mg4j.util.MutableString;
025: import it.unimi.dsi.mg4j.util.parser.Attribute;
026: import it.unimi.dsi.mg4j.util.parser.BulletParser;
027: import it.unimi.dsi.mg4j.util.parser.Element;
028:
029: import java.util.Map;
030:
031: /** A callback for the {@linkplain it.unimi.dsi.mg4j.util.parser.BulletParser bullet parser}.
032: *
033: * <P>This interface is very loosely inspired to the SAX2 interface. However, it
034: * strives to be simple, and to be StringFree™.
035: *
036: * <P>By contract, all implementations of this interface are bound to be <em>reusable</em>:
037: * by calling {@link #startDocument()}, a callback can be used again.
038: * It <strong>must</strong> be safe to call {@link #startDocument()} any number of times.
039: *
040: * @deprecated Moved to <code>dsiutils</code>.
041: */
042:
043: @Deprecated
044: public interface Callback {
045:
046: /** A singleton empty callback array. */
047: Callback[] EMPTY_CALLBACK_ARRAY = new Callback[0];
048:
049: /** Configure the parser for usage with this callback.
050: *
051: * <P>When a callback is registered with a parser, it needs to set up
052: * the parser so that all data required by the callback is actually parsed.
053: * The configuration <strong>must</strong> be a monotone process—you
054: * can only <em>set</em> properties and <em>add</em> attribute types to
055: * be parsed.
056: */
057: void configure(BulletParser parser);
058:
059: /** Receive notification of the beginning of the document.
060: *
061: * <P>The callback must use this method to reset its internal state so
062: * that it can be resued. It <strong>must</strong> be safe to invoke this method
063: * several times.
064: */
065: void startDocument();
066:
067: /** Receive notification of the start of an element.
068: *
069: * <P>For simple elements, this is the only notification that the
070: * callback will ever receive.
071: *
072: * @param element the element whose opening tag was found.
073: * @param attrMap a map from {@link it.unimi.dsi.mg4j.util.parser.Attribute}s to {@link MutableString}s.
074: * @return true to keep the parser parsing, false to stop it.
075: */
076: boolean startElement(Element element,
077: Map<Attribute, MutableString> attrMap);
078:
079: /** Receive notification of the end of an element.
080: *
081: * <strong>Warning</strong>: unless specific decorators are used, in
082: * general a callback will just receive notifications for elements
083: * whose closing tag appears <em>explicitly</em> in the document.
084: *
085: * <P>This method will never be called for element without closing tags,
086: * even if such a tag is found.
087: *
088: * @param element the element whose closing tag was found.
089: * @return true to keep the parser parsing, false to stop it.
090: */
091: boolean endElement(Element element);
092:
093: /** Receive notification of character data inside an element.
094: *
095: * <p>You must not write into <code>text</code>, as it could be passed
096: * around to many callbacks.
097: *
098: * <P><code>flowBroken</code> will be true iff
099: * the flow was broken before <code>text</code>. This feature makes it possible
100: * to extract quickly the text in a document without looking at the elements.
101: *
102: * @param text an array containing the character data.
103: * @param offset the start position in the array.
104: * @param length the number of characters to read from the array.
105: * @param flowBroken whether the flow is broken at the start of <code>text</code>.
106: * @return true to keep the parser parsing, false to stop it.
107: */
108: boolean characters(char[] text, int offset, int length,
109: boolean flowBroken);
110:
111: /** Receive notification of the content of a CDATA section.
112: *
113: * <p>CDATA sections in an HTML document are the result of meeting
114: * a <samp>STYLE</samp> or <samp>SCRIPT</samp> element. In that case, the element
115: * will be passed as first argument.
116: *
117: * <p>You must not write into <code>text</code>, as it could be passed
118: * around to many callbacks.
119: *
120: * @param element the element enclosing the CDATA section, or <code>null</code> if the
121: * CDATA section was created with explicit markup.
122: * @param text an array containing the character data.
123: * @param offset the start position in the array.
124: * @param length the number of characters to read from the array.
125: * @return true to keep the parser parsing, false to stop it.
126: */
127: boolean cdata(Element element, char[] text, int offset, int length);
128:
129: /** Receive notification of the end of the document. */
130:
131: void endDocument();
132: }
|