001: /*
002: * Created on Oct 21, 2005
003: */
004: package uk.org.ponder.rsf.template;
005:
006: import java.io.InputStream;
007: import java.util.HashMap;
008: import java.util.Iterator;
009: import java.util.List;
010:
011: import org.xmlpull.mxp1.MXParser;
012: import org.xmlpull.v1.XmlPullParser;
013:
014: import uk.org.ponder.arrayutil.ArrayUtil;
015: import uk.org.ponder.arrayutil.ListUtil;
016: import uk.org.ponder.rsf.renderer.ViewRender;
017: import uk.org.ponder.rsf.util.SplitID;
018: import uk.org.ponder.rsf.view.ViewTemplate;
019: import uk.org.ponder.rsf.view.ViewTemplateParser;
020: import uk.org.ponder.stringutil.CharWrap;
021: import uk.org.ponder.util.UniversalRuntimeException;
022: import uk.org.ponder.xml.XMLUtil;
023:
024: /**
025: * The parser for the IKAT view template, implemented using the XPP3 "rapid" XML
026: * pull parser. Returns an XMLViewTemplate, which can be recognised by the
027: * {@link ViewRender}, embodying the IKAT algorithm.
028: *
029: * @author Antranig Basman (antranig@caret.cam.ac.uk)
030: *
031: */
032:
033: public class XMLViewTemplateParser implements ViewTemplateParser {
034: private XMLViewTemplate t;
035:
036: public int INITIAL_LUMP_SIZE = 1000;
037: private CharWrap buffer;
038:
039: private List parseinterceptors;
040:
041: public void setTemplateParseInterceptors(List parseinterceptors) {
042: this .parseinterceptors = parseinterceptors;
043: }
044:
045: private void writeToken(int token, XmlPullParser parser, CharWrap w) {
046:
047: char[] chars = parser.getTextCharacters(limits);
048: switch (token) {
049: case XmlPullParser.COMMENT:
050: w.append("<!--");
051: break;
052: case XmlPullParser.ENTITY_REF:
053: w.append("&");
054: break;
055: case XmlPullParser.CDSECT:
056: w.append("<![CDATA[");
057: break;
058: case XmlPullParser.PROCESSING_INSTRUCTION:
059: w.append("<?");
060: break;
061: case XmlPullParser.DOCDECL:
062: w.append("<!DOCTYPE");
063: break;
064:
065: }
066: w.append(chars, limits[0], limits[1]);
067: switch (token) {
068: case XmlPullParser.COMMENT:
069: w.append("-->");
070: break;
071: case XmlPullParser.ENTITY_REF:
072: w.append(";");
073: break;
074: case XmlPullParser.CDSECT:
075: w.append("]]>");
076: break;
077: case XmlPullParser.PROCESSING_INSTRUCTION:
078: w.append("?>");
079: break;
080: case XmlPullParser.DOCDECL:
081: w.append(">");
082: break;
083: }
084: }
085:
086: private void setLumpChars(XMLLump lump, char[] chars, int start,
087: int length) {
088: lump.start = buffer.size;
089: lump.length = length;
090: if (length > 0) {
091: buffer.append(chars, start, length);
092: }
093: }
094:
095: private void setLumpString(XMLLump lump, String string) {
096: lump.start = buffer.size;
097: lump.length = string.length();
098: buffer.append(string);
099: }
100:
101: private void simpleTagText(XmlPullParser parser) {
102: justended = false;
103: char[] chars = parser.getTextCharacters(limits);
104: XMLLump newlump = newLump(parser);
105: setLumpChars(newlump, chars, limits[0], limits[1]);
106: // String text = new String(chars, limits[0], limits[1]);
107: // lumps[lumpindex - 1].text = text;
108: }
109:
110: private void processDefaultTag(int token, XmlPullParser parser) {
111: justended = false;
112: CharWrap w = new CharWrap();
113: writeToken(token, parser, w);
114: XMLLump newlump = newLump(parser);
115: setLumpChars(newlump, w.storage, 0, w.size);
116: // lumps[lumpindex - 1].text = w.toString();
117: }
118:
119: private void checkContribute(String id, XMLLump headlump) {
120: if (id.startsWith(XMLLump.SCR_CONTRIBUTE_PREFIX)) {
121: String scr = id.substring(XMLLump.SCR_CONTRIBUTE_PREFIX
122: .length());
123: t.collectmap.addLump(scr, headlump);
124: }
125: }
126:
127: private void processTagStart(XmlPullParser parser, boolean isempty) {
128: if (justended) {
129: // avoid the pathological case where we have for example
130: // <td class="tmiblock1" rsf:id="tmiblock:"></td><td> which makes it
131: // hard to spot run ends on the basis of recursion uncession.
132: justended = false;
133: XMLLump backlump = newLump(parser);
134: backlump.nestingdepth--;
135: setLumpChars(backlump, null, 0, 0);
136: }
137: XMLLump headlump = newLump(parser);
138: XMLLump stacktop = getStackTop();
139: headlump.uplump = stacktop;
140:
141: if (t.roottagindex == -1)
142: t.roottagindex = headlump.lumpindex;
143: String tagname = parser.getName();
144: // standard text of |<tagname | to allow easy identification.
145: setLumpString(headlump, XMLLump.tagToText(tagname));
146: // HashMap forwardmap = new HashMap();
147: // headlump.forwardmap = forwardmap;
148: // current policy - every open tag gets a forwardmap, and separate lumps.
149: // eventually we only want a lump where there is an rsf:id.
150: int attrs = parser.getAttributeCount();
151: headlump.attributemap = new HashMap(attrs < 3 ? (attrs + 1) * 2
152: : attrs * 2);
153:
154: for (int i = 0; i < attrs; ++i) {
155: String attrname = parser.getAttributeName(i);
156: String attrvalue = parser.getAttributeValue(i);
157: headlump.attributemap.put(attrname, attrvalue);
158: }
159: try {
160: if (parseinterceptors != null) {
161: for (int i = 0; i < parseinterceptors.size(); ++i) {
162: TemplateParseInterceptor parseinterceptor = (TemplateParseInterceptor) parseinterceptors
163: .get(i);
164: parseinterceptor.adjustAttributes(tagname,
165: headlump.attributemap);
166: }
167: }
168: } catch (Exception e) {
169: throw UniversalRuntimeException.accumulate(e,
170: "Error processing tag " + headlump);
171: }
172: attrs = headlump.attributemap.size(); // TPI may have changed it
173: if (headlump.attributemap.isEmpty()) {
174: headlump.attributemap = null;
175: } else {
176: boolean firstattr = true;
177: for (Iterator keyit = headlump.attributemap.keySet()
178: .iterator(); keyit.hasNext();) {
179: String attrname = (String) keyit.next();
180: String attrvalue = (String) headlump.attributemap
181: .get(attrname);
182:
183: if (attrname.equals(XMLLump.ID_ATTRIBUTE)) {
184: --attrs; // reduce count which is kept for close tag accounting below
185: String ID = attrvalue;
186: if (ID.startsWith(XMLLump.FORID_PREFIX)
187: && ID.endsWith(XMLLump.FORID_SUFFIX)) {
188: ID = ID.substring(0, ID.length()
189: - XMLLump.FORID_SUFFIX.length());
190: }
191: checkContribute(ID, headlump);
192: headlump.rsfID = ID;
193:
194: XMLLump downreg = findTopContainer(ID);
195: if (downreg.downmap == null) {
196: downreg.downmap = new XMLLumpMMap(); // to handle payload-component case
197: }
198: downreg.downmap.addLump(ID, headlump);
199:
200: t.globalmap.addLump(ID, headlump);
201:
202: SplitID split = new SplitID(ID);
203:
204: if (split.prefix.equals(XMLLump.FORID_PREFIX)) {
205: // no special note, just prevent suffix logic.
206: }
207: // we need really to be able to locate 3 levels of id -
208: // for-message:message:to
209: // ideally we would also like to be able to locate repetition
210: // constructs too, hopefully the standard suffix-based computation
211: // will allow this. However we previously never allowed BOTH
212: // repetitious and non-repetitious constructs to share the same
213: // prefix, so revisit this to solve.
214: // }
215: else if (split.suffix != null) {
216: // a repetitive tag is found.
217: // Repetitions within a SCOPE should be UNIQUE and CONTIGUOUS.
218: //XMLLump prevlast = stacktop.getFinal(split.prefix);
219: stacktop.setFinal(split.prefix, headlump);
220: }
221: } else { // is not rsf:id attribute
222: XMLLump frontlump = newLump(parser);
223: CharWrap lumpac = new CharWrap();
224: if (!firstattr) {
225: lumpac.append("\" ");
226: }
227: firstattr = false;
228: lumpac.append(attrname).append("=\"");
229: setLumpChars(frontlump, lumpac.storage, 0,
230: lumpac.size);
231: // frontlump holds |" name="|
232: // valuelump just holds the value.
233:
234: XMLLump valuelump = newLump(parser);
235: setLumpString(valuelump, XMLUtil.encode(attrvalue));
236: }
237: } // end for each attribute
238: }
239: XMLLump finallump = newLump(parser);
240:
241: String closetext = attrs == 0 ? (isempty ? "/>" : ">")
242: : (isempty ? "\"/>" : "\">");
243: setLumpString(finallump, closetext);
244: headlump.open_end = finallump;
245:
246: tagstack.add(nestingdepth, headlump);
247: if (isempty) {
248: processTagEnd(parser);
249: }
250: }
251:
252: boolean justended;
253:
254: private void processTagEnd(XmlPullParser parser) {
255: // String tagname = parser.getName();
256: XMLLump oldtop = tagstack.lumpAt(nestingdepth);
257:
258: oldtop.close_tag = t.lumps[lumpindex - 1];
259: tagstack.remove(nestingdepth);
260: justended = true;
261: }
262:
263: private XMLLump getStackTop() {
264: XMLLump togo = (XMLLump) ListUtil.peek(tagstack);
265: return togo == null ? t.rootlump : togo;
266: }
267:
268: private XMLLump findTopContainer(String id) {
269: for (int i = tagstack.size() - 1; i >= 0; --i) {
270: XMLLump lump = tagstack.lumpAt(i);
271: if (lump.rsfID != null
272: && (id.equals(XMLLump.PAYLOAD_COMPONENT) || SplitID
273: .isSplit(lump.rsfID)))
274: return lump;
275: }
276: return t.rootlump;
277: }
278:
279: // temporary array for getCharacterText
280: private int[] limits = new int[2];
281: private int lumpindex = 0;
282: private int nestingdepth = 0;
283: // only stores repetitive tags.
284: private XMLLumpList tagstack = new XMLLumpList();
285:
286: private XMLLump newLump(XmlPullParser parser) {
287: if (lumpindex == t.lumps.length) {
288: t.lumps = (XMLLump[]) ArrayUtil.expand(t.lumps, 2.0);
289: }
290: XMLLump togo = new XMLLump(lumpindex, nestingdepth);
291: togo.line = parser.getLineNumber();
292: togo.column = parser.getColumnNumber();
293: togo.parent = t;
294: t.lumps[lumpindex] = togo;
295:
296: ++lumpindex;
297: return togo;
298: }
299:
300: // XPP tag depths:
301: // <!-- outside --> 0
302: // <root> 1
303: // sometext 1
304: // <foobar> 2
305: // </foobar> 2
306: // </root> 1
307: // <!-- outside --> 0
308:
309: public void init() {
310: t = new XMLViewTemplate();
311: t.lumps = new XMLLump[INITIAL_LUMP_SIZE];
312: buffer = new CharWrap(INITIAL_LUMP_SIZE * 10);
313: lumpindex = 0;
314: tagstack.clear();
315: t.rootlump = new XMLLump();
316: t.rootlump.downmap = new XMLLumpMMap();
317: t.rootlump.nestingdepth = -1;
318: t.rootlump.parent = t;
319: t.roottagindex = -1;
320: t.collectmap = new XMLLumpMMap();
321: justended = false;
322: }
323:
324: public ViewTemplate parse(InputStream xmlstream) {
325: // long time = System.currentTimeMillis();
326: init();
327: XmlPullParser parser = new MXParser();
328: try {
329: // parser.setFeature(FEATURE_XML_ROUNDTRIP, true);
330: parser.setInput(xmlstream, "UTF-8");
331: while (true) {
332: int token = parser.nextToken();
333: if (token == XmlPullParser.END_DOCUMENT)
334: break;
335: // currently 1 lump for each token - an optimisation would collapse
336: // provable irrelevant lumps. but watch out for end tags! Some might
337: // be fused, some not.
338: nestingdepth = parser.getDepth() - 1;
339:
340: switch (token) {
341: case XmlPullParser.START_TAG:
342: boolean isempty = parser.isEmptyElementTag();
343: processTagStart(parser, isempty);
344: if (isempty) {
345: parser.next();
346: }
347: break;
348: case XmlPullParser.END_TAG:
349: simpleTagText(parser);
350: processTagEnd(parser);
351: break;
352: default:
353: processDefaultTag(token, parser);
354: }
355: }
356:
357: } catch (Throwable t) {
358: throw UniversalRuntimeException.accumulate(t,
359: "Error parsing template");
360: }
361: endParse();
362: return t;
363: // Logger.log.info("Template parsed in " + (System.currentTimeMillis() -
364: // time) + "ms");
365: }
366:
367: private void endParse() {
368: t.lumps = (XMLLump[]) ArrayUtil.trim(t.lumps, lumpindex);
369: tagstack.clear();
370: char[] compacted = new char[buffer.size];
371: System.arraycopy(buffer.storage, 0, compacted, 0, buffer.size);
372: t.buffer = compacted;
373: buffer = null;
374: }
375:
376: }
|