001: /*
002: * Licensed to the Apache Software Foundation (ASF) under one or more
003: * contributor license agreements. See the NOTICE file distributed with
004: * this work for additional information regarding copyright ownership.
005: * The ASF licenses this file to You under the Apache License, Version 2.0
006: * (the "License"); you may not use this file except in compliance with
007: * the License. You may obtain a copy of the License at
008: *
009: * http://www.apache.org/licenses/LICENSE-2.0
010: *
011: * Unless required by applicable law or agreed to in writing, software
012: * distributed under the License is distributed on an "AS IS" BASIS,
013: * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014: * See the License for the specific language governing permissions and
015: * limitations under the License.
016: */
017: package javax.swing.text.html.parser;
018:
019: import java.io.Serializable;
020: import java.util.ArrayList;
021: import java.util.LinkedList;
022: import java.util.List;
023: import java.util.Vector;
024:
025: /**
026: *
027: * {@link Element}s content representation. That's unary or binary expression.
028: * Operands can be null (matched with null object), instance of {@link Element},
029: * instance of {@link ContentModel}.
030: * <p>
031: * Valid operations can be unary operations (types):
032: * <ol>
033: * <li> '+' - (e+) - e must occur one or more times;
034: * <li> '*' - (e*) - e must occur zero or more times;
035: * <li> '?' - (e?) - e must occur zero or one time;
036: * <li> (0) - (e) - e must occur one time only
037: * </ol>
038: * <p>
039: * and binary operations (types):
040: * <ol>
041: * <li> '|' - (e1|e2) means either e1 or e2 must occur, but not both;
042: * <li> ',' - (e1,e2) means both A and B must occur, in that order;
043: * <li> '&' - (e1 & e2) means both e1 and e2 must occur, in any order;
044: * </ol>
045: * (Operation interpretation corresponds to HTML 4.01 Specification (3.3.3))
046: * <p>
047: * As content model is using for dtd presentation here is some ambiguity what
048: * null object can be matched with. So null shouldn't be passed to constructor.
049: * <p>
050: * No recursion is allowed.
051: * <p>
052: * Content, next, type fields has the following limitation:
053: * <ol>
054: * <li> if type is one from {'+', '*', '?'} content hasn't to be null and next
055: * can be not null, if a binary operation is applyed to them;
056: * <li> if type is one from {'|', ',', '&'} content hasn't to be null and next
057: * must be null;
058: * <li> content can be null, instance of Element or instance of ContentModel;
059: * <li> type can be one from the following '*', '+', '?', '|', '&', ','.
060: * </ol>
061: *
062: * <p>
063: * The structure of a {@link ContentModel} is represented by a relation through
064: * its {@link ContentModel#content} and {@link ContentModel#next} fields. Using
065: * these fields and the information stored in the {@link ContentModel#type}
066: * field a {@link ContentModel} can be represented as a binary tree.
067: * <p>
068: * From now on, in the examples that will follow, we will consider the left
069: * branch and the one belonging to the {@link ContentModel#content} field and
070: * the right branch the {@link ContentModel#next} field.
071: * <p>
072: * Depending on the {@link ContentModel#type} of a {@link ContentModel}, the
073: * following cases may arise:
074: * <p>
075: * <b>CASE 1: A binary relation over some {@link ContentModel}s:</b>
076: *
077: * <pre>
078: * B
079: * / \
080: * C1 NULL
081: * / \
082: * C2
083: * / \
084: * ...
085: * / \
086: * Cn
087: * / \
088: * NULL
089: * </pre>
090: *
091: * Where the binary operation <b>B</b> is applyed to all the
092: * {@link ContentModel}s C1, C2, ..., Cn (that is a sequence of
093: * {@link ContentModel} related by the {@link ContentModel#next} field,
094: * finishing in a null value). This means that this {@link ContentModel}
095: * represents the content model:
096: *
097: * <pre>
098: * (C1 B C2 B ... B Cn)
099: * </pre>
100: *
101: * Here, obviously the <b>B</b> operator can be one of:
102: * <ul>
103: * <li> |
104: * <li> &
105: * <li> ,
106: * </ul>
107: *
108: * <b>CASE 2: A unary relation applied to one {@link ContentModel}</b>
109: *
110: * <pre>
111: * U
112: * / \
113: * C1 NULL
114: * </pre>
115: *
116: * Where the unary operator <b>U</b> is only applyed to the
117: * {@link ContentModel} C1. This means that this {@link ContentModel} represents
118: * the content model:
119: *
120: * <pre>
121: * C1 U
122: * </pre>
123: *
124: * Here, obviously the <b>U</b> operator can be one of:
125: * <ul>
126: * <li> +
127: * <li> *
128: * <li> ?
129: * <li> 0
130: * </ul>
131: *
132: * <b>CASE 3: An element</b>
133: *
134: * <pre>
135: * ELEM
136: * </pre>
137: *
138: * Where this is only an instance of a {@link Element} class and obviosly
139: * denotes a {@link Element} of the {@link ContentModel}. An important fact to
140: * remark is that a {@link ContentModel} may not be just an {@link Element}, it
141: * must be applyed to at least one unary operator, usually the 0 operator.
142: * <p>
143: * This means that if we want to represent the <code>body</code>
144: * {@link ContentModel}, the {@link ContentModel} will be denoted by:
145: *
146: * <pre>
147: * 0
148: * +-----+-----+
149: * | |
150: * BODY NULL
151: * </pre>
152: *
153: * <b>CASE 4: A null value</b>
154: *
155: * <pre>
156: * NULL
157: * </pre>
158: *
159: * The empty or null {@link ContentModel} is denoted by this value. It is also
160: * used to denote the end of a sequence of {@link ContentModel} (as seen in the
161: * CASE 1).
162: *
163: * <p>
164: * As an example, if we want to represent the content model denoted by the
165: * expression:
166: *
167: * <pre>
168: * ((E1? , E2)* & E3)+
169: * </pre>
170: *
171: * The {@link ContentModel} will be denoted by:
172: *
173: * <pre>
174: *
175: *
176: * '+'
177: * +---------+---------+
178: * | |
179: * '&' NULL
180: * +---------+---------+
181: * | |
182: * '*' NULL
183: * +---------+---------+
184: * | |
185: * '|' '0'
186: * +------+------+ +------+------+
187: * | | | |
188: * '?' NULL E4 NULL
189: * +---------+---------+
190: * | |
191: * '0' '0'
192: * +------+------+ +------+------+
193: * | | | |
194: * E1 NULL E2 '+'
195: * +------+------+
196: * | |
197: * '0' NULL
198: * +-----+-----+
199: * | |
200: * E3 NULL
201: * </pre>
202: *
203: */
204:
205: public final class ContentModel implements Serializable {
206:
207: /**
208: * The serialization UID value.
209: */
210: static final long serialVersionUID = -1130825523866321257L;
211:
212: /**
213: * The type of the content model. It should be '*', '+', '?', '|', '&', ','
214: * or 0.
215: */
216: public int type;
217:
218: /**
219: * The content of the ContentModel.
220: */
221: public Object content;
222:
223: /**
224: * The next ContentModel in the ContentModel structure.
225: */
226: public ContentModel next;
227:
228: /**
229: * The symbols representing an obligatory single occurence of an expression.
230: */
231: private static final char DEFAULT_TYPE = 0;
232:
233: /**
234: * The symbols representing that an expression must occur at least one time.
235: */
236: private static final char PLUS_TYPE = '+';
237:
238: /**
239: * The symbols representing that an expression can occur zero or more times.
240: */
241: private static final char STAR_TYPE = '*';
242:
243: /**
244: * The symbols representing that an expression must occur zero or one time.
245: */
246: private static final char QUESTION_TYPE = '?';
247:
248: /**
249: * The symbols representing that any of the expressions related by this
250: * operator must occur.
251: */
252: private static final char LINE_TYPE = '|';
253:
254: /**
255: * The symbols representing that all the expressions in the given order must
256: * occur.
257: */
258: private static final char COMMA_TYPE = ',';
259:
260: /**
261: * The symbols representing that all the expressions in any order must
262: * occur.
263: */
264: private static final char AMPER_TYPE = '&';
265:
266: /**
267: * Returns a new {@link ContentModel}. The {@link ContentModel#type},
268: * {@link ContentModel#content} and {@link ContentModel#next} fields are
269: * filled with the information given as argument.
270: */
271: public ContentModel(final int type, final Object content,
272: final ContentModel next) {
273: this .type = type;
274: this .content = content;
275: this .next = next;
276: }
277:
278: /**
279: * Returns a new {@link ContentModel}. The {@link ContentModel#type} and
280: * {ContentModel#content} fields are filled with the information given
281: * throough the arguments. The {@link ContentModel#next} field is set to
282: * null.
283: */
284: public ContentModel(final int type, final ContentModel content) {
285: this .type = type;
286: this .content = content;
287: }
288:
289: /**
290: * That content model will be mathed with exactly one element. Type will be
291: * 0. {@link Element} can be equals to null. In such case
292: * {@link ContentModel} will be matched with an empty input stream.
293: */
294: public ContentModel(final Element content) {
295: this .content = content;
296: }
297:
298: /**
299: * Returns a {@link ContentModel} with its {@link ContentModel#type} field
300: * set to 0 and its {@link ContentModel#content} and
301: * {@link ContentModel#next} fields set to null.
302: */
303: public ContentModel() {
304: }
305:
306: /**
307: * Returns a representation of the {@link ContentModel} converted to a
308: * string.
309: *
310: * @return a String representing the {@link ContentModel}
311: */
312: public String toString() {
313: String str = new String();
314:
315: try {
316: if (type == DEFAULT_TYPE && content instanceof Element) {
317: str = str + ((Element) content).getName();
318: } else {
319: if (type == PLUS_TYPE || type == STAR_TYPE
320: || type == QUESTION_TYPE) {
321: str = content + String.valueOf((char) type);
322: } else if (isBinaryOperator(type)) {
323: ContentModel auxModel = (ContentModel) content;
324: while (auxModel != null) {
325: str = str + auxModel;
326: if (auxModel.next != null) {
327: str = str + " "
328: + String.valueOf((char) type) + " ";
329: }
330: auxModel = auxModel.next;
331: }
332: str = "(" + str + ")";
333: } else {
334: str = content.toString();
335: }
336: }
337: return str;
338: } catch (ClassCastException e) {
339: throw new ClassCastException(content.getClass().getName());
340: }
341: }
342:
343: private boolean isBinaryOperator(final int type) {
344: return (type == COMMA_TYPE || type == LINE_TYPE || type == AMPER_TYPE);
345: }
346:
347: /**
348: * Returns the {@link Element} that must be first in the
349: * {@link ContentModel}.
350: *
351: * @return The first element that may appear in the {@link ContentModel}.
352: * Null if there is more than one possible {@link Element}.
353: */
354: public Element first() {
355: Element element;
356:
357: try {
358: if (type == STAR_TYPE || type == QUESTION_TYPE
359: || type == LINE_TYPE || type == AMPER_TYPE) {
360: element = null;
361: } else if (type == PLUS_TYPE || type == COMMA_TYPE) {
362: element = ((ContentModel) content).first();
363: } else {
364: element = (Element) content;
365: }
366: return element;
367: } catch (ClassCastException e) {
368: throw new ClassCastException(content.getClass().getName());
369: }
370: }
371:
372: /**
373: * Returns if a given token can occur as first elements in a
374: * {@link ContentModel}
375: *
376: * @param token
377: * the element we are interested in determining whether it can
378: * occur as the first element of a {@link ContentModel}
379: *
380: * @return
381: * <ul>
382: * <li> if type equals to 0, returns true if and only if token equals to
383: * content.
384: * <li> if type is one from the unary types returns true if and only if one
385: * of the following conditions is true:
386: * <ol>
387: * <li> content is instance of {@link Element}, token is instance of
388: * {@link Element} and token equals to content
389: * <li> content is instance of {@link ContentModel}, token is instance of
390: * {@link Element} and content.first(token) returns true;
391: * </ol>
392: * <li> if type is one from binary types then:
393: * <ol>
394: * <li> if content instance of {@link Element} and content equals to token
395: * returns true;
396: * <li> if content instance of {@link ContentModel} and content.first(token)
397: * equals to true, then returns true;
398: * <li> if type equals to ',' returns true if and only if content is
399: * instance of {@link ContentModel} and:
400: * <ul>
401: * <li> for at least one of the {@link ContentModel} related by the ',',
402: * first(token) is true and,
403: * <li> for all the {@link ContentModel}s that preceded it, empty() is
404: * true.
405: * </ul>
406: * <li> if type equals to '| or '&', it returns true if and only if at least
407: * one of {@link ContentModel}s related by the '|' or '&' operator
408: * satisfies that first(token) is true.
409: * </ol>
410: * </ul>
411: */
412: public boolean first(Object token) {
413: boolean found = false;
414: boolean maybeNext = true;
415: ContentModel auxModel;
416:
417: if (type == COMMA_TYPE) {
418: auxModel = (ContentModel) content;
419: while (auxModel != null && maybeNext) {
420: found = auxModel.first(token);
421: maybeNext = !found && auxModel.empty();
422: auxModel = auxModel.next;
423: }
424: } else if (type == LINE_TYPE || type == AMPER_TYPE) {
425: found = ((Element) token).equals(content);
426: auxModel = (ContentModel) content;
427: while (auxModel != null && !found) {
428: found = token.equals(content)
429: || auxModel.first((Element) token);
430: auxModel = auxModel.next;
431: }
432:
433: } else if (type == PLUS_TYPE || type == STAR_TYPE
434: || type == QUESTION_TYPE) {
435: found = ((ContentModel) content).first(token);
436: } else {
437: found = content == token;
438: }
439:
440: return found;
441: }
442:
443: /**
444: * Adds all elements of this contentModel to elemVec ignoring operations
445: * between elements. For instance, for ((a+)| ((b*),(c?))) elements a,b,c
446: * will be added to the elemVec. The argument elemVec should not be null.
447: * If content is null, nothing will be added to elemVec.
448: *
449: * @param elemVec
450: * the vector where the {@link Element}s of the
451: * {@link ContentModel} will be added to.
452: *
453: */
454: public void getElements(final Vector<Element> elemVec) {
455: try {
456:
457: if (type == LINE_TYPE || type == AMPER_TYPE
458: || type == COMMA_TYPE) {
459: ContentModel auxModel = (ContentModel) content;
460: while (auxModel != null) {
461: auxModel.getElements(elemVec);
462: auxModel = auxModel.next;
463: }
464: } else if (type == PLUS_TYPE || type == STAR_TYPE
465: || type == QUESTION_TYPE) {
466: ((ContentModel) content).getElements(elemVec);
467: } else {
468: elemVec.add((Element) content);
469: }
470:
471: } catch (ClassCastException e) {
472: throw new ClassCastException(content.getClass().getName());
473: }
474: }
475:
476: /**
477: * Checks if the {@link ContentModel} can match an empty expression.
478: *
479: * @return true if and only if some of the conditions is true:
480: * <ul>
481: * <li> type equals to '*' or '?';
482: * <li> type equals to '|' and one of the {@link ContentModel}s
483: * related by the binary operator can be empty.
484: * <li> type equals to '&' or ',' and all the {@link ContentModel}s
485: * related by the binary operation can be empty.
486: * </ul>
487: * <p>
488: * If the type equals '+', then it returns true if the
489: * {@link ContentModel} applied to this operator can be empty;
490: * <p>
491: * If the type equals '0' then it returns false.
492: */
493: public boolean empty() {
494: boolean found = false;
495: ContentModel auxModel;
496:
497: try {
498: if (type == PLUS_TYPE || type == LINE_TYPE) {
499: auxModel = (ContentModel) content;
500: while (auxModel != null && !found) {
501: found = auxModel.empty();
502: auxModel = auxModel.next;
503: }
504: } else if (type == AMPER_TYPE || type == COMMA_TYPE) {
505: auxModel = (ContentModel) content;
506: found = true;
507: while (auxModel != null && found) {
508: found = auxModel.empty();
509: auxModel = auxModel.next;
510: }
511: } else {
512: found = (type == STAR_TYPE || type == QUESTION_TYPE);
513: }
514: return found;
515: } catch (ClassCastException e) {
516: throw new ClassCastException(content.getClass().getName());
517: }
518: }
519:
520: /**
521: * Determines the sequence of {@link Element} needed to be implied to
522: * insert an {@link Element}.
523: *
524: * @param e
525: * the {@link Element} found in the document are for which the
526: * implication is needed.
527: * @param parsed
528: * the {@link ArrayList} of {@link Element}s found previosly to
529: * the {@link Element} e.
530: * @param many
531: * a value specifyng whether the given {@link Element} may appear
532: * more than once in the {@link ContentModel}
533: * @param depth
534: * the depth level been searched in the {@link ContentModel}
535: * @param path
536: * a possible path of implied {@link Element}s that may leed to
537: * the {@link Element} e.
538: * @return
539: * <ol>
540: * <li> null, if an implication path to the element e could not be found.
541: * <li> an empty {@link List} if the element may be inserted
542: * in the actual position, with no implication of other elements needed.
543: * <li> a non empty {@link List} if some {@link Element}s need to be implied.
544: * In such case, the {@link List} is formed by a pair. The first
545: * component defines the {@link Element} needed to be implied. The
546: * second component defines if the {@link Element} must be opened and
547: * closed (if true) or just opened (false).
548: * </ol>
549: */
550: List<Pair<Element, Boolean>> implication(final Element e,
551: final List<Element> parsed, final boolean many,
552: final int depth,
553: final LinkedList<Pair<Element, Boolean>> path) {
554:
555: ContentModel auxModel;
556: List<Pair<Element, Boolean>> implied = null;
557:
558: if (content instanceof Element) {
559: Element currentElement = (Element) content;
560: if (e.equals(currentElement)
561: && (!parsed.contains(e) || many)) {
562: implied = new LinkedList<Pair<Element, Boolean>>(path);
563: } else if (currentElement.inclusions != null
564: && e.getIndex() > 0
565: && currentElement.inclusions.get(e.getIndex())) {
566: implied = new LinkedList<Pair<Element, Boolean>>(path);
567: implied.add(new Pair<Element, Boolean>(currentElement,
568: Boolean.FALSE));
569: } else if (depth > 1
570: && !currentElement.hasRequiredAttributes()
571: && !currentElement.isEmpty()
572: && !currentElement.isScript()
573: && !parsed.contains(currentElement)
574: && currentElement.getContent() != null) {
575: LinkedList<Pair<Element, Boolean>> newPath = (LinkedList) path
576: .clone();
577: newPath.add(new Pair<Element, Boolean>(currentElement,
578: new Boolean(false)));
579: implied = currentElement.getContent().implication(e,
580: parsed, many, depth - 1, newPath);
581: }
582: } else if (type == STAR_TYPE || type == PLUS_TYPE) {
583: if (content != null) {
584: implied = ((ContentModel) content).implication(e,
585: parsed, true, depth, path);
586: }
587: } else if (type == COMMA_TYPE) {
588: auxModel = (ContentModel) content;
589: while (auxModel != null && implied == null) {
590: implied = auxModel.implication(e, parsed, many, depth,
591: path);
592: if (implied == null && !auxModel.empty()) {
593: path.add(new Pair<Element, Boolean>(auxModel
594: .first(), new Boolean(true)));
595: }
596: auxModel = auxModel.next;
597: }
598: } else if (type == LINE_TYPE || type == AMPER_TYPE) {
599: auxModel = (ContentModel) content;
600: while (auxModel != null && implied == null) {
601: implied = auxModel.implication(e, parsed, many, depth,
602: path);
603: auxModel = auxModel.next;
604: }
605: } else {
606: if (content != null) {
607: implied = ((ContentModel) content).implication(e,
608: parsed, many, depth, path);
609: }
610: }
611:
612: return implied;
613: }
614: }
|