001: // Jericho HTML Parser - Java based library for analysing and manipulating HTML
002: // Version 2.5
003: // Copyright (C) 2007 Martin Jericho
004: // http://jerichohtml.sourceforge.net/
005: //
006: // This library is free software; you can redistribute it and/or
007: // modify it under the terms of either one of the following licences:
008: //
009: // 1. The Eclipse Public License (EPL) version 1.0,
010: // included in this distribution in the file licence-epl-1.0.html
011: // or available at http://www.eclipse.org/legal/epl-v10.html
012: //
013: // 2. The GNU Lesser General Public License (LGPL) version 2.1 or later,
014: // included in this distribution in the file licence-lgpl-2.1.txt
015: // or available at http://www.gnu.org/licenses/lgpl.txt
016: //
017: // This library is distributed on an "AS IS" basis,
018: // WITHOUT WARRANTY OF ANY KIND, either express or implied.
019: // See the individual licence texts for more details.
020:
021: package au.id.jericho.lib.html;
022:
023: import java.util.*;
024:
025: /**
026: * Represents a cached map of character positions to tags.
027: * The allTagTypesSubCache object is used to cache all tags.
028: * Additional subcaches are used to cache single tag types, which increases the performance when searching for those tag types.
029: * A list of tag types to be cached separately is specified in the SeparatelyCachedTagTypes property.
030: * The standard implementation caches only COMMENT tag types separately, as these tag types are searched extensively
031: * in the process of ensuring that every non-server tag is not located inside a comment.
032: */
033: final class Cache {
034: public final Source source;
035: private final SubCache allTagTypesSubCache;
036: private final SubCache[] subCaches; // contains allTagTypesSubCache plus a SubCache object for each separately cached tag type
037:
038: public Cache(final Source source) {
039: this .source = source;
040: allTagTypesSubCache = new SubCache(this , null);
041: TagType[] separatelyCachedTagTypes = getSeparatelyCachedTagTypes();
042: subCaches = new SubCache[separatelyCachedTagTypes.length + 1];
043: subCaches[0] = allTagTypesSubCache;
044: for (int i = 0; i < separatelyCachedTagTypes.length; i++)
045: subCaches[i + 1] = new SubCache(this ,
046: separatelyCachedTagTypes[i]);
047: }
048:
049: public void clear() {
050: for (int i = 0; i < subCaches.length; i++)
051: subCaches[i].clear();
052: }
053:
054: public Tag getTagAt(final int pos) {
055: return source.useAllTypesCache ? allTagTypesSubCache
056: .getTagAt(pos) : Tag.getTagAtUncached(source, pos);
057: }
058:
059: public Tag findPreviousOrNextTag(final int pos,
060: final boolean previous) {
061: // returns null if pos is out of range.
062: return allTagTypesSubCache.findPreviousOrNextTag(pos, previous);
063: }
064:
065: public Tag findPreviousOrNextTag(final int pos,
066: final TagType tagType, final boolean previous) {
067: // returns null if pos is out of range.
068: for (int i = source.useAllTypesCache ? 0 : 1; i < subCaches.length; i++)
069: if (tagType == subCaches[i].tagType)
070: return subCaches[i]
071: .findPreviousOrNextTag(pos, previous);
072: return Tag.findPreviousOrNextTagUncached(source, pos, tagType,
073: previous, ParseText.NO_BREAK);
074: }
075:
076: public Tag addTagAt(final int pos) {
077: final Tag tag = Tag.getTagAtUncached(source, pos);
078: allTagTypesSubCache.addTagAt(pos, tag);
079: if (tag == null)
080: return tag;
081: final TagType tagType = tag.getTagType();
082: for (int i = 1; i < subCaches.length; i++) {
083: if (tagType == subCaches[i].tagType) {
084: subCaches[i].addTagAt(pos, tag);
085: return tag;
086: }
087: }
088: return tag;
089: }
090:
091: public int getTagCount() {
092: return allTagTypesSubCache.size() - 2;
093: }
094:
095: public Iterator getTagIterator() {
096: return allTagTypesSubCache.getTagIterator();
097: }
098:
099: public void loadAllTags(final List tags,
100: final Tag[] allRegisteredTags,
101: final StartTag[] allRegisteredStartTags) {
102: // assumes the tags list implements RandomAccess
103: final int tagCount = tags.size();
104: allTagTypesSubCache.bulkLoad_Init(tagCount);
105: int registeredTagIndex = 0;
106: int registeredStartTagIndex = 0;
107: for (int i = 0; i < tagCount; i++) {
108: Tag tag = (Tag) tags.get(i);
109: if (!tag.isUnregistered()) {
110: allRegisteredTags[registeredTagIndex++] = tag;
111: if (tag instanceof StartTag)
112: allRegisteredStartTags[registeredStartTagIndex++] = (StartTag) tag;
113: }
114: allTagTypesSubCache.bulkLoad_Set(i, tag);
115: for (int x = 1; x < subCaches.length; x++) {
116: if (tag.getTagType() == subCaches[x].tagType) {
117: subCaches[x].bulkLoad_AddToTypeSpecificCache(tag);
118: break;
119: }
120: }
121: }
122: for (int x = 1; x < subCaches.length; x++)
123: subCaches[x].bulkLoad_FinaliseTypeSpecificCache();
124: }
125:
126: public String toString() {
127: StringBuffer sb = new StringBuffer();
128: for (int i = 0; i < subCaches.length; i++)
129: subCaches[i].appendTo(sb);
130: return sb.toString();
131: }
132:
133: protected int getSourceLength() {
134: return source.end;
135: }
136:
137: private static TagType[] getSeparatelyCachedTagTypes() {
138: return TagType.getTagTypesIgnoringEnclosedMarkup();
139: }
140: }
|