001: /*
002: * Licensed to the Apache Software Foundation (ASF) under one or more
003: * contributor license agreements. See the NOTICE file distributed with
004: * this work for additional information regarding copyright ownership.
005: * The ASF licenses this file to You under the Apache License, Version 2.0
006: * (the "License"); you may not use this file except in compliance with
007: * the License. You may obtain a copy of the License at
008: *
009: * http://www.apache.org/licenses/LICENSE-2.0
010: *
011: * Unless required by applicable law or agreed to in writing, software
012: * distributed under the License is distributed on an "AS IS" BASIS,
013: * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014: * See the License for the specific language governing permissions and
015: * limitations under the License.
016: *
017: */
018:
019: /* $Id: HTMLHandler.java 473861 2006-11-12 03:51:14Z gregor $ */
020:
021: package org.apache.lenya.util;
022:
023: import java.util.ArrayList;
024:
025: import javax.swing.text.MutableAttributeSet;
026: import javax.swing.text.html.HTML;
027: import javax.swing.text.html.HTML.Tag;
028: import javax.swing.text.html.HTMLEditorKit.ParserCallback;
029:
/**
 * Parser callback that collects the URLs referenced by an HTML document.
 *
 * <p>Three kinds of references are gathered as the callback methods are
 * invoked: the {@code href} of {@code <a>} start tags, the {@code src} of
 * {@code <img>} simple tags and the {@code href} of {@code <link>} simple
 * tags. For each kind two lists are kept: one with every occurrence in the
 * order encountered, and one holding each distinct value once, in order of
 * first occurrence.</p>
 *
 * <p>All list elements are {@code String}s. The getters return the live
 * internal lists, not copies.</p>
 */
public class HTMLHandler extends ParserCallback {
    private final ArrayList imageSrcs;
    private final ArrayList allImageSrcs;
    private final ArrayList anchorHrefs;
    private final ArrayList allAnchorHrefs;
    private final ArrayList linkHrefs;
    private final ArrayList allLinkHrefs;

    /**
     * Creates a new HTMLHandler object with all result lists empty.
     */
    public HTMLHandler() {
        this.allImageSrcs = new ArrayList();
        this.imageSrcs = new ArrayList();
        this.allAnchorHrefs = new ArrayList();
        this.anchorHrefs = new ArrayList();
        this.allLinkHrefs = new ArrayList();
        this.linkHrefs = new ArrayList();
    }

    /**
     * Handle the start tag. Only {@code <a>} tags are of interest here: their
     * {@code href} attribute, when present, is recorded. Every other tag is
     * ignored.
     * @param tag The tag
     * @param attributes The set of attributes; a null set is treated as empty
     * @param pos The position (unused)
     */
    public void handleStartTag(Tag tag, MutableAttributeSet attributes,
            int pos) {
        if (tag.equals(HTML.Tag.A)) {
            record(attributeValue(attributes, HTML.Attribute.HREF),
                    this.allAnchorHrefs, this.anchorHrefs);
        }
    }

    /**
     * Handle a simple tag. Records the {@code src} attribute of
     * {@code <img>} tags and the {@code href} attribute of {@code <link>}
     * tags, when present. Every other tag is ignored.
     * @param tag The tag
     * @param attributes The set of attributes; a null set is treated as empty
     * @param pos The position (unused)
     */
    public void handleSimpleTag(Tag tag,
            MutableAttributeSet attributes, int pos) {
        if (tag.equals(HTML.Tag.IMG)) {
            record(attributeValue(attributes, HTML.Attribute.SRC),
                    this.allImageSrcs, this.imageSrcs);
        }

        if (tag.equals(HTML.Tag.LINK)) {
            record(attributeValue(attributes, HTML.Attribute.HREF),
                    this.allLinkHrefs, this.linkHrefs);
        }
    }

    /**
     * Get the list of distinct src attributes for images
     * @return The live list of src attributes, each value listed once
     */
    public ArrayList getImageSrcs() {
        return this.imageSrcs;
    }

    /**
     * Get the list of src attributes for all images, duplicates included
     * @return The live list of src attributes, one entry per image seen
     */
    public ArrayList getAllImageSrcs() {
        return this.allImageSrcs;
    }

    /**
     * Get the list of distinct href attributes of link tags
     * @return The live list of link hrefs, each value listed once
     */
    public ArrayList getLinkHRefs() {
        return this.linkHrefs;
    }

    /**
     * Get the list of href attributes of all link tags, duplicates included
     * @return The live list of link hrefs, one entry per link tag seen
     */
    public ArrayList getAllLinkHRefs() {
        return this.allLinkHrefs;
    }

    /**
     * Get the list of distinct a href= values
     * @return The live list of a hrefs, each value listed once
     */
    public ArrayList getAHRefs() {
        return this.anchorHrefs;
    }

    /**
     * Get the list of all a href= values, duplicates included
     * @return The live list of a hrefs, one entry per anchor seen
     */
    public ArrayList getAllAHRefs() {
        return this.allAnchorHrefs;
    }

    /**
     * Looks up an attribute and returns it as a string.
     * @param attributes The set of attributes, may be null
     * @param name The attribute key
     * @return The string form of the value, or null if the set is null or
     *         holds no value for the key
     */
    private static String attributeValue(MutableAttributeSet attributes,
            Object name) {
        if (attributes == null) {
            return null;
        }

        Object value = attributes.getAttribute(name);

        // Attribute values are typed as Object; convert instead of casting
        // so that a non-String value cannot raise a ClassCastException.
        return (value == null) ? null : value.toString();
    }

    /**
     * Appends a value to the "all occurrences" list and, if it has not been
     * seen before, to the "distinct values" list. A null value is ignored.
     * @param value The value to record, may be null
     * @param all The list receiving every occurrence
     * @param distinct The list receiving each value only once
     */
    private static void record(String value, ArrayList all,
            ArrayList distinct) {
        if (value == null) {
            return;
        }

        all.add(value);

        if (!distinct.contains(value)) {
            distinct.add(value);
        }
    }
}
|