001: /**
002: * Copyright (c) 2000-2008 Liferay, Inc. All rights reserved.
003: *
004: * Permission is hereby granted, free of charge, to any person obtaining a copy
005: * of this software and associated documentation files (the "Software"), to deal
006: * in the Software without restriction, including without limitation the rights
007: * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
008: * copies of the Software, and to permit persons to whom the Software is
009: * furnished to do so, subject to the following conditions:
010: *
011: * The above copyright notice and this permission notice shall be included in
012: * all copies or substantial portions of the Software.
013: *
014: * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
015: * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
016: * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
017: * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
018: * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
019: * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
020: * SOFTWARE.
021: */package com.liferay.util;
022:
023: import java.io.IOException;
024: import java.io.Reader;
025:
026: import java.util.ArrayList;
027: import java.util.List;
028:
029: import javax.swing.text.MutableAttributeSet;
030: import javax.swing.text.html.HTML;
031: import javax.swing.text.html.HTMLEditorKit;
032:
/**
 * <a href="HTMLParser.java.html"><b><i>View Source</i></b></a>
 *
 * <p>
 * Runs the Swing HTML parser over a document and collects the
 * <code>href</code> values of its <code>a</code> tags and the
 * <code>src</code> values of its <code>img</code> tags, in the order the
 * parser reports the tags. Parsing happens once, in the constructor; the
 * results are then available from {@link #getLinks()} and
 * {@link #getImages()}.
 * </p>
 *
 * @author Brian Wing Shun Chan
 *
 */
public class HTMLParser {

    /**
     * Parses the HTML supplied by the reader and records every link and image
     * location found. This constructor does not itself close the reader.
     *
     * @param  reader the source of the HTML to parse
     * @throws IOException if the underlying parser fails while reading
     */
    public HTMLParser(Reader reader) throws IOException {
        HTMLEditorKit.Parser parser = new DefaultParser().getParser();

        // The last argument is the parser's ignoreCharSet flag

        parser.parse(reader, new HTMLCallback(), true);
    }

    /**
     * Returns the <code>src</code> values collected from <code>img</code>
     * tags. The returned list is the internal list, not a copy.
     *
     * @return the list of image locations as strings
     */
    public List getImages() {
        return _images;
    }

    /**
     * Returns the <code>href</code> values collected from <code>a</code>
     * tags. The returned list is the internal list, not a copy.
     *
     * @return the list of link targets as strings
     */
    public List getLinks() {
        return _links;
    }

    /**
     * Records the attribute of interest for the tag: <code>href</code> for an
     * <code>a</code> tag, <code>src</code> for an <code>img</code> tag. Any
     * other tag is ignored. Shared by the start tag and simple tag callbacks
     * so that both treat a tag the same way.
     */
    private void _collect(HTML.Tag tag, MutableAttributeSet attributes) {
        if (tag.equals(HTML.Tag.A)) {
            _addIfString(
                _links, attributes.getAttribute(HTML.Attribute.HREF));
        }
        else if (tag.equals(HTML.Tag.IMG)) {
            _addIfString(
                _images, attributes.getAttribute(HTML.Attribute.SRC));
        }
    }

    /**
     * Appends the value to the list when it is a string. Attribute values are
     * typed as <code>Object</code>, so a missing (null) or non string value is
     * skipped instead of being cast.
     */
    private static void _addIfString(List values, Object value) {
        if (value instanceof String) {
            values.add(value);
        }
    }

    private final List _images = new ArrayList();
    private final List _links = new ArrayList();

    /**
     * Widens the visibility of the editor kit's parser accessor so that the
     * enclosing class can obtain the parser.
     */
    private static class DefaultParser extends HTMLEditorKit {

        public HTMLEditorKit.Parser getParser() {
            return super.getParser();
        }

    }

    /**
     * Forwards start tags and simple tags to the enclosing parser. The other
     * parser events are not overridden here and are left to the inherited
     * callback methods.
     */
    private class HTMLCallback extends HTMLEditorKit.ParserCallback {

        public void handleStartTag(
            HTML.Tag tag, MutableAttributeSet attributes, int pos) {

            _collect(tag, attributes);
        }

        public void handleSimpleTag(
            HTML.Tag tag, MutableAttributeSet attributes, int pos) {

            _collect(tag, attributes);
        }

    }

}
|