001: /*
002: * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS HEADER.
003: *
004: * Copyright 1997-2007 Sun Microsystems, Inc. All rights reserved.
005: *
006: * The contents of this file are subject to the terms of either the GNU
007: * General Public License Version 2 only ("GPL") or the Common
008: * Development and Distribution License("CDDL") (collectively, the
009: * "License"). You may not use this file except in compliance with the
010: * License. You can obtain a copy of the License at
011: * http://www.netbeans.org/cddl-gplv2.html
012: * or nbbuild/licenses/CDDL-GPL-2-CP. See the License for the
013: * specific language governing permissions and limitations under the
014: * License. When distributing the software, include this License Header
015: * Notice in each file and include the License file at
016: * nbbuild/licenses/CDDL-GPL-2-CP. Sun designates this
017: * particular file as subject to the "Classpath" exception as provided
018: * by Sun in the GPL Version 2 section of the License file that
019: * accompanied this code. If applicable, add the following below the
020: * License Header, with the fields enclosed by brackets [] replaced by
021: * your own identifying information:
022: * "Portions Copyrighted [year] [name of copyright owner]"
023: *
024: * Contributor(s):
025: *
026: * The Original Software is NetBeans. The Initial Developer of the Original
027: * Software is Sun Microsystems, Inc. Portions Copyright 1997-2006 Sun
028: * Microsystems, Inc. All Rights Reserved.
029: *
030: * If you wish your version of this file to be governed by only the CDDL
031: * or only the GPL Version 2, indicate your decision by adding
032: * "[Contributor] elects to include this software in this distribution
033: * under the [CDDL or GPL Version 2] license." If you do not indicate a
034: * single choice of license, a recipient has the option to distribute
035: * your version of this file under either the CDDL, the GPL Version 2 or
036: * to extend the choice of license to its licensees as provided above.
037: * However, if you add GPL Version 2 code and therefore, elected the GPL
038: * Version 2 license, then the option applies only if the new code is
039: * made subject to such option by the copyright holder.
040: */
041:
042: package org.netbeans.lib.collab.util;
043:
044: import java.io.*;
045: import java.net.*;
046: import java.util.*;
047: import javax.swing.text.*;
048: import javax.swing.text.html.*;
049:
050: /**
051: * Html Document Loader for optimized parsing and loading of Html files and url's
052: * TODO -- need to thread loading of document
053: *
054: */
055: public class HTMLDocumentLoader {
056:
057: static public ImageDictionary _dic = new ImageDictionary();
058: static final String IMAGE_CACHE_PROPERTY = "imageCache";
059:
060: public HTMLDocument loadDocument(HTMLDocument doc, URL url,
061: String charSet) throws IOException {
062: doc.putProperty(Document.StreamDescriptionProperty, url);
063:
064: InputStream in = null;
065: boolean ignoreCharSet = false;
066:
067: for (;;) {
068: try {
069: doc.remove(0, doc.getLength());
070: URLConnection urlc = url.openConnection();
071: in = urlc.getInputStream();
072: Reader reader = (charSet == null) ? new InputStreamReader(
073: in)
074: : new InputStreamReader(in, charSet);
075:
076: HTMLEditorKit.Parser parser = getParser();
077: HTMLEditorKit.ParserCallback htmlReader = getParserCallback(doc);
078: parser.parse(reader, htmlReader, ignoreCharSet);
079: htmlReader.flush();
080: in.close();
081: break;
082: } catch (BadLocationException ex) {
083: throw new IOException(ex.getMessage());
084: } catch (ChangedCharSetException e) {
085: // The character set has changed - restart
086: charSet = getNewCharSet(e);
087: // Prevent recursion by suppressing further exceptions
088: ignoreCharSet = true;
089: in.close();
090: }
091: }
092:
093: return doc;
094: }
095:
096: /**
097: *
098: *
099: * @param
100: */
101: public void setImageCache(ImageDictionary d) {
102: _dic = d;
103: }
104:
105: /**
106: *
107: *
108: * @param
109: */
110: public HTMLDocument loadDocument(URL url, String charSet)
111: throws IOException {
112: return loadDocument((HTMLDocument) kit.createDefaultDocument(),
113: url, charSet);
114: }
115:
116: /**
117: * Returns a new HtmlDoc with the given URL inserted and rendered into the new doc
118: * @param URL url
119: */
120: public HTMLDocument loadDocument(URL url) throws IOException {
121: return loadDocument(url, null);
122: }
123:
124: /**
125: * Returns a new HtmlDoc with the given text inserted and rendered into the new doc
126: * @param String text
127: */
128: public HTMLDocument loadDocument(String text) throws IOException {
129: return loadDocument((HTMLDocument) kit.createDefaultDocument(),
130: text, true);
131: }
132:
133: /**
134: * Returns a new HtmlDoc with the given text inserted and rendered into the given doc
135: * if the boolean parameter use_cache == true, any images will be flushed if reloaded
136: * @param HTMLDocument doc, String text, boolean use_cache
137: */
138: public HTMLDocument loadDocument(HTMLDocument doc, String text,
139: boolean use_cache) throws IOException {
140: if (use_cache && _dic != null)
141: doc.putProperty(IMAGE_CACHE_PROPERTY, _dic);
142:
143: boolean ignoreCharSet = false;
144: for (;;) {
145: try {
146: StringReader in = new StringReader(text);
147: BufferedReader bufin = new BufferedReader(in);
148: HTMLEditorKit.Parser parser = getParser();
149: HTMLEditorKit.ParserCallback htmlReader = getParserCallback(doc);
150: parser.parse(bufin, htmlReader, ignoreCharSet);
151: htmlReader.flush();
152: bufin.close();
153: in.close();
154: break;
155: } catch (BadLocationException ex) {
156: throw new IOException(ex.getMessage());
157: } catch (ChangedCharSetException e) {
158: System.out.println("loadDocument:" + e);
159: // The character set has changed - restart
160: //charSet = getNewCharSet(e);
161: // Prevent recursion by suppressing further exceptions
162: //Aqueel - 550108
163: ignoreCharSet = true;
164: // Close original input stream
165: //in.close();
166: }
167: }
168:
169: return doc;
170: }
171:
172: /**
173: * Inserts a string of HTML into the document at the given position.
174: * parent is used to identify the tag to look for in
175: * html (unless insertTag, in which case it
176: * is used). If parent is a leaf this can have
177: * unexpected results.
178: */
179: public void insertHTML(HTMLDocument doc, Element parent,
180: int offset, String html, HTML.Tag insertTag)
181: throws BadLocationException, IOException {
182: if (parent != null && html != null) {
183: // Determine the tag we are to look for in html.
184: Object name = (insertTag != null) ? insertTag : parent
185: .getAttributes().getAttribute(
186: StyleConstants.NameAttribute);
187: HTMLEditorKit.Parser parser = getParser();
188:
189: if (parser != null && name != null
190: && (name instanceof HTML.Tag)) {
191: int lastOffset = Math.max(0, offset - 1);
192: Element charElement = doc
193: .getCharacterElement(lastOffset);
194: Element commonParent = parent;
195: int pop = 0;
196: int push = 0;
197:
198: if (parent.getStartOffset() > lastOffset) {
199: while (commonParent != null
200: && commonParent.getStartOffset() > lastOffset) {
201: commonParent = commonParent.getParentElement();
202: push++;
203: }
204: if (commonParent == null) {
205: throw new BadLocationException(
206: "No common parent", offset);
207: }
208: }
209: while (charElement != null
210: && charElement != commonParent) {
211: pop++;
212: charElement = charElement.getParentElement();
213: }
214: if (charElement != null) {
215: // Found it, do the insert.
216:
217: HTMLEditorKit.ParserCallback callback = doc
218: .getReader(offset, pop - 1, push,
219: (HTML.Tag) name);
220: //(insertTag != null));
221:
222: parser
223: .parse(new StringReader(html), callback,
224: true);
225: callback.flush();
226: }
227: }
228: }
229: }
230:
231: /*public void insertHTML(HTMLDocument doc, Element parent, int offset, String html,
232: boolean wantsTrailingNewline)
233: throws BadLocationException, IOException {
234: if (parent != null && html != null) {
235: HTMLEditorKit.Parser parser = getParser();
236: if (parser != null) {
237: int lastOffset = Math.max(0, offset - 1);
238: Element charElement = doc.getCharacterElement(lastOffset);
239: Element commonParent = parent;
240: int pop = 0;
241: int push = 0;
242:
243: if (parent.getStartOffset() > lastOffset) {
244: while (commonParent != null &&
245: commonParent.getStartOffset() > lastOffset) {
246: commonParent = commonParent.getParentElement();
247: push++;
248: }
249: if (commonParent == null) {
250: throw new BadLocationException("No common parent",
251: offset);
252: }
253: }
254: while (charElement != null && charElement != commonParent) {
255: pop++;
256: charElement = charElement.getParentElement();
257: }
258: if (charElement != null) {
259: // Found it, do the insert.
260: HTMLReader reader = doc.getReader(offset, pop - 1, push, null);
261: HTMLReader reader = new HTMLReader(offset, pop - 1, push,
262: null, false, true,
263: wantsTrailingNewline);
264:
265: parser.parse(new StringReader(html), reader, true);
266: reader.flush();
267: }
268: }
269: }
270: }
271: */
272:
273: // Methods that allow customization of the parser and the callback
274: public synchronized HTMLEditorKit.Parser getParser() {
275: if (parser == null) {
276: try {
277: Class c = Class
278: .forName("javax.swing.text.html.parser.ParserDelegator");
279: parser = (HTMLEditorKit.Parser) c.newInstance();
280: } catch (Throwable e) {
281: }
282: }
283: return parser;
284: }
285:
286: /**
287: * Returns the parser callback for a given html doc
288: * @param HTMLDocument doc
289: */
290: public synchronized HTMLEditorKit.ParserCallback getParserCallback(
291: HTMLDocument doc) {
292: return doc.getReader(0);
293: }
294:
295: /**
296: * Returns a String id for a new Character Set
297: * If none is found, a guess ("8859_1") will be returned
298: * The event contains the content type
299: * plus ";" plus qualifiers which may
300: * contain a "charset" directive.
301: * @param ChangedCharSetException e
302: */
303: protected String getNewCharSet(ChangedCharSetException e) {
304: String spec = e.getCharSetSpec();
305: if (e.keyEqualsCharSet())
306: return spec;
307:
308: //First remove the content type.
309: int index = spec.indexOf(";");
310: if (index != -1) {
311: spec = spec.substring(index + 1);
312: }
313:
314: spec = spec.toLowerCase();
315: StringTokenizer st = new StringTokenizer(spec, " \t=", true);
316: boolean foundCharSet = false;
317: boolean foundEquals = false;
318: while (st.hasMoreTokens()) {
319: String token = st.nextToken();
320: if (token.equals(" ") || token.equals("\t")) {
321: continue;
322: }
323: if (foundCharSet == false && foundEquals == false
324: && token.equals("charset")) {
325: foundCharSet = true;
326: continue;
327: } else if (foundEquals == false && token.equals("=")) {
328: foundEquals = true;
329: continue;
330: } else if (foundEquals == true && foundCharSet == true) {
331: return token;
332: }
333:
334: // Not recognized
335: foundCharSet = false;
336: foundEquals = false;
337: }
338:
339: // No charset found - return a guess
340: return "8859_1";
341: }
342:
343: protected static HTMLEditorKit kit;
344: protected static HTMLEditorKit.Parser parser;
345:
346: static {
347: kit = new HTMLEditorKit();
348: }
349: }
|