001: /*******************************************************************************
002: * Copyright (c) 2006 IBM Corporation and others.
003: * All rights reserved. This program and the accompanying materials
004: * are made available under the terms of the Eclipse Public License v1.0
005: * which accompanies this distribution, and is available at
006: * http://www.eclipse.org/legal/epl-v10.html
007: *
008: * Contributors:
009: * IBM Corporation - initial API and implementation
010: *******************************************************************************/package org.eclipse.pde.internal.core.util;
011:
012: import java.util.HashMap;
013:
/**
 * PDEHTMLHelper
 * <p>
 * Static helpers for reducing a fragment of HTML markup to plain text:
 * tags are removed, a small set of predefined character entities is
 * resolved, and whitespace is normalized.
 * </p>
 */
public class PDEHTMLHelper {

    /**
     * Maps a predefined entity name (without the leading {@code &} and the
     * trailing {@code ;}) to its one-character replacement string.
     * <p>
     * The map is public and mutable; only the first character of a
     * registered replacement is used by {@link #stripTags(String)}.
     * </p>
     */
    public final static HashMap fgEntityLookup = new HashMap(6);
    static {
        fgEntityLookup.put("lt", "<"); //$NON-NLS-1$ //$NON-NLS-2$
        fgEntityLookup.put("gt", ">"); //$NON-NLS-1$ //$NON-NLS-2$
        fgEntityLookup.put("nbsp", " "); //$NON-NLS-1$ //$NON-NLS-2$
        fgEntityLookup.put("amp", "&"); //$NON-NLS-1$ //$NON-NLS-2$
        fgEntityLookup.put("apos", "'"); //$NON-NLS-1$ //$NON-NLS-2$
        fgEntityLookup.put("quot", "\""); //$NON-NLS-1$ //$NON-NLS-2$
    }

    /**
     * Removes markup from the given HTML and returns the remaining text.
     * <ul>
     * <li>Everything between {@code <} and {@code >} is discarded.</li>
     * <li>A well-formed reference {@code &name;} whose name is registered in
     * {@link #fgEntityLookup} is replaced by its character; the replacement
     * is always emitted as literal text and never interpreted as markup
     * (so {@code &lt;} yields a {@code <} in the output instead of opening
     * a tag).</li>
     * <li>A well-formed reference with an unknown name is dropped.</li>
     * <li>An ampersand that does not start a well-formed reference (for
     * example in {@code AT&T}) is kept as ordinary text.</li>
     * <li>Carriage returns, line feeds and tabs are removed, and runs of
     * spaces (including leading spaces) are collapsed.</li>
     * </ul>
     *
     * @param html the markup to strip; may be <code>null</code>
     * @return the stripped text, or <code>null</code> if <code>html</code>
     *         is <code>null</code> or the result contains only whitespace
     */
    public static String stripTags(String html) {
        if (html == null) {
            return null;
        }
        int length = html.length();
        boolean write = true;
        // Starts as a space so that leading spaces are treated as a
        // continuation of a run and therefore skipped
        char oldChar = ' ';
        StringBuffer sb = new StringBuffer(length);

        for (int i = 0; i < length; i++) {
            char curr = html.charAt(i);

            if (curr == '&') {
                int end = findEntityEnd(html, i + 1);
                if (end != -1) {
                    String entity = (String) fgEntityLookup.get(html
                            .substring(i + 1, end));
                    // Resume scanning after the terminating ';'
                    i = end;
                    // Unknown or empty replacements are ignored; entities
                    // located inside a tag are discarded with the tag
                    if (write && entity != null && entity.length() > 0) {
                        oldChar = appendText(sb, entity.charAt(0), oldChar);
                    }
                    continue;
                }
                // Not a well-formed reference: fall through and handle the
                // ampersand like any other character
            }

            if (curr == '<') {
                write = false;
            } else if (curr == '>') {
                write = true;
            } else if (write) {
                oldChar = appendText(sb, curr, oldChar);
            }
        }
        String result = sb.toString();
        if (isAllWhitespace(result)) {
            return null;
        }
        return result;
    }

    /**
     * Locates the terminating ';' of an entity reference whose name starts
     * at <code>start</code>. The name must consist of at least one letter,
     * digit or '#'.
     *
     * @return the index of the ';', or -1 if no well-formed reference
     *         starts at the given position
     */
    private static int findEntityEnd(String html, int start) {
        int length = html.length();
        for (int i = start; i < length; i++) {
            char c = html.charAt(i);
            if (c == ';') {
                return (i > start) ? i : -1;
            }
            if (!Character.isLetterOrDigit(c) && c != '#') {
                return -1;
            }
        }
        return -1;
    }

    /**
     * Appends a text character to the output unless it is a line break, a
     * tab, or a space directly following another space.
     *
     * @return the character to remember as the most recently written one
     */
    private static char appendText(StringBuffer sb, char c, char oldChar) {
        if (c == '\r' || c == '\n' || c == '\t') {
            return oldChar;
        }
        if (c == ' ' && oldChar == ' ') {
            // skip multiple spaces
            return oldChar;
        }
        sb.append(c);
        return c;
    }

    /**
     * Tests whether the given string consists solely of whitespace as
     * defined by {@link Character#isWhitespace(char)}.
     *
     * @param string the string to test; may be <code>null</code>
     * @return <code>true</code> if every character is whitespace (which
     *         includes the empty string); <code>false</code> if a
     *         non-whitespace character is present or <code>string</code>
     *         is <code>null</code>
     */
    public static boolean isAllWhitespace(String string) {
        if (string == null) {
            return false;
        }
        char[] characters = string.toCharArray();
        for (int i = 0; i < characters.length; i++) {
            if (!Character.isWhitespace(characters[i])) {
                return false;
            }
        }
        return true;
    }

}
|