001: /*
002: * Copyright 2002-2005 the original author or authors.
003: *
004: * Licensed under the Apache License, Version 2.0 (the "License");
005: * you may not use this file except in compliance with the License.
006: * You may obtain a copy of the License at
007: *
008: * http://www.apache.org/licenses/LICENSE-2.0
009: *
010: * Unless required by applicable law or agreed to in writing, software
011: * distributed under the License is distributed on an "AS IS" BASIS,
012: * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
013: * See the License for the specific language governing permissions and
014: * limitations under the License.
015: */
016:
017: package org.springframework.web.util;
018:
019: /**
020: * Helper for decoding HTML Strings by replacing character
021: * entity references with the referred character.
022: *
023: * @author Juergen Hoeller
024: * @author Martin Kersten
025: * @since 1.2.1
026: */
027: class HtmlCharacterEntityDecoder {
028:
029: private static final int MAX_REFERENCE_SIZE = 10;
030:
031: private final HtmlCharacterEntityReferences characterEntityReferences;
032:
033: private final String originalMessage;
034:
035: private final StringBuffer decodedMessage;
036:
037: private int currentPosition = 0;
038:
039: private int nextPotentialReferencePosition = -1;
040:
041: private int nextSemicolonPosition = -2;
042:
043: public HtmlCharacterEntityDecoder(
044: HtmlCharacterEntityReferences characterEntityReferences,
045: String original) {
046: this .characterEntityReferences = characterEntityReferences;
047: this .originalMessage = original;
048: this .decodedMessage = new StringBuffer(originalMessage.length());
049: }
050:
051: public String decode() {
052: while (currentPosition < originalMessage.length()) {
053: findNextPotentialReference(currentPosition);
054: copyCharactersTillPotentialReference();
055: processPossibleReference();
056: }
057: return decodedMessage.toString();
058: }
059:
060: private void findNextPotentialReference(int startPosition) {
061: nextPotentialReferencePosition = Math.max(startPosition,
062: nextSemicolonPosition - MAX_REFERENCE_SIZE);
063:
064: do {
065: nextPotentialReferencePosition = originalMessage.indexOf(
066: '&', nextPotentialReferencePosition);
067:
068: if (nextSemicolonPosition != -1
069: && nextSemicolonPosition < nextPotentialReferencePosition)
070: nextSemicolonPosition = originalMessage.indexOf(';',
071: nextPotentialReferencePosition + 1);
072:
073: boolean isPotentialReference = nextPotentialReferencePosition != -1
074: && nextSemicolonPosition != -1
075: && nextPotentialReferencePosition
076: - nextSemicolonPosition < MAX_REFERENCE_SIZE;
077:
078: if (isPotentialReference) {
079: break;
080: }
081: if (nextPotentialReferencePosition == -1) {
082: break;
083: }
084: if (nextSemicolonPosition == -1) {
085: nextPotentialReferencePosition = -1;
086: break;
087: }
088:
089: nextPotentialReferencePosition = nextPotentialReferencePosition + 1;
090: } while (nextPotentialReferencePosition != -1);
091: }
092:
093: private void copyCharactersTillPotentialReference() {
094: if (nextPotentialReferencePosition != currentPosition) {
095: int skipUntilIndex = nextPotentialReferencePosition != -1 ? nextPotentialReferencePosition
096: : originalMessage.length();
097: if (skipUntilIndex - currentPosition > 3) {
098: decodedMessage.append(originalMessage.substring(
099: currentPosition, skipUntilIndex));
100: currentPosition = skipUntilIndex;
101: } else {
102: while (currentPosition < skipUntilIndex)
103: decodedMessage.append(originalMessage
104: .charAt(currentPosition++));
105: }
106: }
107: }
108:
109: private void processPossibleReference() {
110: if (nextPotentialReferencePosition != -1) {
111: boolean isNumberedReference = originalMessage
112: .charAt(currentPosition + 1) == '#';
113: boolean wasProcessable = isNumberedReference ? processNumberedReference()
114: : processNamedReference();
115: if (wasProcessable) {
116: currentPosition = nextSemicolonPosition + 1;
117: } else {
118: char currentChar = originalMessage
119: .charAt(currentPosition);
120: decodedMessage.append(currentChar);
121: currentPosition++;
122: }
123: }
124: }
125:
126: private boolean processNumberedReference() {
127: boolean isHexNumberedReference = originalMessage
128: .charAt(nextPotentialReferencePosition + 2) == 'x'
129: || originalMessage
130: .charAt(nextPotentialReferencePosition + 2) == 'X';
131: try {
132: int value = (!isHexNumberedReference) ? Integer
133: .parseInt(getReferenceSubstring(2)) : Integer
134: .parseInt(getReferenceSubstring(3), 16);
135: decodedMessage.append((char) value);
136: return true;
137: } catch (NumberFormatException ex) {
138: return false;
139: }
140: }
141:
142: private boolean processNamedReference() {
143: String referenceName = getReferenceSubstring(1);
144: char mappedCharacter = characterEntityReferences
145: .convertToCharacter(referenceName);
146: if (mappedCharacter != HtmlCharacterEntityReferences.CHAR_NULL) {
147: decodedMessage.append(mappedCharacter);
148: return true;
149: }
150: return false;
151: }
152:
153: private String getReferenceSubstring(int referenceOffset) {
154: return originalMessage.substring(nextPotentialReferencePosition
155: + referenceOffset, nextSemicolonPosition);
156: }
157:
158: }
|