001: /*
002: * Copyright 2004 The Apache Software Foundation.
003: *
004: * Licensed under the Apache License, Version 2.0 (the "License");
005: * you may not use this file except in compliance with the License.
006: * You may obtain a copy of the License at
007: *
008: * http://www.apache.org/licenses/LICENSE-2.0
009: *
010: * Unless required by applicable law or agreed to in writing, software
011: * distributed under the License is distributed on an "AS IS" BASIS,
012: * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
013: * See the License for the specific language governing permissions and
014: * limitations under the License.
015: */
016: package org.apache.myfaces.shared_impl.renderkit.html.util;
017:
/**
 * Converts Strings so that they can be used within HTML code.
 *
 * <p>All methods are static. Markup-significant characters are replaced by
 * character references; optionally newlines, runs of blanks and characters
 * outside basic Latin (code units &gt;= 0x80) are replaced as well.</p>
 */
public abstract class HTMLEncoder {
    /**
     * Variant of {@link #encode(String, boolean, boolean, boolean)} where
     * encodeNewline is false, and encodeSubsequentBlanksToNbsp and
     * encodeNonLatin are true.
     *
     * @param string the string to convert, may be null
     * @return the encoded string, or the empty string if string is null
     */
    public static String encode(String string) {
        return encode(string, false, true);
    }

    /**
     * Variant of {@link #encode(String, boolean, boolean, boolean)} where
     * encodeSubsequentBlanksToNbsp and encodeNonLatin are true.
     *
     * @param string the string to convert, may be null
     * @param encodeNewline if true newline characters are converted to &lt;br/&gt;
     * @return the encoded string, or the empty string if string is null
     */
    public static String encode(String string, boolean encodeNewline) {
        return encode(string, encodeNewline, true);
    }

    /**
     * Variant of {@link #encode(String, boolean, boolean, boolean)} where
     * encodeNonLatin is true.
     *
     * @param string the string to convert, may be null
     * @param encodeNewline if true newline characters are converted to &lt;br/&gt;
     * @param encodeSubsequentBlanksToNbsp if true a blank at the start of the
     *        string or directly after another blank is converted to &amp;#160;
     * @return the encoded string, or the empty string if string is null
     */
    public static String encode(String string, boolean encodeNewline,
            boolean encodeSubsequentBlanksToNbsp) {
        return encode(string, encodeNewline,
                encodeSubsequentBlanksToNbsp, true);
    }

    /**
     * Encodes the given string, so that it can be used within a html page.
     *
     * <p>The characters {@literal "}, {@literal &}, {@literal <} and
     * {@literal >} are always replaced by &amp;quot;, &amp;amp;, &amp;lt; and
     * &amp;gt; respectively.</p>
     *
     * <p>NOTE(review): the string is processed one UTF-16 code unit at a time,
     * so with encodeNonLatin a supplementary character (surrogate pair) is
     * written as two separate numeric references, one per surrogate.</p>
     *
     * @param string the string to convert, may be null
     * @param encodeNewline if true newline characters ('\n') are converted to &lt;br/&gt;
     * @param encodeSubsequentBlanksToNbsp if true a blank at the start of the
     *        string or directly after another blank is converted to &amp;#160;
     * @param encodeNonLatin if true characters &gt;= 0x80 are encoded as named
     *        or numeric character references
     * @return the encoded string; the empty string if string is null; the
     *         very same String instance if nothing had to be replaced
     */
    public static String encode(String string, boolean encodeNewline,
            boolean encodeSubsequentBlanksToNbsp, boolean encodeNonLatin) {
        if (string == null) {
            return "";
        }

        // Created on demand: stays null as long as no character needed a
        // replacement, so unchanged input is returned without copying.
        StringBuffer sb = null;
        String app;
        char c;
        for (int i = 0; i < string.length(); ++i) {
            app = null;
            c = string.charAt(i);
            switch (c) {
                case '"':
                    app = "&quot;";
                    break;
                case '&':
                    app = "&amp;";
                    break;
                case '<':
                    app = "&lt;";
                    break;
                case '>':
                    app = "&gt;";
                    break;
                case ' ':
                    // Only a blank at the very beginning or directly after
                    // another blank is replaced; single blanks between words
                    // stay as they are.
                    if (encodeSubsequentBlanksToNbsp
                            && (i == 0 || string.charAt(i - 1) == ' ')) {
                        app = "&#160;";
                    }
                    break;
                case '\n':
                    if (encodeNewline) {
                        app = "<br/>";
                    }
                    break;
                default:
                    if (encodeNonLatin) {
                        app = encodeNonLatinChar(c);
                    }
                    break;
            }

            if (app != null) {
                if (sb == null) {
                    // First replacement: everything before i was unchanged.
                    sb = new StringBuffer(string.substring(0, i));
                }
                sb.append(app);
            } else if (sb != null) {
                sb.append(c);
            }
        }

        return (sb == null) ? string : sb.toString();
    }

    /**
     * Returns the character reference for a character outside basic Latin.
     *
     * @param c the character to look up
     * @return a named reference for the few characters listed below, a decimal
     *         numeric reference for any other character &gt;= 0x80, or null
     *         if c is below 0x80 and needs no replacement here
     */
    private static String encodeNonLatinChar(char c) {
        switch (c) {
            // german umlauts
            case '\u00E4':
                return "&auml;";
            case '\u00C4':
                return "&Auml;";
            case '\u00F6':
                return "&ouml;";
            case '\u00D6':
                return "&Ouml;";
            case '\u00FC':
                return "&uuml;";
            case '\u00DC':
                return "&Uuml;";
            case '\u00DF':
                return "&szlig;";

            // misc
            // Character 0x80 (sometimes used for the euro symbol) is not
            // mapped to &euro;; it gets a numeric reference like any other
            // character >= 0x80.
            case '\u20AC':
                return "&euro;";
            case '\u00AB':
                return "&laquo;";
            case '\u00BB':
                return "&raquo;";
            case '\u00A0':
                return "&#160;";

            default:
                if (((int) c) >= 0x80) {
                    // encode all non basic latin characters
                    return "&#" + ((int) c) + ";";
                }
                return null;
        }
    }

}
|