001: /*
002: * Copyright 2006 Sun Microsystems, Inc. All Rights Reserved.
003: * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
004: *
005: * This code is free software; you can redistribute it and/or modify it
006: * under the terms of the GNU General Public License version 2 only, as
007: * published by the Free Software Foundation. Sun designates this
008: * particular file as subject to the "Classpath" exception as provided
009: * by Sun in the LICENSE file that accompanied this code.
010: *
011: * This code is distributed in the hope that it will be useful, but WITHOUT
012: * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
013: * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
014: * version 2 for more details (a copy is included in the LICENSE file that
015: * accompanied this code).
016: *
017: * You should have received a copy of the GNU General Public License version
018: * 2 along with this work; if not, write to the Free Software Foundation,
019: * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
020: *
021: * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
022: * CA 95054 USA or visit www.sun.com if you need additional information or
023: * have any questions.
024: */
025:
026: package com.sun.xml.internal.bind;
027:
/**
 * Static helpers that perform white space normalization on character data:
 * {@code replace}, {@code trim} and {@code collapse}.
 *
 * <p>Throughout this class "white space" means exactly the four characters
 * TAB (0x9), LF (0xA), CR (0xD) and SPACE (0x20).
 *
 * @since 1.0
 */
public abstract class WhiteSpaceProcessor {

    // Historical note: a simpler replace() that unconditionally copied every
    // character into a new buffer was benchmarked (see test/src/ReplaceTest.java
    // in the CVS Attic) and turned out to be slower than the scan-first
    // implementation used below.

    /**
     * {@link String} convenience overload of {@link #replace(CharSequence)}.
     *
     * @param text the text to process
     * @return the text with every TAB, LF and CR turned into a space; the very
     *         same instance when there was nothing to replace
     */
    public static String replace(String text) {
        CharSequence replaced = replace((CharSequence) text);
        return replaced.toString();
    }

    /**
     * Turns every TAB, LF and CR into a single space (0x20). The length of the
     * text never changes.
     *
     * @param text the text to process
     * @return {@code text} itself when it contains none of those characters,
     *         otherwise a new {@link String} holding the replaced text
     * @since 2.0
     */
    public static CharSequence replace(CharSequence text) {
        // Scan backwards for the last character that needs replacing, hoping
        // there is none so that no copy has to be made.
        int last = text.length() - 1;
        while (last >= 0) {
            if (isWhiteSpaceExceptSpace(text.charAt(last))) {
                break;
            }
            last--;
        }

        if (last < 0) {
            // nothing to replace: replace(text) == text
            return text;
        }

        // A modification is required, so work on a mutable copy. Everything
        // after 'last' is already known to be clean.
        StringBuilder copy = new StringBuilder(text);
        for (int pos = last; pos >= 0; pos--) {
            if (isWhiteSpaceExceptSpace(copy.charAt(pos))) {
                copy.setCharAt(pos, ' ');
            }
        }

        return copy.toString();
    }

    /**
     * Strips leading and trailing white space. Analogous to
     * {@link String#trim()}, except that only the characters accepted by
     * {@link #isWhiteSpace(char)} are removed.
     *
     * @param text the text to process
     * @return {@code text} itself when there is nothing to strip, otherwise
     *         the corresponding {@link CharSequence#subSequence(int, int)}
     * @since 2.0
     */
    public static CharSequence trim(CharSequence text) {
        final int length = text.length();

        // index of the first character to keep
        int from = 0;
        while (from < length && isWhiteSpace(text.charAt(from))) {
            from++;
        }

        // exclusive index just past the last character to keep
        int to = length;
        while (to - 1 > from && isWhiteSpace(text.charAt(to - 1))) {
            to--;
        }

        boolean untouched = from == 0 && to == length;
        return untouched ? text : text.subSequence(from, to);
    }

    /**
     * {@link String} convenience overload of {@link #collapse(CharSequence)}.
     *
     * @param text the text to process
     * @return the collapsed text; the very same instance when the text
     *         contains no white space at all
     */
    public static String collapse(String text) {
        CharSequence collapsed = collapse((CharSequence) text);
        return collapsed.toString();
    }

    /**
     * Collapses white space: leading and trailing white space is dropped and
     * every inner run of white space becomes a single space (0x20).
     *
     * <p>This is usually the biggest processing bottleneck.
     *
     * @param text the text to process
     * @return {@code text} itself when it contains no white space character,
     *         otherwise a {@link StringBuilder} holding the collapsed text
     * @since 2.0
     */
    public static CharSequence collapse(CharSequence text) {
        final int length = text.length();

        // Most inputs are already collapsed, so first look for a white space
        // character in the hope that there is none.
        int firstWs = 0;
        while (firstWs < length && !isWhiteSpace(text.charAt(firstWs))) {
            firstWs++;
        }
        if (firstWs == length) {
            // no white space anywhere: the input is already collapsed
            return text;
        }

        // The input does contain white space; do the real work.
        // 'length' is an upper bound for the result, so no re-allocation occurs.
        StringBuilder out = new StringBuilder(length);

        if (firstWs != 0) {
            // keep the clean prefix, followed by one space standing in for the
            // white space run that starts at firstWs
            out.append(text, 0, firstWs);
            out.append(' ');
        }

        // While 'skipping' is true we are inside a white space run whose
        // single replacement space has already been dealt with.
        boolean skipping = true;
        for (int pos = firstWs + 1; pos < length; pos++) {
            char c = text.charAt(pos);
            if (isWhiteSpace(c)) {
                if (!skipping) {
                    out.append(' ');
                    skipping = true;
                }
            } else {
                out.append(c);
                skipping = false;
            }
        }

        // Runs are already collapsed, so trailing white space can be at most
        // one space: remove it if present.
        int outLength = out.length();
        if (outLength > 0 && out.charAt(outLength - 1) == ' ') {
            out.setLength(outLength - 1);
        }

        return out;
    }

    /**
     * Returns true if the specified string consists solely of white space.
     * An empty sequence yields {@code true}.
     *
     * @param s the character sequence to examine
     */
    public static final boolean isWhiteSpace(CharSequence s) {
        int pos = s.length();
        while (pos-- > 0) {
            if (!isWhiteSpace(s.charAt(pos))) {
                return false;
            }
        }
        return true;
    }

    /**
     * Returns true if the specified char is a white space character, i.e.
     * one of 0x9, 0xA, 0xD or 0x20.
     */
    public static final boolean isWhiteSpace(char ch) {
        // Most characters are above 0x20; the leading range test rejects them
        // with a single comparison before the four equality checks.
        return ch <= 0x20
                && (ch == 0x9 || ch == 0xA || ch == 0xD || ch == 0x20);
    }

    /**
     * Returns true if the specified char is a white space character
     * other than 0x20, i.e. one of 0x9, 0xA or 0xD.
     */
    protected static final boolean isWhiteSpaceExceptSpace(char ch) {
        // Most characters are 0x20 or above; the leading range test rejects
        // them with a single comparison before the three equality checks.
        return ch < 0x20
                && (ch == 0x9 || ch == 0xA || ch == 0xD);
    }
}
|