001: /*
002: * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS HEADER.
003: *
004: * Copyright 1997-2007 Sun Microsystems, Inc. All rights reserved.
005: *
006: * The contents of this file are subject to the terms of either the GNU
007: * General Public License Version 2 only ("GPL") or the Common Development
008: * and Distribution License("CDDL") (collectively, the "License"). You
009: * may not use this file except in compliance with the License. You can obtain
010: * a copy of the License at https://glassfish.dev.java.net/public/CDDL+GPL.html
011: * or glassfish/bootstrap/legal/LICENSE.txt. See the License for the specific
012: * language governing permissions and limitations under the License.
013: *
014: * When distributing the software, include this License Header Notice in each
015: * file and include the License file at glassfish/bootstrap/legal/LICENSE.txt.
016: * Sun designates this particular file as subject to the "Classpath" exception
017: * as provided by Sun in the GPL Version 2 section of the License file that
018: * accompanied this code. If applicable, add the following below the License
019: * Header, with the fields enclosed by brackets [] replaced by your own
020: * identifying information: "Portions Copyrighted [year]
021: * [name of copyright owner]"
022: *
023: * Contributor(s):
024: *
025: * If you wish your version of this file to be governed by only the CDDL or
026: * only the GPL Version 2, indicate your decision by adding "[Contributor]
027: * elects to include this software in this distribution under the [CDDL or GPL
028: * Version 2] license." If you don't indicate a single choice of license, a
029: * recipient has the option to distribute your version of this file under
030: * either the CDDL, the GPL Version 2 or to extend the choice of license to
031: * its licensees as provided above. However, if you add GPL Version 2 code
032: * and therefore, elected the GPL Version 2 license, then the option applies
033: * only if the new code is made subject to such option by the copyright
034: * holder.
035: */
036: package com.sun.xml.bind;
037:
/**
 * Processes white space normalization.
 *
 * <p>
 * All methods are static. "White space" here means exactly the four
 * characters tab (0x9), line feed (0xA), carriage return (0xD) and
 * space (0x20); no other character is treated as white space.
 *
 * @since 1.0
 */
public abstract class WhiteSpaceProcessor {

    // Implementation note: benchmarking (see test/src/ReplaceTest.java in the
    // CVS Attic) showed that a plain loop copying every character into a
    // StringBuffer, substituting ' ' for each white space character, is slower
    // than the scan-first approach used by replace(CharSequence) below.

    /**
     * Replaces every tab, line feed and carriage return with a space.
     *
     * @param text the text to process; must not be {@code null}
     * @return the processed text
     */
    public static String replace(String text) {
        return replace((CharSequence) text).toString();
    }

    /**
     * Replaces every tab, line feed and carriage return with a space.
     *
     * @param text the text to process; must not be {@code null}
     * @return {@code text} itself when it contains none of those three
     *         characters, otherwise a new {@link String} of the same length
     * @since 2.0
     */
    public static CharSequence replace(CharSequence text) {
        int i = text.length() - 1;

        // Scan backwards for the last character that needs replacing.
        while (i >= 0 && !isWhiteSpaceExceptSpace(text.charAt(i))) {
            i--;
        }

        if (i < 0) {
            // Nothing to replace, so replace(text) == text.
            return text;
        }

        // We now know the text has to be modified: work on a mutable copy.
        StringBuilder buf = new StringBuilder(text);

        // Position i is already known to need replacing; keep walking towards
        // the start of the text for any further occurrences.
        buf.setCharAt(i--, ' ');
        for (; i >= 0; i--) {
            if (isWhiteSpaceExceptSpace(buf.charAt(i))) {
                buf.setCharAt(i, ' ');
            }
        }

        return buf.toString();
    }

    /**
     * Strips leading and trailing white space.
     *
     * <p>
     * This is similar to {@link String#trim()}, except that only the four
     * characters accepted by {@link #isWhiteSpace(char)} are stripped.
     *
     * @param text the text to trim; must not be {@code null}
     * @return {@code text} itself when there is nothing to strip, otherwise
     *         the result of {@link CharSequence#subSequence(int, int)} on it
     * @since 2.0
     */
    public static CharSequence trim(CharSequence text) {
        int len = text.length();
        int start = 0;

        while (start < len && isWhiteSpace(text.charAt(start))) {
            start++;
        }

        int end = len - 1;

        while (end > start && isWhiteSpace(text.charAt(end))) {
            end--;
        }

        if (start == 0 && end == len - 1) {
            return text; // no change
        }
        // For all-white-space input start == len, which yields an empty sequence.
        return text.subSequence(start, end + 1);
    }

    /**
     * Collapses white space: leading and trailing white space is removed and
     * every other run of white space is reduced to a single space.
     *
     * @param text the text to collapse; must not be {@code null}
     * @return the collapsed text
     */
    public static String collapse(String text) {
        return collapse((CharSequence) text).toString();
    }

    /**
     * Collapses white space: leading and trailing white space is removed and
     * every other run of white space is reduced to a single space.
     *
     * <p>
     * This is usually the biggest processing bottleneck.
     *
     * @param text the text to collapse; must not be {@code null}
     * @return {@code text} itself when it contains no white space character
     *         at all, otherwise a newly built sequence holding the collapsed
     *         text
     * @since 2.0
     */
    public static CharSequence collapse(CharSequence text) {
        int len = text.length();

        // Most texts are already in the collapsed form, so look for the first
        // white space character in the hope that we will never see one.
        int s = 0;
        while (s < len) {
            if (isWhiteSpace(text.charAt(s))) {
                break;
            }
            s++;
        }
        if (s == len) {
            // The input happens to be already collapsed.
            return text;
        }

        // We now know that the input contains white space, so do the
        // collapsing normally. The result can never be longer than the input,
        // so this capacity avoids re-allocation.
        StringBuilder result = new StringBuilder(len);

        if (s != 0) {
            // Copy the white-space-free prefix in one go, followed by the
            // single space that stands for the run starting at index s.
            // When s == 0 the run is leading white space and is dropped.
            result.append(text, 0, s);
            result.append(' ');
        }

        // While in strip mode we are inside a run of white space whose
        // replacement (if any) has already been emitted.
        boolean inStripMode = true;
        for (int i = s + 1; i < len; i++) {
            char ch = text.charAt(i);
            boolean b = isWhiteSpace(ch);
            if (inStripMode && b) {
                continue; // skip this character
            }

            inStripMode = b;
            if (inStripMode) {
                result.append(' ');
            } else {
                result.append(ch);
            }
        }

        // Remove trailing white space. Runs are already collapsed, so all we
        // have to do is drop the last character if it is a space.
        len = result.length();
        if (len > 0 && result.charAt(len - 1) == ' ') {
            result.setLength(len - 1);
        }

        return result;
    }

    /**
     * Returns true if the specified string is all whitespace.
     *
     * @param s the character sequence to test; must not be {@code null}
     * @return {@code true} if every character of {@code s} is white space,
     *         which includes the case of an empty sequence
     */
    public static final boolean isWhiteSpace(CharSequence s) {
        for (int i = s.length() - 1; i >= 0; i--) {
            if (!isWhiteSpace(s.charAt(i))) {
                return false;
            }
        }
        return true;
    }

    /**
     * Returns true if the specified char is a white space character,
     * i.e. one of 0x9, 0xA, 0xD or 0x20.
     */
    public static final boolean isWhiteSpace(char ch) {
        // Most characters are above 0x20, so check that first
        // to quickly return false for most of the cases.
        if (ch > 0x20) {
            return false;
        }

        // Otherwise we have to do four comparisons.
        return ch == 0x9 || ch == 0xA || ch == 0xD || ch == 0x20;
    }

    /**
     * Returns true if the specified char is a white space character
     * but not 0x20, i.e. one of 0x9, 0xA or 0xD.
     */
    protected static final boolean isWhiteSpaceExceptSpace(char ch) {
        // Most characters are 0x20 or above, so check that first
        // to quickly return false for most of the cases.
        if (ch >= 0x20) {
            return false;
        }

        // Otherwise we have to do three comparisons.
        return ch == 0x9 || ch == 0xA || ch == 0xD;
    }
}
|