001: package javax.xml.bind;
002:
/**
 * Processes white space normalization.
 *
 * <p>Throughout this class a "white space" character is one of exactly four
 * characters: tab (0x9), line feed (0xA), carriage return (0xD) and
 * space (0x20). Three operations are offered on top of that definition:
 * <ul>
 *   <li>{@code replace} &mdash; turns every tab, line feed and carriage
 *       return into a single space;</li>
 *   <li>{@code trim} &mdash; strips leading and trailing white space;</li>
 *   <li>{@code collapse} &mdash; strips leading and trailing white space and
 *       squeezes every inner run of white space into one space.</li>
 * </ul>
 *
 * <p>All methods are static; none of them accepts {@code null}.
 *
 * @since 1.0
 */
abstract class WhiteSpaceProcessor {

    // Historical note: a straightforward implementation of replace(String)
    // that copied every character into a StringBuffer was benchmarked
    // (see test/src/ReplaceTest.java in the CVS Attic) and turned out to be
    // slower than the current code, so it was dropped.

    /**
     * Replaces every tab, line feed and carriage return in {@code text}
     * with a space.
     *
     * @param text the text to process; must not be {@code null}
     * @return the processed text
     */
    public static String replace(String text) {
        return replace((CharSequence) text).toString();
    }

    /**
     * Replaces every tab, line feed and carriage return in {@code text}
     * with a space.
     *
     * @param text the text to process; must not be {@code null}
     * @return {@code text} itself when it contains none of those characters,
     *         otherwise a new {@code String} holding the replaced text
     * @since 2.0
     */
    public static CharSequence replace(CharSequence text) {
        // Scan backwards for the last character that has to be replaced.
        int i = text.length() - 1;
        while (i >= 0 && !isWhiteSpaceExceptSpace(text.charAt(i))) {
            i--;
        }

        if (i < 0) {
            // Nothing to replace: replace(text) == text, so skip the copy.
            return text;
        }

        // The text has to be modified; work on a mutable copy.
        StringBuilder buf = new StringBuilder(text);

        // Position i is already known to need replacing; continue towards
        // the start of the text from there.
        buf.setCharAt(i--, ' ');
        for (; i >= 0; i--) {
            if (isWhiteSpaceExceptSpace(buf.charAt(i))) {
                buf.setCharAt(i, ' ');
            }
        }

        return buf.toString();
    }

    /**
     * Strips leading and trailing white space from {@code text}.
     *
     * <p>This is similar to {@link String#trim()}, except that only the four
     * characters accepted by {@link #isWhiteSpace(char)} are stripped,
     * whereas {@code String.trim()} strips every character up to 0x20.
     *
     * @param text the text to trim; must not be {@code null}
     * @return {@code text} itself when there is nothing to strip, otherwise
     *         the result of {@code text.subSequence(..)} over the kept range
     *         (an empty range when {@code text} is all white space)
     * @since 2.0
     */
    public static CharSequence trim(CharSequence text) {
        int len = text.length();

        // Index of the first character to keep (== len if there is none).
        int start = 0;
        while (start < len && isWhiteSpace(text.charAt(start))) {
            start++;
        }

        // Index of the last character to keep.
        int end = len - 1;
        while (end > start && isWhiteSpace(text.charAt(end))) {
            end--;
        }

        if (start == 0 && end == len - 1) {
            return text; // no change
        }
        return text.subSequence(start, end + 1);
    }

    /**
     * Collapses white space in {@code text}: leading and trailing white
     * space is removed and every inner run of white space becomes a single
     * space.
     *
     * @param text the text to collapse; must not be {@code null}
     * @return the collapsed text
     */
    public static String collapse(String text) {
        return collapse((CharSequence) text).toString();
    }

    /**
     * Collapses white space in {@code text}: leading and trailing white
     * space is removed and every inner run of white space becomes a single
     * space.
     *
     * <p>This is usually the biggest processing bottleneck.
     *
     * @param text the text to collapse; must not be {@code null}
     * @return {@code text} itself when it contains no white space at all,
     *         otherwise a new {@code StringBuilder} holding the collapsed
     *         text
     * @since 2.0
     */
    public static CharSequence collapse(CharSequence text) {
        int len = text.length();

        // Most texts are already in the collapsed form, so look for the
        // first white space in the hope that we will never see it.
        int s = 0;
        while (s < len && !isWhiteSpace(text.charAt(s))) {
            s++;
        }
        if (s == len) {
            // The input happens to be already collapsed.
            return text;
        }

        // The input contains white space (first one at index s), so do the
        // collapsing for real. The result can never be longer than the
        // input, hence sizing by len avoids any re-allocation.
        StringBuilder result = new StringBuilder(len);

        if (s != 0) {
            // Copy the white-space-free prefix in one go, followed by the
            // single space that stands for the run starting at index s.
            result.append(text, 0, s).append(' ');
        }

        // While in strip mode the previous character was white space, so
        // any further white space is dropped. We start in strip mode because
        // the character at index s is white space.
        boolean inStripMode = true;
        for (int i = s + 1; i < len; i++) {
            char ch = text.charAt(i);
            boolean b = isWhiteSpace(ch);
            if (inStripMode && b) {
                continue; // skip this character
            }

            inStripMode = b;
            result.append(inStripMode ? ' ' : ch);
        }

        // Remove trailing white space. Runs are already collapsed, so at
        // most one trailing space can be left.
        len = result.length();
        if (len > 0 && result.charAt(len - 1) == ' ') {
            result.setLength(len - 1);
        }

        return result;
    }

    /**
     * Returns true if the specified string is all whitespace.
     *
     * @param s the character sequence to test; must not be {@code null}
     * @return {@code true} if every character of {@code s} is white space,
     *         which includes the case of an empty sequence
     */
    public static final boolean isWhiteSpace(CharSequence s) {
        for (int i = s.length() - 1; i >= 0; i--) {
            if (!isWhiteSpace(s.charAt(i))) {
                return false;
            }
        }
        return true;
    }

    /**
     * Returns true if the specified char is a white space character, i.e.
     * tab (0x9), line feed (0xA), carriage return (0xD) or space (0x20).
     *
     * @param ch the character to test
     * @return {@code true} if {@code ch} is one of the four characters above
     */
    public static final boolean isWhiteSpace(char ch) {
        // Most characters are above the control/space range, so check that
        // first to quickly return false for most of the cases.
        if (ch > 0x20) {
            return false;
        }

        // Otherwise four comparisons are needed.
        return ch == 0x9 || ch == 0xA || ch == 0xD || ch == 0x20;
    }

    /**
     * Returns true if the specified char is a white space character
     * but not 0x20, i.e. tab (0x9), line feed (0xA) or carriage
     * return (0xD).
     *
     * @param ch the character to test
     * @return {@code true} if {@code ch} is one of the three characters above
     */
    protected static final boolean isWhiteSpaceExceptSpace(char ch) {
        // Most characters are at or above 0x20, so check that first to
        // quickly return false for most of the cases.
        if (ch >= 0x20) {
            return false;
        }

        // Otherwise three comparisons are needed.
        return ch == 0x9 || ch == 0xA || ch == 0xD;
    }
}
|