001: /*
002: * Licensed to the Apache Software Foundation (ASF) under one or more
003: * contributor license agreements. See the NOTICE file distributed with
004: * this work for additional information regarding copyright ownership.
005: * The ASF licenses this file to You under the Apache License, Version 2.0
006: * (the "License"); you may not use this file except in compliance with
007: * the License. You may obtain a copy of the License at
008: *
009: * http://www.apache.org/licenses/LICENSE-2.0
010: *
011: * Unless required by applicable law or agreed to in writing, software
012: * distributed under the License is distributed on an "AS IS" BASIS,
013: * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014: * See the License for the specific language governing permissions and
015: * limitations under the License.
016: */
017: package org.apache.commons.lang;
018:
019: /**
020: * <p>Operations on <code>CharSet</code>s.</p>
021: *
022: * <p>This class handles <code>null</code> input gracefully.
023: * An exception will not be thrown for a <code>null</code> input.
024: * Each method documents its behaviour in more detail.</p>
025: *
026: * @see CharSet
027: * @author Stephen Colebourne
028: * @author Phil Steitz
029: * @author Gary Gregory
030: * @since 1.0
031: * @version $Id: CharSetUtils.java 471626 2006-11-06 04:02:09Z bayard $
032: */
033: public class CharSetUtils {
034:
035: /**
036: * <p>CharSetUtils instances should NOT be constructed in standard programming.
037: * Instead, the class should be used as <code>CharSetUtils.evaluateSet(null);</code>.</p>
038: *
039: * <p>This constructor is public to permit tools that require a JavaBean instance
040: * to operate.</p>
041: */
042: public CharSetUtils() {
043: super ();
044: }
045:
046: // Factory
047: //-----------------------------------------------------------------------
048: /**
049: * <p>Creates a <code>CharSet</code> instance which allows a certain amount of
050: * set logic to be performed.</p>
051: * <p>The syntax is:</p>
052: * <ul>
053: * <li>"aeio" which implies 'a','e',..</li>
054: * <li>"^e" implies not e.</li>
055: * <li>"ej-m" implies e,j->m. e,j,k,l,m.</li>
056: * </ul>
057: *
058: * <pre>
059: * CharSetUtils.evaluateSet(null) = null
060: * CharSetUtils.evaluateSet([]) = CharSet matching nothing
061: * CharSetUtils.evaluateSet(["a-e"]) = CharSet matching a,b,c,d,e
062: * </pre>
063: *
064: * @param set the set, may be null
065: * @return a CharSet instance, <code>null</code> if null input
066: * @deprecated Use {@link CharSet#getInstance(String)}.
067: * Method will be removed in Commons Lang 3.0.
068: */
069: public static CharSet evaluateSet(String[] set) {
070: if (set == null) {
071: return null;
072: }
073: return new CharSet(set);
074: }
075:
076: // Squeeze
077: //-----------------------------------------------------------------------
078: /**
079: * <p>Squeezes any repetitions of a character that is mentioned in the
080: * supplied set.</p>
081: *
082: * <pre>
083: * CharSetUtils.squeeze(null, *) = null
084: * CharSetUtils.squeeze("", *) = ""
085: * CharSetUtils.squeeze(*, null) = *
086: * CharSetUtils.squeeze(*, "") = *
087: * CharSetUtils.squeeze("hello", "k-p") = "helo"
088: * CharSetUtils.squeeze("hello", "a-e") = "hello"
089: * </pre>
090: *
091: * @see #evaluateSet(java.lang.String[]) for set-syntax.
092: * @param str the string to squeeze, may be null
093: * @param set the character set to use for manipulation, may be null
094: * @return modified String, <code>null</code> if null string input
095: */
096: public static String squeeze(String str, String set) {
097: if (StringUtils.isEmpty(str) || StringUtils.isEmpty(set)) {
098: return str;
099: }
100: String[] strs = new String[1];
101: strs[0] = set;
102: return squeeze(str, strs);
103: }
104:
105: /**
106: * <p>Squeezes any repetitions of a character that is mentioned in the
107: * supplied set.</p>
108: *
109: * <p>An example is:</p>
110: * <ul>
111: * <li>squeeze("hello", {"el"}) => "helo"</li>
112: * </ul>
113: *
114: * @see #evaluateSet(java.lang.String[]) for set-syntax.
115: * @param str the string to squeeze, may be null
116: * @param set the character set to use for manipulation, may be null
117: * @return modified String, <code>null</code> if null string input
118: */
119: public static String squeeze(String str, String[] set) {
120: if (StringUtils.isEmpty(str) || ArrayUtils.isEmpty(set)) {
121: return str;
122: }
123: CharSet chars = evaluateSet(set);
124: StringBuffer buffer = new StringBuffer(str.length());
125: char[] chrs = str.toCharArray();
126: int sz = chrs.length;
127: char lastChar = ' ';
128: char ch = ' ';
129: for (int i = 0; i < sz; i++) {
130: ch = chrs[i];
131: if (chars.contains(ch)) {
132: if ((ch == lastChar) && (i != 0)) {
133: continue;
134: }
135: }
136: buffer.append(ch);
137: lastChar = ch;
138: }
139: return buffer.toString();
140: }
141:
142: // Count
143: //-----------------------------------------------------------------------
144: /**
145: * <p>Takes an argument in set-syntax, see evaluateSet,
146: * and returns the number of characters present in the specified string.</p>
147: *
148: * <pre>
149: * CharSetUtils.count(null, *) = 0
150: * CharSetUtils.count("", *) = 0
151: * CharSetUtils.count(*, null) = 0
152: * CharSetUtils.count(*, "") = 0
153: * CharSetUtils.count("hello", "k-p") = 3
154: * CharSetUtils.count("hello", "a-e") = 1
155: * </pre>
156: *
157: * @see #evaluateSet(java.lang.String[]) for set-syntax.
158: * @param str String to count characters in, may be null
159: * @param set String set of characters to count, may be null
160: * @return character count, zero if null string input
161: */
162: public static int count(String str, String set) {
163: if (StringUtils.isEmpty(str) || StringUtils.isEmpty(set)) {
164: return 0;
165: }
166: String[] strs = new String[1];
167: strs[0] = set;
168: return count(str, strs);
169: }
170:
171: /**
172: * <p>Takes an argument in set-syntax, see evaluateSet,
173: * and returns the number of characters present in the specified string.</p>
174: *
175: * <p>An example would be:</p>
176: * <ul>
177: * <li>count("hello", {"c-f", "o"}) returns 2.</li>
178: * </ul>
179: *
180: * @see #evaluateSet(java.lang.String[]) for set-syntax.
181: * @param str String to count characters in, may be null
182: * @param set String[] set of characters to count, may be null
183: * @return character count, zero if null string input
184: */
185: public static int count(String str, String[] set) {
186: if (StringUtils.isEmpty(str) || ArrayUtils.isEmpty(set)) {
187: return 0;
188: }
189: CharSet chars = evaluateSet(set);
190: int count = 0;
191: char[] chrs = str.toCharArray();
192: int sz = chrs.length;
193: for (int i = 0; i < sz; i++) {
194: if (chars.contains(chrs[i])) {
195: count++;
196: }
197: }
198: return count;
199: }
200:
201: // Keep
202: //-----------------------------------------------------------------------
203: /**
204: * <p>Takes an argument in set-syntax, see evaluateSet,
205: * and keeps any of characters present in the specified string.</p>
206: *
207: * <pre>
208: * CharSetUtils.keep(null, *) = null
209: * CharSetUtils.keep("", *) = ""
210: * CharSetUtils.keep(*, null) = ""
211: * CharSetUtils.keep(*, "") = ""
212: * CharSetUtils.keep("hello", "hl") = "hll"
213: * CharSetUtils.keep("hello", "le") = "ell"
214: * </pre>
215: *
216: * @see #evaluateSet(java.lang.String[]) for set-syntax.
217: * @param str String to keep characters from, may be null
218: * @param set String set of characters to keep, may be null
219: * @return modified String, <code>null</code> if null string input
220: * @since 2.0
221: */
222: public static String keep(String str, String set) {
223: if (str == null) {
224: return null;
225: }
226: if (str.length() == 0 || StringUtils.isEmpty(set)) {
227: return "";
228: }
229: String[] strs = new String[1];
230: strs[0] = set;
231: return keep(str, strs);
232: }
233:
234: /**
235: * <p>Takes an argument in set-syntax, see evaluateSet,
236: * and keeps any of characters present in the specified string.</p>
237: *
238: * <p>An example would be:</p>
239: * <ul>
240: * <li>keep("hello", {"c-f", "o"})
241: * returns "eo"</li>
242: * </ul>
243: *
244: * @see #evaluateSet(java.lang.String[]) for set-syntax.
245: * @param str String to keep characters from, may be null
246: * @param set String[] set of characters to keep, may be null
247: * @return modified String, <code>null</code> if null string input
248: * @since 2.0
249: */
250: public static String keep(String str, String[] set) {
251: if (str == null) {
252: return null;
253: }
254: if (str.length() == 0 || ArrayUtils.isEmpty(set)) {
255: return "";
256: }
257: return modify(str, set, true);
258: }
259:
260: // Delete
261: //-----------------------------------------------------------------------
262: /**
263: * <p>Takes an argument in set-syntax, see evaluateSet,
264: * and deletes any of characters present in the specified string.</p>
265: *
266: * <pre>
267: * CharSetUtils.delete(null, *) = null
268: * CharSetUtils.delete("", *) = ""
269: * CharSetUtils.delete(*, null) = *
270: * CharSetUtils.delete(*, "") = *
271: * CharSetUtils.delete("hello", "hl") = "eo"
272: * CharSetUtils.delete("hello", "le") = "ho"
273: * </pre>
274: *
275: * @see #evaluateSet(java.lang.String[]) for set-syntax.
276: * @param str String to delete characters from, may be null
277: * @param set String set of characters to delete, may be null
278: * @return modified String, <code>null</code> if null string input
279: */
280: public static String delete(String str, String set) {
281: if (StringUtils.isEmpty(str) || StringUtils.isEmpty(set)) {
282: return str;
283: }
284: String[] strs = new String[1];
285: strs[0] = set;
286: return delete(str, strs);
287: }
288:
289: /**
290: * <p>Takes an argument in set-syntax, see evaluateSet,
291: * and deletes any of characters present in the specified string.</p>
292: *
293: * <p>An example would be:</p>
294: * <ul>
295: * <li>delete("hello", {"c-f", "o"}) returns
296: * "hll"</li>
297: * </ul>
298: *
299: * @see #evaluateSet(java.lang.String[]) for set-syntax.
300: * @param str String to delete characters from, may be null
301: * @param set String[] set of characters to delete, may be null
302: * @return modified String, <code>null</code> if null string input
303: */
304: public static String delete(String str, String[] set) {
305: if (StringUtils.isEmpty(str) || ArrayUtils.isEmpty(set)) {
306: return str;
307: }
308: return modify(str, set, false);
309: }
310:
311: //-----------------------------------------------------------------------
312: /**
313: * Implementation of delete and keep
314: *
315: * @param str String to modify characters within
316: * @param set String[] set of characters to modify
317: * @param expect whether to evaluate on match, or non-match
318: * @return modified String
319: */
320: private static String modify(String str, String[] set,
321: boolean expect) {
322: CharSet chars = evaluateSet(set);
323: StringBuffer buffer = new StringBuffer(str.length());
324: char[] chrs = str.toCharArray();
325: int sz = chrs.length;
326: for (int i = 0; i < sz; i++) {
327: if (chars.contains(chrs[i]) == expect) {
328: buffer.append(chrs[i]);
329: }
330: }
331: return buffer.toString();
332: }
333:
334: // Translate
335: //-----------------------------------------------------------------------
336: /**
337: * <p>Translate characters in a String.
338: * This is a multi character search and replace routine.</p>
339: *
340: * <p>An example is:</p>
341: * <ul>
342: * <li>translate("hello", "ho", "jy")
343: * => jelly</li>
344: * </ul>
345: *
346: * <p>If the length of characters to search for is greater than the
347: * length of characters to replace, then the last character is
348: * used.</p>
349: *
350: * <pre>
351: * CharSetUtils.translate(null, *, *) = null
352: * CharSetUtils.translate("", *, *) = ""
353: * </pre>
354: *
355: * @param str String to replace characters in, may be null
356: * @param searchChars a set of characters to search for, must not be null
357: * @param replaceChars a set of characters to replace, must not be null or empty ("")
358: * @return translated String, <code>null</code> if null string input
359: * @throws NullPointerException if <code>searchChars</code> or <code>replaceChars</code>
360: * is <code>null</code>
361: * @throws ArrayIndexOutOfBoundsException if <code>replaceChars</code> is empty ("")
362: * @deprecated Use {@link StringUtils#replaceChars(String, String, String)}.
363: * Method will be removed in Commons Lang 3.0.
364: * NOTE: StringUtils#replaceChars behaves differently when 'searchChars' is longer
365: * than 'replaceChars'. CharSetUtils#translate will use the last char of the replacement
366: * string whereas StringUtils#replaceChars will delete
367: */
368: public static String translate(String str, String searchChars,
369: String replaceChars) {
370: if (StringUtils.isEmpty(str)) {
371: return str;
372: }
373: StringBuffer buffer = new StringBuffer(str.length());
374: char[] chrs = str.toCharArray();
375: char[] withChrs = replaceChars.toCharArray();
376: int sz = chrs.length;
377: int withMax = replaceChars.length() - 1;
378: for (int i = 0; i < sz; i++) {
379: int idx = searchChars.indexOf(chrs[i]);
380: if (idx != -1) {
381: if (idx > withMax) {
382: idx = withMax;
383: }
384: buffer.append(withChrs[idx]);
385: } else {
386: buffer.append(chrs[i]);
387: }
388: }
389: return buffer.toString();
390: }
391:
392: }
|