001: //$HeadURL: https://svn.wald.intevation.org/svn/deegree/base/trunk/src/org/deegree/framework/util/StringTools.java $
002: /*---------------- FILE HEADER ------------------------------------------
003:
004: This file is part of deegree.
005: Copyright (C) 2001-2008 by:
006: EXSE, Department of Geography, University of Bonn
007: http://www.giub.uni-bonn.de/deegree/
008: lat/lon GmbH
009: http://www.lat-lon.de
010:
011: This library is free software; you can redistribute it and/or
012: modify it under the terms of the GNU Lesser General Public
013: License as published by the Free Software Foundation; either
014: version 2.1 of the License, or (at your option) any later version.
015:
016: This library is distributed in the hope that it will be useful,
017: but WITHOUT ANY WARRANTY; without even the implied warranty of
018: MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
019: Lesser General Public License for more details.
020:
021: You should have received a copy of the GNU Lesser General Public
022: License along with this library; if not, write to the Free Software
023: Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
024:
025: Contact:
026:
027: Andreas Poth
028: lat/lon GmbH
029: Aennchenstr. 19
030: 53115 Bonn
031: Germany
032: E-Mail: poth@lat-lon.de
033:
034: Prof. Dr. Klaus Greve
035: Department of Geography
036: University of Bonn
037: Meckenheimer Allee 166
038: 53115 Bonn
039: Germany
040: E-Mail: greve@giub.uni-bonn.de
041:
042:
043: ---------------------------------------------------------------------------*/
044: package org.deegree.framework.util;
045:
046: import java.io.BufferedReader;
047: import java.io.IOException;
048: import java.io.InputStream;
049: import java.io.InputStreamReader;
050: import java.io.StringReader;
051: import java.util.ArrayList;
052: import java.util.HashMap;
053: import java.util.List;
054: import java.util.Locale;
055: import java.util.Map;
056: import java.util.Set;
057: import java.util.StringTokenizer;
058:
059: import org.deegree.framework.xml.XMLFragment;
060: import org.deegree.framework.xml.XMLParsingException;
061: import org.deegree.framework.xml.XMLTools;
062: import org.deegree.ogcbase.CommonNamespaces;
063: import org.w3c.dom.Node;
064: import org.xml.sax.SAXException;
065:
066: /**
* This is a collection of utility methods that extend the functionality of the standard Java
* {@link String} class.
069: *
070: * @author <a href="mailto:poth@lat-lon.de">Andreas Poth</a>
071: * @author last edited by: $Author: otonnhofer $
072: *
073: * @version $Revision: 10642 $, $Date: 2008-03-20 03:17:45 -0700 (Thu, 20 Mar 2008) $
074: */
075: public class StringTools {
076:
077: /**
078: * This map is used for methods normalizeString() and initMap().
079: *
080: * key = locale language, e.g. "de"
081: * value = map of substitution rules for this locale
082: */
083: private static Map<String, Map<String, String>> localeMap;
084:
085: /**
086: * concatenates an array of strings using a
087: *
088: * @see StringBuffer
089: *
090: * @param size
091: * estimated size of the target string
092: * @param objects
093: * toString() will be called for each object to append it to the result string
094: * @return
095: */
096: public static String concat(int size, Object... objects) {
097: StringBuilder sbb = new StringBuilder(size);
098: for (int i = 0; i < objects.length; i++) {
099: sbb.append(objects[i]);
100: }
101: return sbb.toString();
102: }
103:
104: /**
105: * replaces occurences of a string fragment within a string by a new string.
106: *
107: * @param target
108: * is the original string
109: * @param from
110: * is the string to be replaced
111: * @param to
112: * is the string which will used to replace
113: * @param all
114: * if it's true all occurences of the string to be replaced will be replaced. else
115: * only the first occurence will be replaced.
116: * @return the changed target string
117: */
118: public static String replace(String target, String from, String to,
119: boolean all) {
120:
121: StringBuffer buffer = new StringBuffer(target.length());
122: int copyFrom = 0;
123: char[] targetChars = null;
124: int lf = from.length();
125: int start = -1;
126: do {
127: start = target.indexOf(from);
128: copyFrom = 0;
129: if (start == -1) {
130: return target;
131: }
132:
133: targetChars = target.toCharArray();
134: while (start != -1) {
135: buffer.append(targetChars, copyFrom, start - copyFrom);
136: buffer.append(to);
137: copyFrom = start + lf;
138: start = target.indexOf(from, copyFrom);
139: if (!all) {
140: start = -1;
141: }
142: }
143: buffer.append(targetChars, copyFrom, targetChars.length
144: - copyFrom);
145: target = buffer.toString();
146: buffer.delete(0, buffer.length());
147: } while (target.indexOf(from) > -1 && to.indexOf(from) < 0);
148:
149: return target;
150: }
151:
152: /**
153: * parse a string and return its tokens as array
154: *
155: * @param s
156: * string to parse
157: * @param delimiter
158: * delimiter that marks the end of a token
159: * @param deleteDoubles
160: * if it's true all string that are already within the resulting array will be
161: * deleted, so that there will only be one copy of them.
162: * @return
163: */
164: public static String[] toArray(String s, String delimiter,
165: boolean deleteDoubles) {
166: if (s == null || s.equals("")) {
167: return new String[0];
168: }
169:
170: StringTokenizer st = new StringTokenizer(s, delimiter);
171: ArrayList<String> vec = new ArrayList<String>(st.countTokens());
172:
173: if (st.countTokens() > 0) {
174: for (int i = 0; st.hasMoreTokens(); i++) {
175: String t = st.nextToken();
176: if ((t != null) && (t.length() > 0)) {
177: vec.add(t.trim());
178: }
179: }
180: } else {
181: vec.add(s);
182: }
183:
184: String[] kw = vec.toArray(new String[vec.size()]);
185: if (deleteDoubles) {
186: kw = deleteDoubles(kw);
187: }
188:
189: return kw;
190: }
191:
192: /**
193: * parse a string and return its tokens as typed List.
194: * empty fields will be removed from the list.
195: *
196: * @param s
197: * string to parse
198: * @param delimiter
199: * delimiter that marks the end of a token
200: * @param deleteDoubles
201: * if it's true all string that are already within the resulting array will be
202: * deleted, so that there will only be one copy of them.
203: * @return
204: */
205: public static List<String> toList(String s, String delimiter,
206: boolean deleteDoubles) {
207: if (s == null || s.equals("")) {
208: return new ArrayList<String>();
209: }
210:
211: StringTokenizer st = new StringTokenizer(s, delimiter);
212: ArrayList<String> vec = new ArrayList<String>(st.countTokens());
213: for (int i = 0; st.hasMoreTokens(); i++) {
214: String t = st.nextToken();
215: if ((t != null) && (t.length() > 0)) {
216: if (deleteDoubles) {
217: if (!vec.contains(t.trim())) {
218: vec.add(t.trim());
219: }
220: } else {
221: vec.add(t.trim());
222: }
223: }
224: }
225:
226: return vec;
227: }
228:
229: /**
230: * transforms a string array to one string. the array fields are seperated by the submitted
231: * delimiter:
232: *
233: * @param s
234: * stringarray to transform
235: * @param delimiter
236: */
237: public static String arrayToString(String[] s, char delimiter) {
238: StringBuffer res = new StringBuffer(s.length * 20);
239:
240: for (int i = 0; i < s.length; i++) {
241: res.append(s[i]);
242:
243: if (i < (s.length - 1)) {
244: res.append(delimiter);
245: }
246: }
247:
248: return res.toString();
249: }
250:
251: /**
252: * transforms a list to one string. the array fields are seperated by the submitted delimiter:
253: *
254: * @param s
255: * stringarray to transform
256: * @param delimiter
257: */
258: public static String listToString(List s, char delimiter) {
259: StringBuffer res = new StringBuffer(s.size() * 20);
260:
261: for (int i = 0; i < s.size(); i++) {
262: res.append(s.get(i));
263:
264: if (i < (s.size() - 1)) {
265: res.append(delimiter);
266: }
267: }
268:
269: return res.toString();
270: }
271:
272: /**
273: * transforms a double array to one string. the array fields are seperated by the submitted
274: * delimiter:
275: *
276: * @param s
277: * stringarray to transform
278: * @param delimiter
279: */
280: public static String arrayToString(double[] s, char delimiter) {
281: StringBuffer res = new StringBuffer(s.length * 20);
282:
283: for (int i = 0; i < s.length; i++) {
284: res.append(Double.toString(s[i]));
285:
286: if (i < (s.length - 1)) {
287: res.append(delimiter);
288: }
289: }
290:
291: return res.toString();
292: }
293:
294: /**
295: * transforms a int array to one string. the array fields are seperated by the submitted
296: * delimiter:
297: *
298: * @param s
299: * stringarray to transform
300: * @param delimiter
301: */
302: public static String arrayToString(int[] s, char delimiter) {
303: StringBuffer res = new StringBuffer(s.length * 20);
304:
305: for (int i = 0; i < s.length; i++) {
306: res.append(Integer.toString(s[i]));
307:
308: if (i < (s.length - 1)) {
309: res.append(delimiter);
310: }
311: }
312:
313: return res.toString();
314: }
315:
316: /**
317: * clears the begin and end of a string from the strings sumitted
318: *
319: * @param s
320: * string to validate
321: * @param mark
322: * string to remove from begin and end of <code>s</code>
323: */
324: public static String validateString(String s, String mark) {
325: if (s == null) {
326: return null;
327: }
328:
329: if (s.length() == 0) {
330: return s;
331: }
332:
333: s = s.trim();
334:
335: while (s.startsWith(mark)) {
336: s = s.substring(mark.length(), s.length()).trim();
337: }
338:
339: while (s.endsWith(mark)) {
340: s = s.substring(0, s.length() - mark.length()).trim();
341: }
342:
343: return s;
344: }
345:
346: /**
347: * deletes all double entries from the submitted array
348: */
349: public static String[] deleteDoubles(String[] s) {
350: ArrayList<String> vec = new ArrayList<String>(s.length);
351:
352: for (int i = 0; i < s.length; i++) {
353: if (!vec.contains(s[i])) {
354: vec.add(s[i]);
355: }
356: }
357:
358: return vec.toArray(new String[vec.size()]);
359: }
360:
361: /**
362: * removes all fields from the array that equals <code>s</code>
363: *
364: * @param target
365: * array where to remove the submitted string
366: * @param s
367: * string to remove
368: */
369: public static String[] removeFromArray(String[] target, String s) {
370: ArrayList<String> vec = new ArrayList<String>(target.length);
371:
372: for (int i = 0; i < target.length; i++) {
373: if (!target[i].equals(s)) {
374: vec.add(target[i]);
375: }
376: }
377:
378: return vec.toArray(new String[vec.size()]);
379: }
380:
381: /**
382: * checks if the submitted array contains the string <code>value</code>
383: *
384: * @param target
385: * array to check if it contains <code>value</code>
386: * @param value
387: * string to check if it within the array
388: */
389: public static boolean contains(String[] target, String value) {
390: if (target == null || value == null) {
391: return false;
392: }
393:
394: if (value.endsWith(",")) {
395: value = value.substring(0, value.length() - 1);
396: }
397:
398: for (int i = 0; i < target.length; i++) {
399: if (value.equalsIgnoreCase(target[i])) {
400: return true;
401: }
402: }
403:
404: return false;
405: }
406:
407: /**
408: * convert the array of string like [(x1,y1),(x2,y2)...] into an array of double
409: * [x1,y1,x2,y2...]
410: *
411: * @param s
412: * @param delimiter
413: *
414: * @return
415: */
416: public static double[] toArrayDouble(String s, String delimiter) {
417: if (s == null) {
418: return null;
419: }
420:
421: if (s.equals("")) {
422: return null;
423: }
424:
425: StringTokenizer st = new StringTokenizer(s, delimiter);
426:
427: ArrayList<String> vec = new ArrayList<String>(st.countTokens());
428:
429: for (int i = 0; st.hasMoreTokens(); i++) {
430: String t = st.nextToken().replace(' ', '+');
431:
432: if ((t != null) && (t.length() > 0)) {
433: vec.add(t.trim().replace(',', '.'));
434: }
435: }
436:
437: double[] array = new double[vec.size()];
438:
439: for (int i = 0; i < vec.size(); i++) {
440: array[i] = Double.parseDouble(vec.get(i));
441: }
442:
443: return array;
444: }
445:
446: /**
447: * convert the array of string like [(x1,y1),(x2,y2)...] into an array of
448: * float values [x1,y1,x2,y2...]
449: *
450: * @param s
451: * @param delimiter
452: *
453: * @return
454: */
455: public static float[] toArrayFloat(String s, String delimiter) {
456: if (s == null) {
457: return null;
458: }
459:
460: if (s.equals("")) {
461: return null;
462: }
463:
464: StringTokenizer st = new StringTokenizer(s, delimiter);
465:
466: ArrayList<String> vec = new ArrayList<String>(st.countTokens());
467: for (int i = 0; st.hasMoreTokens(); i++) {
468: String t = st.nextToken().replace(' ', '+');
469: if ((t != null) && (t.length() > 0)) {
470: vec.add(t.trim().replace(',', '.'));
471: }
472: }
473:
474: float[] array = new float[vec.size()];
475:
476: for (int i = 0; i < vec.size(); i++) {
477: array[i] = Float.parseFloat(vec.get(i));
478: }
479:
480: return array;
481: }
482:
483: /**
484: * transforms an array of StackTraceElements into a String
485: */
486: public static String stackTraceToString(StackTraceElement[] se) {
487:
488: StringBuffer sb = new StringBuffer();
489: for (int i = 0; i < se.length; i++) {
490: sb.append(se[i].getClassName() + " ");
491: sb.append(se[i].getFileName() + " ");
492: sb.append(se[i].getMethodName() + "(");
493: sb.append(se[i].getLineNumber() + ")\n");
494: }
495: return sb.toString();
496: }
497:
498: /**
499: * gets the stacktrace array from the passed Excption and transforms it into a String
500: */
501: public static String stackTraceToString(Throwable e) {
502:
503: StackTraceElement[] se = e.getStackTrace();
504: StringBuffer sb = new StringBuffer();
505: sb.append(e.getMessage()).append("\n");
506: sb.append(e.getClass().getName()).append("\n");
507: for (int i = 0; i < se.length; i++) {
508: sb.append(se[i].getClassName() + " ");
509: sb.append(se[i].getFileName() + " ");
510: sb.append(se[i].getMethodName() + "(");
511: sb.append(se[i].getLineNumber() + ")\n");
512: if (i > 4)
513: break;
514: }
515: return sb.toString();
516: }
517:
518: /**
519: * countString count the occurrences of token into target
520: *
521: * @param target
522: * @param token
523: *
524: * @return
525: */
526: public static int countString(String target, String token) {
527: int start = target.indexOf(token);
528: int count = 0;
529:
530: while (start != -1) {
531: count++;
532: start = target.indexOf(token, start + 1);
533: }
534:
535: return count;
536: }
537:
538: /**
539: * Extract all the strings that begin with "start" and end with "end" and store it into an array
540: * of String
541: *
542: * @param target
543: * @param startString
544: * @param endString
545: *
546: * @return <code>null</code> if no strings were found!!
547: */
548: public static String[] extractStrings(String target,
549: String startString, String endString) {
550: int start = target.indexOf(startString);
551:
552: if (start == -1) {
553: return null;
554: }
555:
556: int count = countString(target, startString);
557: String[] subString = null;
558: if (startString.equals(endString)) {
559: count = count / 2;
560: subString = new String[count];
561: for (int i = 0; i < count; i++) {
562: int tmp = target.indexOf(endString, start + 1);
563: subString[i] = target.substring(start, tmp + 1);
564: start = target.indexOf(startString, tmp + 1);
565: }
566: } else {
567: subString = new String[count];
568: for (int i = 0; i < count; i++) {
569: subString[i] = target.substring(start, target.indexOf(
570: endString, start + 1) + 1);
571: subString[i] = extractString(subString[i], startString,
572: endString, true, true);
573: start = target.indexOf(startString, start + 1);
574: }
575: }
576:
577: return subString;
578: }
579:
580: /**
581: * extract a string contained between startDel and endDel, you can remove the delimiters if set
582: * true the parameters delStart and delEnd
583: *
584: * @param target
585: * @param startDel
586: * @param endDel
587: * @param delStart
588: * @param delEnd
589: *
590: * @return
591: */
592: public static String extractString(String target, String startDel,
593: String endDel, boolean delStart, boolean delEnd) {
594: int start = target.indexOf(startDel);
595:
596: if (start == -1) {
597: return null;
598: }
599:
600: String s = target.substring(start, target.indexOf(endDel,
601: start + 1) + 1);
602:
603: s = s.trim();
604:
605: if (delStart) {
606: while (s.startsWith(startDel)) {
607: s = s.substring(startDel.length(), s.length()).trim();
608: }
609: }
610:
611: if (delEnd) {
612: while (s.endsWith(endDel)) {
613: s = s.substring(0, s.length() - endDel.length()).trim();
614: }
615: }
616:
617: return s;
618: }
619:
620: /**
621: * Initialize the substitution map with all normalization rules for a given locale and
622: * add this map to the static localeMap.
623: *
624: * @param locale
625: * @throws IOException
626: * @throws SAXException
627: * @throws XMLParsingException
628: */
629: private static void initMap(String locale) throws IOException,
630: SAXException, XMLParsingException {
631:
632: // read normalization file
633: StringBuffer sb = new StringBuffer(1000);
634: InputStream is = StringTools.class
635: .getResourceAsStream("/normalization.xml");
636: if (is == null) {
637: is = StringTools.class
638: .getResourceAsStream("normalization.xml");
639: }
640: BufferedReader br = new BufferedReader(
641: new InputStreamReader(is));
642: String s = null;
643: while ((s = br.readLine()) != null) {
644: sb.append(s);
645: }
646: br.close();
647:
648: // transform into xml fragment
649: XMLFragment xml = new XMLFragment();
650: xml.load(new StringReader(sb.toString()), StringTools.class
651: .getResource("normalization.xml").toString()); //FIXME
652:
653: // create map
654: Map<String, String> substitutionMap = new HashMap<String, String>(
655: 20);
656:
657: // extract case attrib ( "toLower" or "toUpper" or missing ) for passed locale
658: String xpath = "Locale[@name = '"
659: + Locale.GERMANY.getLanguage() + "']/@case";
660: String letterCase = XMLTools.getNodeAsString(xml
661: .getRootElement(), xpath, CommonNamespaces
662: .getNamespaceContext(), null);
663: if (letterCase != null) {
664: substitutionMap.put("case", letterCase);
665: }
666:
667: // extract removeDoubles attrib ( "true" or "false" ) for passed locale
668: xpath = "Locale[@name = '" + Locale.GERMANY.getLanguage()
669: + "']/@removeDoubles";
670: String removeDoubles = XMLTools.getNodeAsString(xml
671: .getRootElement(), xpath, CommonNamespaces
672: .getNamespaceContext(), null);
673: if (removeDoubles != null && removeDoubles.length() > 0) {
674: substitutionMap.put("removeDoubles", removeDoubles);
675: }
676:
677: // extract rules section for passed locale
678: xpath = "Locale[@name = '" + locale + "']/Rule";
679: List list = XMLTools.getNodes(xml.getRootElement(), xpath,
680: CommonNamespaces.getNamespaceContext());
681: if (list != null) {
682: for (int i = 0; i < list.size(); i++) {
683: String src = XMLTools.getRequiredNodeAsString(
684: (Node) list.get(i), "Source", CommonNamespaces
685: .getNamespaceContext());
686: String target = XMLTools.getRequiredNodeAsString(
687: (Node) list.get(i), "Target", CommonNamespaces
688: .getNamespaceContext());
689: substitutionMap.put(src, target);
690: }
691: }
692:
693: // init localeMap if needed
694: if (localeMap == null) {
695: localeMap = new HashMap<String, Map<String, String>>(20);
696: }
697:
698: localeMap.put(locale, substitutionMap);
699: }
700:
701: /**
702: * The passed string gets normalized along the rules for the given locale as they are set in
703: * the file "./normalization.xml".
704: * If such rules are specified, the following order is obeyed:
705: *
706: * <ol>
707: * <li>if the attribute "case" is set with "toLower" or "toUpper", the letters are switched
708: * to lower case or to upper case respectively.</li>
709: * <li>all rules given in the "Rule" elements are performed.</li>
710: * <li>if the attribute "removeDoubles" is set and not empty, all multi occurences of the
711: * letters given in this attribute are reduced to a single occurence.</li>
712: * </ol>
713: *
714: * @param source the String to normalize
715: * @param locale the locale language defining the rules to choose, e.g. "de"
716: * @return the normalized String
717: * @throws IOException
718: * @throws SAXException
719: * @throws XMLParsingException
720: */
721: public static String normalizeString(String source, String locale)
722: throws IOException, SAXException, XMLParsingException {
723:
724: if (localeMap == null) {
725: localeMap = new HashMap<String, Map<String, String>>(20);
726: }
727: Map<String, String> substitutionMap = localeMap.get(locale);
728:
729: if (substitutionMap == null) {
730: initMap(locale);
731: }
732: substitutionMap = localeMap.get(locale);
733:
734: String output = source;
735: Set<String> keys = substitutionMap.keySet();
736:
737: boolean toUpper = false;
738: boolean toLower = false;
739: boolean removeDoubles = false;
740:
741: for (String key : keys) {
742: if ("case".equals(key)) {
743: toUpper = "toUpper".equals(substitutionMap.get(key));
744: toLower = "toLower".equals(substitutionMap.get(key));
745: }
746: if ("removeDoubles".equals(key)
747: && substitutionMap.get(key).length() > 0) {
748: removeDoubles = true;
749: }
750: }
751:
752: // first: change letters to upper / lower case
753: if (toUpper) {
754: output = output.toUpperCase();
755: } else if (toLower) {
756: output = output.toLowerCase();
757: }
758:
759: // second: change string according to specified rules
760: for (String key : keys) {
761: if (!"case".equals(key) && !"removeDoubles".equals(key)) {
762: output = output.replaceAll(key, substitutionMap
763: .get(key));
764: }
765: }
766:
767: // third: remove doubles
768: if (removeDoubles) {
769: String doubles = substitutionMap.get("removeDoubles");
770: for (int i = 0; i < doubles.length(); i++) {
771: String remove = "" + doubles.charAt(i) + "+";
772: String replaceWith = "" + doubles.charAt(i);
773: output = output.replaceAll(remove, replaceWith);
774: }
775: }
776: return output;
777: }
778: }
|