001: package snow.utils;
002:
003: import java.util.Arrays;
004: import tide.utils.SyntaxUtils;
005: import java.io.BufferedReader;
006: import java.io.StringReader;
007:
008: public final class StringUtils {
009: private StringUtils() {
010: }
011:
012: /** Used for example to detect html files (starting with <pre><html></pre>)
013: ignores the blanks at the text start
014: */
015: public static boolean startsWithIgnoresCaseAndBlanks(String text,
016: String start) {
017:
018: // Can be boost with Knuth's algorithm for fast search... ###
019: for (int i = 0; i < text.length() - start.length(); i++) {
020: //System.out.println("\""+text.substring(i, i+start.length())+"\"");
021: if (text.substring(i, i + start.length()).equalsIgnoreCase(
022: start))
023: return true;
024:
025: if (text.charAt(i) == ' ' || text.charAt(i) == '\r'
026: || text.charAt(i) == '\n' || text.charAt(i) == '\t') {
027: continue;
028: }
029: break;
030: }
031: return false;
032: }
033:
034: /** Finds the index of <tagName
035: @return the position of the first letter of the tag, not <
036: */
037: public static int indexOfStartTagIgnoreCase(String text,
038: String tagName) {
039: for (int i = 0; i < text.length() - tagName.length(); i++) {
040: if (text.charAt(i) == '<') {
041: String tt = text.substring(i + 1, i + 1
042: + tagName.length());
043: //System.out.println("."+tt+".");
044: if (tt.compareToIgnoreCase(tagName) == 0) {
045: return i + 1;
046: }
047: }
048: }
049: return -1;
050: }
051:
052: /** TODO: boost (copy java.lang.String's method)
053: */
054: public static int indexOfIgnoreCases_SLOW(String src,
055: String toSearch) {
056: return src.toUpperCase().indexOf(toSearch.toUpperCase());
057: }
058:
059: /** Finds the index of tagName>
060: @return the position of >
061: */
062: public static int indexOfEndTagIgnoreCase(String text,
063: String tagName) {
064: for (int i = tagName.length(); i < text.length(); i++) {
065: if (text.charAt(i) == '>') {
066: String tt = text.substring(i - tagName.length(), i);
067: //System.out.println("."+tt+".");
068: if (tt.compareToIgnoreCase(tagName) == 0)
069: return i;
070: }
071: }
072: return -1;
073: }
074:
075: public static String formatTime(long millis) {
076: if (millis == 0)
077: return "0 s";
078: if (millis < 0)
079: return "-" + formatTime(-millis); // BE CAREFUL OF INFINITE RECURSION
080:
081: if (millis < 1000)
082: return millis + " ms";
083: int sec = (int) millis / 1000;
084: if (millis < 60000)
085: return millis / 1000 + " s";
086: int min = sec / 60;
087: sec = sec - min * 60;
088: return min + " m " + sec + " s";
089: }
090:
091: /** remove the \r, and the spaces at the end of the lines.
092: * (tail or tailing)
093: */
094: public static String removeLineTailSpaces(String buf) {
095: StringBuilder ret = new StringBuilder(buf.length());
096: // kept in case of some non white before line end
097: StringBuilder tempSpaces = new StringBuilder();
098:
099: for (int i = 0; i < buf.length(); i++) {
100: char ci = buf.charAt(i);
101: if (ci == '\r')
102: continue; // ignore
103: else if (ci == '\n') {
104: // let fall whites !
105: if (tempSpaces.length() > 0) {
106: tempSpaces.setLength(0);
107: }
108: ret.append('\n');
109: } else if (Character.isWhitespace(ci)) {
110: tempSpaces.append(ci);
111: } else {
112: if (tempSpaces.length() > 0) {
113: // keep them
114: ret.append(tempSpaces);
115: tempSpaces.setLength(0);
116: }
117: ret.append(ci);
118: }
119: }
120: // forgt last whites...
121: return ret.toString();
122: }
123:
124: /** Prepend ind to all lines of txt.
125: */
126: public static String indent(String txt, String ind,
127: boolean exceptFirstLine) {
128: StringBuilder sb = new StringBuilder(txt.length() * 4 / 5);
129: BufferedReader br = new BufferedReader(new StringReader(txt));
130: String line = null;
131: try {
132: boolean first = true;
133: while ((line = br.readLine()) != null) {
134: if (first && exceptFirstLine) {
135: first = false;
136: sb.append(line + "\n");
137: } else {
138: sb.append(ind + line + "\n");
139: }
140: }
141: } catch (Exception e) {
142: e.printStackTrace();
143: }
144:
145: if (!txt.endsWith("\n"))
146: sb.setLength(sb.length() - 1); // don't add the last return if the origin txt hasn't one
147: return sb.toString();
148: }
149:
150: /** @return the line, where the text before elt has been removed.
151: whole line if elt not found.
152: elt is also removed.
153: */
154: public static String removeBeforeIncluded(String line, String elt) {
155: int pos = line.indexOf(elt);
156: if (pos == -1)
157: return line;
158:
159: return line.substring(pos + elt.length());
160: }
161:
162: public static String removeFirstLine(String mess) {
163: mess = StringUtils.removeBeforeIncluded(mess, "\n");
164: if (mess.endsWith("\r"))
165: mess = mess.substring(0, mess.length() - 1);
166: return mess;
167: }
168:
169: public static String removeAfterLastIncluded(String line, String elt) {
170: int pos = line.lastIndexOf(elt);
171: if (pos == -1)
172: return line;
173: return line.substring(0, pos);
174:
175: }
176:
177: /** null if one of from or end not found.
178: */
179: @edu.umd.cs.findbugs.annotations.CheckForNull
180: public static String extractFromFirstToNext_Excluded(String line,
181: String from, String to) {
182: int posFrom = line.indexOf(from);
183: if (posFrom == -1)
184: return null;
185: int posEnd = line.indexOf(to, posFrom + from.length());
186: if (posEnd == -1)
187: return null;
188:
189: return line.substring(posFrom + from.length(), posEnd);
190: }
191:
192: /** null if one of from or end not found.
193: */
194: @edu.umd.cs.findbugs.annotations.CheckForNull
195: public static String extractFromFirstToLast_Excluded(String line,
196: String from, String to) {
197: int posFrom = line.indexOf(from);
198: if (posFrom == -1)
199: return null;
200: int posEnd = line.lastIndexOf(to);
201: if (posEnd == -1)
202: return null;
203:
204: return line.substring(posFrom + from.length(), posEnd);
205: }
206:
207: /** null if not found
208: */
209: public static String extractFromStartUpToFirstExcluded(String text,
210: String upto) {
211: int posEnd = text.indexOf(upto, 0);
212: if (posEnd == -1)
213: return null;
214:
215: return text.substring(0, posEnd);
216:
217: }
218:
219: /** null if not found. Exclude the from from returned string.
220: */
221: public static String keepAfterLastExcl(String text, String from) {
222: int pos = text.lastIndexOf(from);
223: if (pos < 0) {
224: return null;
225: }
226: return text.substring(pos + from.length());
227: }
228:
229: /** First line only.
230: */
231: public static String firstLine(String text) {
232: //
233: int posRet = text.indexOf('\n');
234: if (posRet >= 0) {
235: return text.substring(0, posRet).trim();
236: }
237: return text;
238: }
239:
240: /** @return the column number (first = 1) at the given position in text. -1 if bad pos.
241:
242: that is the number of chars backward to the next return.
243: the return itself is considered to be at the end of the line.
244: used because line, col is a robust way to describe a position,
245: absolute positions are not robust.
246: a javax.swing.text.Document will not point to the same char !
247: */
248: public static int getColumnNumberForPosition(String txt, int pos) {
249: if (pos < 0)
250: return -1; // bad pos !
251: int col = 0;
252: if (pos > txt.length()) {
253: return -1; // bad pos !
254: } else if (pos == txt.length()) // important special case for the last position.
255: {
256: pos = txt.length() - 1;
257: //System.out.println("col for end pos");
258: col++;
259: }
260:
261: for (int i = pos; i >= 0; i--) {
262: if (i != pos && txt.charAt(i) == '\n')
263: return col;
264: col++;
265: }
266: return col;
267: }
268:
269: /** @return the line number, 0 for the first, -1 if not found
270: */
271: public static int getLineNumberForPosition(String txt, int pos) {
272: return SyntaxUtils.countLinesUpToPosition(txt, pos);
273: }
274:
275: /** -1 if not found
276: */
277: public static int getPositionFor(String cont, int line, int col) {
278: // locate the line:
279: int nlines = 0;
280: int pos = -1;
281: while (nlines < line) {
282: pos = cont.indexOf('\n', pos + 1);
283: if (pos < 0)
284: return -1;
285: nlines++;
286: }
287:
288: return pos + col + 1;
289: }
290:
291: /** Long strings are replaced with "start...end"
292: * @param maxLen should be approx 70
293: */
294: public static String shortFormForDisplay(String str, int maxLen) {
295: if (str == null)
296: return "ERROR: null string in shortFormForDisplay";
297: if (str.length() < maxLen)
298: return str;
299: int mk = Math.max(5, maxLen / 2 - 5);
300: return str.substring(0, mk) + " .... "
301: + str.substring(str.length() - mk, str.length());
302: }
303:
304: public static int count(String txt, String item) {
305: int pos = -item.length();
306: int count = 0;
307: while ((pos = txt.indexOf(item, pos + item.length())) >= 0) {
308: count++;
309: }
310: return count;
311: }
312:
313: public static int count(String txt, char ci) // quicker as the String version
314: {
315: int count = 0;
316: for (int i = 0; i < txt.length(); i++) {
317: if (txt.charAt(i) == ci) {
318: count++;
319: }
320: }
321: return count;
322: }
323:
324: public static String removeQuotes(String txt, char quote) {
325: if (txt.length() < 2)
326: return txt;
327: if (txt.charAt(0) == quote
328: && txt.charAt(txt.length() - 1) == quote) {
329: return txt.substring(1, txt.length() - 1);
330: }
331: return txt;
332: }
333:
334: public static String removeCharsAtEnd(String s, int n) {
335: return s.substring(0, s.length() - n);
336: }
337:
338: /** More robust than simple replace based on index position.
339: */
340: public static String replace(String cont, int line, int col,
341: int lineEnd, int colEnd, String with) {
342: int posS = getPositionFor(cont, line, col);
343: if (posS < 0)
344: throw new RuntimeException("pos not found " + line + ", "
345: + col);
346: int posE = getPositionFor(cont, lineEnd, colEnd);
347: if (posE < 0)
348: throw new RuntimeException("pos not found " + lineEnd
349: + ", " + colEnd);
350:
351: if (posS >= cont.length())
352: throw new RuntimeException("start > str end");
353: if (posE >= cont.length())
354: throw new RuntimeException("end > str end");
355:
356: return cont.substring(0, posS) + with + cont.substring(posE);
357: }
358:
359: /** More robust than simple call based on index position.
360: */
361: public static String getText(String cont, int line, int col,
362: int lineEnd, int colEnd) {
363: int posS = getPositionFor(cont, line, col);
364: if (posS < 0)
365: throw new RuntimeException("pos not found " + line + ", "
366: + col);
367: int posE = getPositionFor(cont, lineEnd, colEnd);
368: if (posE < 0)
369: throw new RuntimeException("pos not found " + lineEnd
370: + ", " + colEnd);
371:
372: if (posS >= cont.length())
373: throw new RuntimeException("start > str end");
374: if (posE >= cont.length())
375: throw new RuntimeException("end > str end");
376:
377: return cont.substring(posS, posE);
378: }
379:
380: public static int countWords(final String txt) {
381: //System.out.println(""+ Arrays.asList(txt.trim().split("\\s+")));
382: // TODO: may be quicker and more accurate to char iterate and look at isLetterOrDigit()
383: return txt.trim().split("\\s+").length;
384: }
385:
386: // test
387: public static void main(String[] a) {
388: System.out.println(""
389: + countWords("/** hello world 1 2 3 */ "));
390: System.out.println(balancedRemoveFirst("<a href=22> aa</a>bb",
391: "<a", "/a>"));
392: System.out.println("" + removeFirstLine("a\nb\nc"));
393: System.out.println(count("aaa", '['));
394: System.out.println(""
395: + removeAfterLastIncluded("Hello.a.baaa", ".")); // => "Hello.a"
396: System.out.println(""
397: + removeLineTailSpaces(" \naaa \nbbb . "));
398: for (int i = 0; i < 10; i++) {
399: String s = "\nabc\nde\nf";
400: System.out.println("" + getColumnNumberForPosition(s, i)
401: + " " + getLineNumberForPosition(s, i));
402: }
403: System.out
404: .println(""
405: + shortFormForDisplay(
406: "a simple example goind from 1 to 10 and above.",
407: 30));
408:
409: String str = "Hallo\nDas ist mein\nBeispiel";
410: System.out.println(replace(str, 1, 4, 1, 7, "Dein"));
411: }
412:
413: public static String balancedRemoveAll(String text, String open,
414: String end) {
415: // fixed point: iterate until no more replacements were made
416: String rep = balancedRemoveFirst(text, open, end);
417: while (true) {
418: String nr = balancedRemoveFirst(rep, open, end);
419: if (rep.length() == nr.length())
420: return nr;
421: rep = nr;
422: }
423: }
424:
425: /** For example removing "< * >" items as appearing in a generics type variable declaration
426: * <1<2<3> 4> 5> 6> 789
427: */
428: public static String balancedRemoveFirst(String text, String open,
429: String end) {
430: // regex are not good: they don't recognize nested items, even if reluctant pattern are used.
431: StringBuilder sb = new StringBuilder(text.length());
432:
433: int firstOpen = text.indexOf(open);
434: if (firstOpen == -1)
435: return text; // quick case.
436:
437: int depth = 1;
438: sb.append(text.substring(0, firstOpen));
439:
440: int nextOpen = firstOpen;
441: int nextEnd = firstOpen;
442: int posToSearchFrom = firstOpen + 1;
443: int n = 0;
444:
445: // step forward in the text. Analyse at each step only the first occuring of {open or end}
446: while (true) {
447: n++;
448: if (n == 10)
449: break;
450:
451: nextOpen = text.indexOf(open, posToSearchFrom);
452: nextEnd = text.indexOf(end, posToSearchFrom);
453:
454: //System.out.println("next: no="+nextOpen+" ne="+nextEnd+" depth="+depth);
455:
456: if (nextOpen < 0 && nextEnd < 0) {
457: System.out.println("Unbalanced1 " + text);
458: return text;
459: }
460:
461: if (nextOpen < 0) {
462: // no next open only nextEnd encountered.
463: depth--;
464: if (depth <= 0) {
465: sb.append(text.substring(nextEnd + end.length()));
466: break;
467: }
468: posToSearchFrom = nextEnd + 1;
469:
470: } else if (nextEnd < 0) {
471: // no next end, only next open encountered...
472: System.out.println("Unbalanced3 " + text);
473: return text;
474: } else {
475: // next open and next end exists...
476: if (nextOpen < nextEnd) {
477: // consider the next open
478: depth++;
479: posToSearchFrom = nextOpen + 1;
480: } else {
481: // consider the next end
482: depth--;
483: posToSearchFrom = nextEnd + 1;
484: if (depth <= 0) {
485: sb.append(text
486: .substring(nextEnd + end.length()));
487: break;
488: }
489:
490: }
491: }
492: }
493:
494: return sb.toString();
495: }
496:
497: }
|