001: package com.quadcap.util.text;
002:
003: /* Copyright 1997 - 2003 Quadcap Software. All rights reserved.
004: *
005: * This software is distributed under the Quadcap Free Software License.
006: * This software may be used or modified for any purpose, personal or
007: * commercial. Open Source redistributions are permitted. Commercial
008: * redistribution of larger works derived from, or works which bundle
009: * this software requires a "Commercial Redistribution License"; see
010: * http://www.quadcap.com/purchase.
011: *
012: * Redistributions qualify as "Open Source" under one of the following terms:
013: *
014: * Redistributions are made at no charge beyond the reasonable cost of
015: * materials and delivery.
016: *
017: * Redistributions are accompanied by a copy of the Source Code or by an
018: * irrevocable offer to provide a copy of the Source Code for up to three
019: * years at the cost of materials and delivery. Such redistributions
020: * must allow further use, modification, and redistribution of the Source
021: * Code under substantially the same terms as this license.
022: *
023: * Redistributions of source code must retain the copyright notices as they
024: * appear in each source code file, these license terms, and the
025: * disclaimer/limitation of liability set forth as paragraph 6 below.
026: *
027: * Redistributions in binary form must reproduce this Copyright Notice,
028: * these license terms, and the disclaimer/limitation of liability set
029: * forth as paragraph 6 below, in the documentation and/or other materials
030: * provided with the distribution.
031: *
032: * The Software is provided on an "AS IS" basis. No warranty is
033: * provided that the Software is free of defects, or fit for a
034: * particular purpose.
035: *
036: * Limitation of Liability. Quadcap Software shall not be liable
037: * for any damages suffered by the Licensee or any third party resulting
038: * from use of the Software.
039: */
040:
041: import java.util.ArrayList;
042:
043: /**
044: * This class is yet another approach to string parsing. It's basically
045: * an anchored glob-style match.
046: *
047: * @author Stan Bailes
048: */
public class Text {

    /**
     * Extracts the text matched by each wildcard of a glob-style pattern,
     * using {@code '*'} as the wildcard character.
     *
     * Examples:
     *   sv[0,1,2] = extractN(w, "*(*)*")
     *   sv[0-3]   = extractN(w, "*:*:*:*");
     *
     * @param s the string to match against
     * @param p the pattern; see {@link #extractN(String, String, char)}
     * @return one string per wildcard, or {@code null} if {@code s} does
     *         not match
     */
    public static String[] extractN(String s, String p) {
        return extractN(s, p, '*');
    }

    /**
     * Tries each pattern in order and returns the captures of the first
     * one that matches, with the index of that pattern appended.
     *
     * @param s the string to match against
     * @param p the candidate patterns, tried from index 0 upwards
     * @param d the wildcard character used in the patterns
     * @return the captures produced by
     *         {@link #extractN(String, String, char)} for the first matching
     *         pattern, followed by one extra element holding that pattern's
     *         index as a decimal string; {@code null} if no pattern matches
     */
    public static String[] extractMatching(String s, String[] p, char d) {
        for (int i = 0; i < p.length; i++) {
            String[] captures = extractN(s, p[i], d);
            if (captures != null) {
                // Last slot carries the index of the pattern that matched.
                String[] withIndex = new String[captures.length + 1];
                System.arraycopy(captures, 0, withIndex, 0, captures.length);
                withIndex[captures.length] = String.valueOf(i);
                return withIndex;
            }
        }
        return null;
    }

    /**
     * Extracts the text matched by each wildcard of a glob-style pattern.
     *
     * The pattern is first normalized so that it both starts and ends with
     * the wildcard character {@code d}: a {@code null} or empty pattern
     * becomes a single wildcard, and a wildcard is added at either end
     * where one is missing. The literal runs between wildcards are then
     * located in {@code s} from left to right, each at its first occurrence
     * at or after the end of the previous literal. Every wildcard except the
     * last captures the text up to the next literal; the last wildcard
     * captures the remainder of {@code s}.
     *
     * Two adjacent wildcards are separated by an empty literal, which
     * matches immediately, so the first of the pair captures the empty
     * string.
     *
     * @param s the string to match against
     * @param p the pattern, may be {@code null} or empty
     * @param d the wildcard character
     * @return an array with one element per wildcard of the normalized
     *         pattern, or {@code null} if some literal cannot be found
     * @throws NullPointerException if {@code s} is {@code null}
     */
    public static String[] extractN(String s, String p, char d) {
        String[] lits = splitLiterals(normalizePattern(p, d), d);

        // The normalized pattern starts and ends with d, so lits[0] and
        // lits[lits.length - 1] are always empty; only the literals in
        // between have to be located.
        String[] ret = new String[lits.length - 1];
        int pos = 0;
        for (int k = 1; k < lits.length - 1; k++) {
            int idx = s.indexOf(lits[k], pos);
            if (idx < 0) {
                // indexOf is leftmost-first: moving an earlier literal to a
                // later occurrence can only shrink the remaining search
                // range, so no backtracking can rescue the match.
                return null;
            }
            ret[k - 1] = s.substring(pos, idx);
            pos = idx + lits[k].length();
        }
        ret[ret.length - 1] = s.substring(pos);
        return ret;
    }

    /**
     * Returns the pattern wrapped so that it begins and ends with the
     * wildcard character; a null or empty pattern becomes one wildcard.
     */
    private static String normalizePattern(String p, char d) {
        if (p == null || p.length() == 0) {
            return String.valueOf(d);
        }
        String norm = p;
        if (norm.charAt(0) != d) {
            norm = String.valueOf(d) + norm;
        }
        if (norm.charAt(norm.length() - 1) != d) {
            norm = norm + d;
        }
        return norm;
    }

    /**
     * Splits the pattern at every occurrence of the wildcard character,
     * keeping empty pieces, so the result has one more element than the
     * pattern has wildcards.
     */
    private static String[] splitLiterals(String p, char d) {
        int pieces = 1;
        for (int i = 0; i < p.length(); i++) {
            if (p.charAt(i) == d) {
                pieces++;
            }
        }
        String[] lits = new String[pieces];
        int start = 0;
        for (int k = 0; k < pieces - 1; k++) {
            int end = p.indexOf(d, start);
            lits[k] = p.substring(start, end);
            start = end + 1;
        }
        lits[pieces - 1] = p.substring(start);
        return lits;
    }

    /**
     * Extract a substring, using {@code '*'} as the wildcard character.
     *
     * example:
     *
     *   extract(s, "*.gif", 0);
     *   extract(s, "http://*index.html", 1);
     *
     * @param s the string to match against
     * @param p the pattern; see {@link #extractN(String, String, char)}
     * @param n index of the wildcard capture to return
     * @return capture {@code n}, or {@code s} itself if {@code s} does not
     *         match the pattern
     * @throws ArrayIndexOutOfBoundsException if the pattern matches but
     *         {@code n} is not a valid capture index
     */
    public static String extract(String s, String p, int n) {
        return extract(s, p, n, '*');
    }

    /**
     * Extract a substring using a caller-chosen wildcard character.
     *
     * Note that the pattern is implicitly wrapped in wildcards (see
     * {@link #extractN(String, String, char)}), so a pattern that starts
     * with a literal gets an extra capture at index 0.
     *
     * @param s the string to match against
     * @param p the pattern
     * @param n index of the wildcard capture to return
     * @param d the wildcard character
     * @return capture {@code n}, or {@code s} itself if {@code s} does not
     *         match the pattern
     * @throws ArrayIndexOutOfBoundsException if the pattern matches but
     *         {@code n} is not a valid capture index
     */
    public static String extract(String s, String p, int n, char d) {
        String[] captures = extractN(s, p, d);
        if (captures == null) {
            return s;
        }
        return captures[n];
    }

}
|