001: /*******************************************************************************
002: * Copyright (c) 2000, 2007 IBM Corporation and others.
003: * All rights reserved. This program and the accompanying materials
004: * are made available under the terms of the Eclipse Public License v1.0
005: * which accompanies this distribution, and is available at
006: * http://www.eclipse.org/legal/epl-v10.html
007: *
008: * Contributors:
009: * IBM Corporation - initial API and implementation
010: *******************************************************************************/package org.eclipse.jface.text;
011:
012: import java.util.regex.Matcher;
013: import java.util.regex.Pattern;
014: import java.util.regex.PatternSyntaxException;
015:
016: import org.eclipse.core.runtime.Assert;
017:
018: /**
019: * Provides search and replace operations on
020: * {@link org.eclipse.jface.text.IDocument}.
021: * <p>
022: * Replaces
023: * {@link org.eclipse.jface.text.IDocument#search(int, String, boolean, boolean, boolean)}.
024: *
025: * @since 3.0
026: */
027: public class FindReplaceDocumentAdapter implements CharSequence {
028:
029: /**
030: * Internal type for operation codes.
031: */
032: private static class FindReplaceOperationCode {
033: }
034:
035: // Find/replace operation codes.
036: private static final FindReplaceOperationCode FIND_FIRST = new FindReplaceOperationCode();
037: private static final FindReplaceOperationCode FIND_NEXT = new FindReplaceOperationCode();
038: private static final FindReplaceOperationCode REPLACE = new FindReplaceOperationCode();
039: private static final FindReplaceOperationCode REPLACE_FIND_NEXT = new FindReplaceOperationCode();
040:
041: /**
042: * The adapted document.
043: */
044: private IDocument fDocument;
045:
046: /**
047: * State for findReplace.
048: */
049: private FindReplaceOperationCode fFindReplaceState = null;
050:
051: /**
052: * The matcher used in findReplace.
053: */
054: private Matcher fFindReplaceMatcher;
055:
056: /**
057: * The match offset from the last findReplace call.
058: */
059: private int fFindReplaceMatchOffset;
060:
061: /**
062: * Constructs a new find replace document adapter.
063: *
064: * @param document the adapted document
065: */
066: public FindReplaceDocumentAdapter(IDocument document) {
067: Assert.isNotNull(document);
068: fDocument = document;
069: }
070:
071: /**
072: * Returns the location of a given string in this adapter's document based on a set of search criteria.
073: *
074: * @param startOffset document offset at which search starts
075: * @param findString the string to find
076: * @param forwardSearch the search direction
077: * @param caseSensitive indicates whether lower and upper case should be distinguished
078: * @param wholeWord indicates whether the findString should be limited by white spaces as
079: * defined by Character.isWhiteSpace. Must not be used in combination with <code>regExSearch</code>.
080: * @param regExSearch if <code>true</code> findString represents a regular expression
081: * Must not be used in combination with <code>wholeWord</code>.
082: * @return the find or replace region or <code>null</code> if there was no match
083: * @throws BadLocationException if startOffset is an invalid document offset
084: * @throws PatternSyntaxException if a regular expression has invalid syntax
085: */
086: public IRegion find(int startOffset, String findString,
087: boolean forwardSearch, boolean caseSensitive,
088: boolean wholeWord, boolean regExSearch)
089: throws BadLocationException {
090: Assert.isTrue(!(regExSearch && wholeWord));
091:
092: // Adjust offset to special meaning of -1
093: if (startOffset == -1 && forwardSearch)
094: startOffset = 0;
095: if (startOffset == -1 && !forwardSearch)
096: startOffset = length() - 1;
097:
098: return findReplace(FIND_FIRST, startOffset, findString, null,
099: forwardSearch, caseSensitive, wholeWord, regExSearch);
100: }
101:
102: /**
103: * Stateful findReplace executes a FIND, REPLACE, REPLACE_FIND or FIND_FIRST operation.
104: * In case of REPLACE and REPLACE_FIND it sends a <code>DocumentEvent</code> to all
105: * registered <code>IDocumentListener</code>.
106: *
107: * @param startOffset document offset at which search starts
108: * this value is only used in the FIND_FIRST operation and otherwise ignored
109: * @param findString the string to find
110: * this value is only used in the FIND_FIRST operation and otherwise ignored
111: * @param replaceText the string to replace the current match
112: * this value is only used in the REPLACE and REPLACE_FIND operations and otherwise ignored
113: * @param forwardSearch the search direction
114: * @param caseSensitive indicates whether lower and upper case should be distinguished
115: * @param wholeWord indicates whether the findString should be limited by white spaces as
116: * defined by Character.isWhiteSpace. Must not be used in combination with <code>regExSearch</code>.
117: * @param regExSearch if <code>true</code> this operation represents a regular expression
118: * Must not be used in combination with <code>wholeWord</code>.
119: * @param operationCode specifies what kind of operation is executed
120: * @return the find or replace region or <code>null</code> if there was no match
121: * @throws BadLocationException if startOffset is an invalid document offset
122: * @throws IllegalStateException if a REPLACE or REPLACE_FIND operation is not preceded by a successful FIND operation
123: * @throws PatternSyntaxException if a regular expression has invalid syntax
124: */
125: private IRegion findReplace(
126: final FindReplaceOperationCode operationCode,
127: int startOffset, String findString, String replaceText,
128: boolean forwardSearch, boolean caseSensitive,
129: boolean wholeWord, boolean regExSearch)
130: throws BadLocationException {
131:
132: // Validate option combinations
133: Assert.isTrue(!(regExSearch && wholeWord));
134:
135: // Validate state
136: if ((operationCode == REPLACE || operationCode == REPLACE_FIND_NEXT)
137: && (fFindReplaceState != FIND_FIRST && fFindReplaceState != FIND_NEXT))
138: throw new IllegalStateException(
139: "illegal findReplace state: cannot replace without preceding find"); //$NON-NLS-1$
140:
141: if (operationCode == FIND_FIRST) {
142: // Reset
143:
144: if (findString == null || findString.length() == 0)
145: return null;
146:
147: // Validate start offset
148: if (startOffset < 0 || startOffset >= length())
149: throw new BadLocationException();
150:
151: int patternFlags = 0;
152:
153: if (regExSearch) {
154: patternFlags |= Pattern.MULTILINE;
155: findString = substituteLinebreak(findString);
156: }
157:
158: if (!caseSensitive)
159: patternFlags |= Pattern.CASE_INSENSITIVE
160: | Pattern.UNICODE_CASE;
161:
162: if (wholeWord)
163: findString = "\\b" + findString + "\\b"; //$NON-NLS-1$ //$NON-NLS-2$
164:
165: if (!regExSearch && !wholeWord)
166: findString = asRegPattern(findString);
167:
168: fFindReplaceMatchOffset = startOffset;
169: if (fFindReplaceMatcher != null
170: && fFindReplaceMatcher.pattern().pattern().equals(
171: findString)
172: && fFindReplaceMatcher.pattern().flags() == patternFlags) {
173: /*
174: * Commented out for optimization:
175: * The call is not needed since FIND_FIRST uses find(int) which resets the matcher
176: */
177: // fFindReplaceMatcher.reset();
178: } else {
179: Pattern pattern = Pattern.compile(findString,
180: patternFlags);
181: fFindReplaceMatcher = pattern.matcher(this );
182: }
183: }
184:
185: // Set state
186: fFindReplaceState = operationCode;
187:
188: if (operationCode == REPLACE
189: || operationCode == REPLACE_FIND_NEXT) {
190: if (regExSearch) {
191: Pattern pattern = fFindReplaceMatcher.pattern();
192: Matcher replaceTextMatcher = pattern
193: .matcher(fFindReplaceMatcher.group());
194: try {
195: replaceText = interpretReplaceEscapes(replaceText);
196: replaceText = replaceTextMatcher
197: .replaceFirst(replaceText);
198: } catch (IndexOutOfBoundsException ex) {
199: throw new PatternSyntaxException(ex
200: .getLocalizedMessage(), replaceText, -1);
201: }
202: }
203:
204: int offset = fFindReplaceMatcher.start();
205: fDocument.replace(offset, fFindReplaceMatcher.group()
206: .length(), replaceText);
207:
208: if (operationCode == REPLACE) {
209: return new Region(offset, replaceText.length());
210: }
211: }
212:
213: if (operationCode != REPLACE) {
214: if (forwardSearch) {
215:
216: boolean found = false;
217: if (operationCode == FIND_FIRST)
218: found = fFindReplaceMatcher.find(startOffset);
219: else
220: found = fFindReplaceMatcher.find();
221:
222: if (operationCode == REPLACE_FIND_NEXT)
223: fFindReplaceState = FIND_NEXT;
224:
225: if (found && fFindReplaceMatcher.group().length() > 0)
226: return new Region(fFindReplaceMatcher.start(),
227: fFindReplaceMatcher.group().length());
228: return null;
229: }
230:
231: // backward search
232: boolean found = fFindReplaceMatcher.find(0);
233: int index = -1;
234: int length = -1;
235: while (found
236: && fFindReplaceMatcher.start()
237: + fFindReplaceMatcher.group().length() <= fFindReplaceMatchOffset + 1) {
238: index = fFindReplaceMatcher.start();
239: length = fFindReplaceMatcher.group().length();
240: found = fFindReplaceMatcher.find(index + 1);
241: }
242: fFindReplaceMatchOffset = index;
243: if (index > -1) {
244: // must set matcher to correct position
245: fFindReplaceMatcher.find(index);
246: return new Region(index, length);
247: }
248: return null;
249: }
250:
251: return null;
252: }
253:
254: /**
255: * Substitutes \R in a regex find pattern with (?>\r\n?|\n)
256: *
257: * @param findString the original find pattern
258: * @return the transformed find pattern
259: * @throws PatternSyntaxException if \R is added at an illegal position (e.g. in a character set)
260: * @since 3.4
261: */
262: private String substituteLinebreak(String findString)
263: throws PatternSyntaxException {
264: int length = findString.length();
265: StringBuffer buf = new StringBuffer(length);
266:
267: int inCharGroup = 0;
268: int inBraces = 0;
269: boolean inQuote = false;
270: for (int i = 0; i < length; i++) {
271: char ch = findString.charAt(i);
272: switch (ch) {
273: case '[':
274: buf.append(ch);
275: if (!inQuote)
276: inCharGroup++;
277: break;
278:
279: case ']':
280: buf.append(ch);
281: if (!inQuote)
282: inCharGroup--;
283: break;
284:
285: case '{':
286: buf.append(ch);
287: if (!inQuote && inCharGroup == 0)
288: inBraces++;
289: break;
290:
291: case '}':
292: buf.append(ch);
293: if (!inQuote && inCharGroup == 0)
294: inBraces--;
295: break;
296:
297: case '\\':
298: if (i + 1 < length) {
299: char ch1 = findString.charAt(i + 1);
300: if (inQuote) {
301: if (ch1 == 'E')
302: inQuote = false;
303: buf.append(ch).append(ch1);
304: i++;
305:
306: } else if (ch1 == 'R') {
307: if (inCharGroup > 0 || inBraces > 0) {
308: String msg = TextMessages
309: .getString("FindReplaceDocumentAdapter.illegalLinebreak"); //$NON-NLS-1$
310: throw new PatternSyntaxException(msg,
311: findString, i);
312: }
313: buf.append("(?>\\r\\n?|\\n)"); //$NON-NLS-1$
314: i++;
315:
316: } else {
317: if (ch1 == 'Q') {
318: inQuote = true;
319: }
320: buf.append(ch).append(ch1);
321: i++;
322: }
323: } else {
324: buf.append(ch);
325: }
326: break;
327:
328: default:
329: buf.append(ch);
330: break;
331: }
332:
333: }
334: return buf.toString();
335: }
336:
337: /**
338: * Interprets escaped characters in the given replace pattern.
339: *
340: * @param replaceText the replace pattern
341: * @return a replace pattern with escaped characters substituted by the respective characters
342: * @since 3.4
343: */
344: private String interpretReplaceEscapes(String replaceText) {
345: int length = replaceText.length();
346: boolean inEscape = false;
347: StringBuffer buf = new StringBuffer(length);
348:
349: for (int i = 0; i < length; i++) {
350: final char ch = replaceText.charAt(i);
351: if (inEscape) {
352: i = interpretReplaceEscape(ch, i, buf, replaceText);
353: inEscape = false;
354:
355: } else if (ch == '\\') {
356: inEscape = true;
357:
358: } else if (ch == '$') {
359: buf.append(ch);
360:
361: /*
362: * Feature in java.util.regex.Matcher#replaceFirst(String):
363: * $00, $000, etc. are interpreted as $0 and
364: * $01, $001, etc. are interpreted as $1, etc. .
365: * If we support \0 as replacement pattern for capturing group 0,
366: * it would not be possible any more to write a replacement pattern
367: * that appends 0 to a capturing group (like $0\0).
368: * The fix is to interpret \00 and $00 as $0\0, and
369: * \01 and $01 as $0\1, etc.
370: */
371: if (i + 2 < length) {
372: char ch1 = replaceText.charAt(i + 1);
373: char ch2 = replaceText.charAt(i + 2);
374: if (ch1 == '0' && '0' <= ch2 && ch2 <= '9') {
375: buf.append("0\\"); //$NON-NLS-1$
376: i++; // consume the 0
377: }
378: }
379: } else {
380: buf.append(ch);
381: }
382: }
383:
384: if (inEscape) {
385: // '\' as last character is invalid, but we still add it to get an error message
386: buf.append('\\');
387: }
388: return buf.toString();
389: }
390:
391: /**
392: * Interprets the escaped character <code>ch</code> at offset <code>i</code>
393: * of the <code>replaceText</code> and appends the interpretation to <code>buf</code>.
394: *
395: * @param ch the escaped character
396: * @param i the offset
397: * @param buf the output buffer
398: * @param replaceText the original replace pattern
399: * @return the new offset
400: * @since 3.4
401: */
402: private int interpretReplaceEscape(final char ch, int i,
403: StringBuffer buf, String replaceText) {
404: int length = replaceText.length();
405: switch (ch) {
406: case 'r':
407: buf.append('\r');
408: break;
409: case 'n':
410: buf.append('\n');
411: break;
412: case 't':
413: buf.append('\t');
414: break;
415: case 'f':
416: buf.append('\f');
417: break;
418: case 'a':
419: buf.append('\u0007');
420: break;
421: case 'e':
422: buf.append('\u001B');
423: break;
424: case 'R': //see http://www.unicode.org/unicode/reports/tr18/#Line_Boundaries
425: buf
426: .append(TextUtilities
427: .getDefaultLineDelimiter(fDocument));
428: break;
429: /*
430: * \0 for octal is not supported in replace string, since it
431: * would conflict with capturing group \0, etc.
432: */
433: case '0':
434: buf.append('$').append(ch);
435: /*
436: * See explanation in "Feature in java.util.regex.Matcher#replaceFirst(String)"
437: * in interpretReplaceEscape(String) above.
438: */
439: if (i + 1 < length) {
440: char ch1 = replaceText.charAt(i + 1);
441: if ('0' <= ch1 && ch1 <= '9') {
442: buf.append('\\');
443: }
444: }
445: break;
446:
447: case '1':
448: case '2':
449: case '3':
450: case '4':
451: case '5':
452: case '6':
453: case '7':
454: case '8':
455: case '9':
456: buf.append('$').append(ch);
457: break;
458:
459: case 'c':
460: if (i + 1 < length) {
461: char ch1 = replaceText.charAt(i + 1);
462: buf.append((char) (ch1 ^ 64));
463: i++;
464: } else {
465: String msg = TextMessages
466: .getFormattedString(
467: "FindReplaceDocumentAdapter.illegalControlEscape", "\\c"); //$NON-NLS-1$ //$NON-NLS-2$
468: throw new PatternSyntaxException(msg, replaceText, i);
469: }
470: break;
471:
472: case 'x':
473: if (i + 2 < length) {
474: int parsedInt;
475: try {
476: parsedInt = Integer.parseInt(replaceText.substring(
477: i + 1, i + 3), 16);
478: if (parsedInt < 0)
479: throw new NumberFormatException();
480: } catch (NumberFormatException e) {
481: String msg = TextMessages
482: .getFormattedString(
483: "FindReplaceDocumentAdapter.illegalHexEscape", replaceText.substring(i - 1, i + 3)); //$NON-NLS-1$
484: throw new PatternSyntaxException(msg, replaceText,
485: i);
486: }
487: buf.append((char) parsedInt);
488: i += 2;
489: } else {
490: String msg = TextMessages
491: .getFormattedString(
492: "FindReplaceDocumentAdapter.illegalHexEscape", replaceText.substring(i - 1, length)); //$NON-NLS-1$
493: throw new PatternSyntaxException(msg, replaceText, i);
494: }
495: break;
496:
497: case 'u':
498: if (i + 4 < length) {
499: int parsedInt;
500: try {
501: parsedInt = Integer.parseInt(replaceText.substring(
502: i + 1, i + 5), 16);
503: if (parsedInt < 0)
504: throw new NumberFormatException();
505: } catch (NumberFormatException e) {
506: String msg = TextMessages
507: .getFormattedString(
508: "FindReplaceDocumentAdapter.illegalUnicodeEscape", replaceText.substring(i - 1, i + 5)); //$NON-NLS-1$
509: throw new PatternSyntaxException(msg, replaceText,
510: i);
511: }
512: buf.append((char) parsedInt);
513: i += 4;
514: } else {
515: String msg = TextMessages
516: .getFormattedString(
517: "FindReplaceDocumentAdapter.illegalUnicodeEscape", replaceText.substring(i - 1, length)); //$NON-NLS-1$
518: throw new PatternSyntaxException(msg, replaceText, i);
519: }
520: break;
521:
522: default:
523: // unknown escape k: append uninterpreted \k
524: buf.append('\\').append(ch);
525: break;
526: }
527: return i;
528: }
529:
530: /**
531: * Converts a non-regex string to a pattern
532: * that can be used with the regex search engine.
533: *
534: * @param string the non-regex pattern
535: * @return the string converted to a regex pattern
536: */
537: private String asRegPattern(String string) {
538: StringBuffer out = new StringBuffer(string.length());
539: boolean quoting = false;
540:
541: for (int i = 0, length = string.length(); i < length; i++) {
542: char ch = string.charAt(i);
543: if (ch == '\\') {
544: if (quoting) {
545: out.append("\\E"); //$NON-NLS-1$
546: quoting = false;
547: }
548: out.append("\\\\"); //$NON-NLS-1$
549: continue;
550: }
551: if (!quoting) {
552: out.append("\\Q"); //$NON-NLS-1$
553: quoting = true;
554: }
555: out.append(ch);
556: }
557: if (quoting)
558: out.append("\\E"); //$NON-NLS-1$
559:
560: return out.toString();
561: }
562:
563: /**
564: * Substitutes the previous match with the given text.
565: * Sends a <code>DocumentEvent</code> to all registered <code>IDocumentListener</code>.
566: *
567: * @param text the substitution text
568: * @param regExReplace if <code>true</code> <code>text</code> represents a regular expression
569: * @return the replace region or <code>null</code> if there was no match
570: * @throws BadLocationException if startOffset is an invalid document offset
571: * @throws IllegalStateException if a REPLACE or REPLACE_FIND operation is not preceded by a successful FIND operation
572: * @throws PatternSyntaxException if a regular expression has invalid syntax
573: *
574: * @see DocumentEvent
575: * @see IDocumentListener
576: */
577: public IRegion replace(String text, boolean regExReplace)
578: throws BadLocationException {
579: return findReplace(REPLACE, -1, null, text, false, false,
580: false, regExReplace);
581: }
582:
583: // ---------- CharSequence implementation ----------
584:
585: /*
586: * @see java.lang.CharSequence#length()
587: */
588: public int length() {
589: return fDocument.getLength();
590: }
591:
592: /*
593: * @see java.lang.CharSequence#charAt(int)
594: */
595: public char charAt(int index) {
596: try {
597: return fDocument.getChar(index);
598: } catch (BadLocationException e) {
599: throw new IndexOutOfBoundsException();
600: }
601: }
602:
603: /*
604: * @see java.lang.CharSequence#subSequence(int, int)
605: */
606: public CharSequence subSequence(int start, int end) {
607: try {
608: return fDocument.get(start, end - start);
609: } catch (BadLocationException e) {
610: throw new IndexOutOfBoundsException();
611: }
612: }
613:
614: /*
615: * @see java.lang.Object#toString()
616: */
617: public String toString() {
618: return fDocument.get();
619: }
620: }
|