01: /* PatternMatcherRecycler
02: *
03: * $Id: PatternMatcherRecycler.java 4644 2006-09-20 22:40:21Z paul_jack $
04: *
05: * Created on Dec 21, 2004
06: *
07: * Copyright (C) 2004 Internet Archive.
08: *
09: * This file is part of the Heritrix web crawler (crawler.archive.org).
10: *
11: * Heritrix is free software; you can redistribute it and/or modify
12: * it under the terms of the GNU Lesser Public License as published by
13: * the Free Software Foundation; either version 2.1 of the License, or
14: * any later version.
15: *
16: * Heritrix is distributed in the hope that it will be useful,
17: * but WITHOUT ANY WARRANTY; without even the implied warranty of
18: * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19: * GNU Lesser Public License for more details.
20: *
21: * You should have received a copy of the GNU Lesser Public License
22: * along with Heritrix; if not, write to the Free Software
23: * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
24: */
25: package org.archive.util;
26:
27: import java.util.EmptyStackException;
28: import java.util.Stack;
29: import java.util.regex.Matcher;
30: import java.util.regex.Pattern;
31:
/**
 * Utility class to retain a compiled Pattern and a bounded pool of
 * corresponding Matcher instances for reuse.
 *
 * <p>The pool itself is only touched while holding the pool's monitor, so
 * {@link #getMatcher(CharSequence)} and {@link #freeMatcher(Matcher)} may be
 * called concurrently. A Matcher handed out by this class must not be used
 * any more once it has been passed back to {@link #freeMatcher(Matcher)}.
 *
 * @author gojomo
 */
public class PatternMatcherRecycler {
    /**
     * Upper-bound on Matcher Stacks.
     * Profiling has the size of these Stacks tending upward over
     * the life of a crawl. TODO: do something better than a
     * coarse upper bound; do something that can get GC'd in
     * low-memory conditions.
     */
    private static final int MAXIMUM_STACK_SIZE = 10;

    /** The compiled Pattern every Matcher created here is built from. */
    private final Pattern pattern;

    /** Idle Matchers awaiting reuse; never holds null entries. */
    private final Stack<Matcher> matchers;

    /**
     * Create a recycler with an empty pool for the given Pattern.
     *
     * @param p the compiled Pattern to create Matchers from
     */
    public PatternMatcherRecycler(Pattern p) {
        this.pattern = p;
        this.matchers = new Stack<Matcher>();
    }

    /**
     * @return the Pattern this recycler was constructed with
     */
    public Pattern getPattern() {
        return this.pattern;
    }

    /**
     * Get a Matcher for the internal Pattern, against the given
     * input sequence. Reuse an old Matcher if possible, otherwise
     * create a new one.
     *
     * @param input CharSequence to match
     * @return Matcher set against the input sequence
     * @throws IllegalArgumentException if input is null
     */
    public Matcher getMatcher(CharSequence input) {
        if (input == null) {
            throw new IllegalArgumentException(
                    "CharSequence 'input' must not be null");
        }
        Matcher recycled = null;
        // Check-and-pop under the Stack's own monitor so the emptiness test
        // and the pop are one atomic step; this replaces catching
        // EmptyStackException as the normal "pool is empty" path.
        synchronized (matchers) {
            if (!matchers.isEmpty()) {
                recycled = matchers.pop();
            }
        }
        if (recycled != null) {
            return recycled.reset(input);
        }
        return this.pattern.matcher(input);
    }

    /**
     * Return the given Matcher to the reuse stack, if stack is
     * not already at its maximum size. A null argument is ignored.
     *
     * <p>No check is made that the Matcher was created for this
     * recycler's Pattern; callers are expected to return only Matchers
     * obtained from {@link #getMatcher(CharSequence)}.
     *
     * @param m the Matcher to save for reuse; may be null
     */
    public void freeMatcher(Matcher m) {
        if (m == null) {
            // A pooled null would make a later getMatcher() fail with a
            // NullPointerException, so never store one.
            return;
        }
        // Size check and push must be atomic, otherwise concurrent callers
        // could each pass the check and grow the pool past the bound.
        synchronized (matchers) {
            if (matchers.size() < MAXIMUM_STACK_SIZE) {
                // While idle in the pool, drop the reference to the
                // (potentially large) previous input so it can be GC'd.
                m.reset("");
                matchers.push(m);
            }
        }
    }
}
|