001: /*
002: * WebSphinx web-crawling toolkit
003: *
004: * Copyright (c) 1998-2002 Carnegie Mellon University. All rights
005: * reserved.
006: *
007: * Redistribution and use in source and binary forms, with or without
008: * modification, are permitted provided that the following conditions
009: * are met:
010: *
011: * 1. Redistributions of source code must retain the above copyright
012: * notice, this list of conditions and the following disclaimer.
013: *
014: * 2. Redistributions in binary form must reproduce the above copyright
015: * notice, this list of conditions and the following disclaimer in
016: * the documentation and/or other materials provided with the
017: * distribution.
018: *
019: * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND
020: * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
021: * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
022: * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY
023: * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
024: * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
025: * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
026: * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
027: * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
028: * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
029: * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
030: *
031: */
032:
033: package websphinx.workbench;
034:
035: import java.awt.*;
036: import websphinx.*;
037: import rcm.awt.Constrain;
038:
039: public class LinkPredicateEditor extends Panel { // FIX: consider implementing java.beans.PropertyEditor
040:
041: LinkFeatureChoice choice;
042:
043: /**
044: * Make a LinkPredicateEditor.
045: */
046: public LinkPredicateEditor() {
047: setLayout(new GridBagLayout());
048: choice = new LinkFeatureChoice();
049: Constrain.add(this , choice, Constrain.labelLike(0, 0));
050: Constrain.add(this , choice.getArgs(), Constrain.areaLike(0, 1));
051: setLinkPredicate(null);
052: }
053:
054: public void setLinkPredicate(LinkPredicate pred) {
055: choice.setLinkPredicate(pred);
056: }
057:
058: public LinkPredicate getLinkPredicate() {
059: return choice.getLinkPredicate();
060: }
061:
062: }
063:
064: class LinkFeatureChoice extends FeatureChoice {
065: LinkFeatureArgs args = new LinkFeatureArgs();
066:
067: final static String NULL_FEATURE = "all links";
068: final static String URL_FEATURE = "URL";
069: final static String HTML_FEATURE = "HTML tag";
070: final static String TEXT_FEATURE = "anchor text";
071: final static String LABEL_FEATURE = "labels";
072: final static String SCRIPT_FEATURE = "script";
073:
074: public LinkFeatureChoice() {
075: addItem(NULL_FEATURE);
076: addItem(LABEL_FEATURE);
077: addItem(URL_FEATURE);
078: addItem(TEXT_FEATURE);
079: addItem(HTML_FEATURE);
080: addItem(SCRIPT_FEATURE);
081: }
082:
083: public Panel getArgs() {
084: return args;
085: }
086:
087: public void setLinkPredicate(LinkPredicate pred) {
088: LinkPredicate neg = null;
089: if (pred instanceof DualPredicate) {
090: neg = (LinkPredicate) ((DualPredicate) pred)
091: .getNegativePredicate();
092: pred = (LinkPredicate) ((DualPredicate) pred)
093: .getPositivePredicate();
094: }
095:
096: if (pred == null) {
097: select(NULL_FEATURE);
098: } else if (pred instanceof URLPredicate) {
099: URLPredicate urlpred = (URLPredicate) pred;
100: URLPredicate urlneg = (URLPredicate) neg;
101: select(URL_FEATURE);
102: args.setURLPattern(urlpred.getPattern().toString());
103: args.setURLNegPattern(urlneg != null ? urlneg.getPattern()
104: .toString() : "");
105: } else if (pred instanceof ContentPredicate) {
106: ContentPredicate contpred = (ContentPredicate) pred;
107: ContentPredicate contneg = (ContentPredicate) neg;
108: if (contpred.getOverHTML()) {
109: select(HTML_FEATURE);
110: args.setHTMLPattern(contpred.getPattern().toString());
111: args.setHTMLNegPattern(contneg != null ? contneg
112: .getPattern().toString() : "");
113: } else {
114: select(TEXT_FEATURE);
115: args.setTextPattern(contpred.getPattern().toString());
116: args.setTextNegPattern(contneg != null ? contneg
117: .getPattern().toString() : "");
118: }
119: } else if (pred instanceof LabelPredicate) {
120: LabelPredicate labelpred = (LabelPredicate) pred;
121: select(LABEL_FEATURE);
122: args.setOrTerms(labelpred.getOrTerms());
123: args.setLabels(labelpred.getLabels());
124: } else if (pred instanceof Script) {
125: Script script = (Script) pred;
126: select(SCRIPT_FEATURE);
127: args.setScript(script.getScript());
128: } else {
129: select(NULL_FEATURE);
130: }
131: }
132:
133: public LinkPredicate getLinkPredicate() {
134: String feat = getSelectedItem();
135: if (feat.equals(URL_FEATURE))
136: return makeSingleOrDual(new URLPredicate(new Wildcard(args
137: .getURLPattern())), args.getURLNegPattern()
138: .length() == 0 ? null : new URLPredicate(
139: new Wildcard(args.getURLNegPattern())));
140: else if (feat.equals(HTML_FEATURE))
141: return makeSingleOrDual(new ContentPredicate(new Tagexp(
142: args.getHTMLPattern()), true), args
143: .getHTMLNegPattern().length() == 0 ? null
144: : new ContentPredicate(new Tagexp(args
145: .getHTMLNegPattern()), true));
146: else if (feat.equals(TEXT_FEATURE))
147: return makeSingleOrDual(new ContentPredicate(new Regexp(
148: args.getTextPattern()), false), args
149: .getTextNegPattern().length() == 0 ? null
150: : new ContentPredicate(new Regexp(args
151: .getTextNegPattern()), false));
152: else if (feat.equals(LABEL_FEATURE))
153: return new LabelPredicate(args.getLabels(), args
154: .getOrTerms());
155: else if (feat.equals(SCRIPT_FEATURE))
156: return new Script(args.getScript(), true);
157: else
158: return null;
159: }
160:
161: private static LinkPredicate makeSingleOrDual(
162: LinkPredicate positive, LinkPredicate negative) {
163: return negative == null ? positive : new DualPredicate(
164: positive, negative);
165: }
166: }
167:
168: class LinkFeatureArgs extends Panel {
169:
170: final static String ANY_TERMS = "any";
171: final static String ALL_TERMS = "all";
172:
173: TextField urlPattern;
174: TextField urlNegPattern;
175: TextField textPattern;
176: TextField textNegPattern;
177: TextField htmlPattern;
178: TextField htmlNegPattern;
179: TextField labels;
180: Choice orTerms;
181: TextArea script;
182:
183: public LinkFeatureArgs() {
184: Panel panel;
185:
186: setLayout(new CardLayout());
187:
188: add(LinkFeatureChoice.NULL_FEATURE, panel = new Panel());
189:
190: add(LinkFeatureChoice.URL_FEATURE, panel = Constrain
191: .makeConstrainedPanel(1, 4));
192: Constrain.add(panel, new Label(
193: " matches the wildcard expression "), Constrain
194: .labelLike(0, 0));
195: Constrain.add(panel, urlPattern = new TextField(), Constrain
196: .fieldLike(0, 1));
197: Constrain.add(panel, new Label(" but not the expression "),
198: Constrain.labelLike(0, 2));
199: Constrain.add(panel, urlNegPattern = new TextField(), Constrain
200: .fieldLike(0, 3));
201:
202: add(LinkFeatureChoice.HTML_FEATURE, panel = Constrain
203: .makeConstrainedPanel(1, 4));
204: Constrain.add(panel, new Label(
205: " matches the HTML tag expression "), Constrain
206: .labelLike(0, 0));
207: Constrain.add(panel, htmlPattern = new TextField(), Constrain
208: .fieldLike(0, 1));
209: Constrain.add(panel, new Label(" but not the expression "),
210: Constrain.labelLike(0, 2));
211: Constrain.add(panel, htmlNegPattern = new TextField(),
212: Constrain.fieldLike(0, 3));
213:
214: add(LinkFeatureChoice.TEXT_FEATURE, panel = Constrain
215: .makeConstrainedPanel(1, 4));
216: Constrain.add(panel, new Label(
217: " matches the regular expression "), Constrain
218: .labelLike(0, 0));
219: Constrain.add(panel, textPattern = new TextField(), Constrain
220: .fieldLike(0, 1));
221: Constrain.add(panel, new Label(" but not the expression "),
222: Constrain.labelLike(0, 2));
223: Constrain.add(panel, textNegPattern = new TextField(),
224: Constrain.fieldLike(0, 3));
225:
226: add(LinkFeatureChoice.LABEL_FEATURE, panel = Constrain
227: .makeConstrainedPanel(3, 2));
228: Constrain.add(panel, new Label(" include "), Constrain
229: .labelLike(0, 0));
230: Constrain.add(panel, orTerms = new Choice(), Constrain
231: .labelLike(1, 0));
232: orTerms.addItem(ANY_TERMS);
233: orTerms.addItem(ALL_TERMS);
234: orTerms.select(ANY_TERMS);
235: Constrain.add(panel, new Label(" of the labels "), Constrain
236: .labelLike(2, 0));
237: Constrain.add(panel, labels = new TextField(), Constrain
238: .fieldLike(0, 1, 3));
239:
240: ScriptInterpreter interp = Context.getScriptInterpreter();
241: if (interp != null) {
242: add(LinkFeatureChoice.SCRIPT_FEATURE, panel = Constrain
243: .makeConstrainedPanel(1, 2));
244: Constrain.add(panel, new Label(interp.getLanguage()
245: + " Function (crawler, link)"), Constrain
246: .labelLike(0, 0));
247: Constrain.add(panel,
248: script = new TextArea("return true;\n"), Constrain
249: .areaLike(0, 1));
250: } else {
251: add(LinkFeatureChoice.SCRIPT_FEATURE, panel = Constrain
252: .makeConstrainedPanel(1, 1));
253: Constrain.add(panel, new Label(
254: "No scripting language is available."), Constrain
255: .labelLike(0, 0));
256: }
257: }
258:
259: public void setURLPattern(String pattern) {
260: urlPattern.setText(pattern);
261: }
262:
263: public String getURLPattern() {
264: return urlPattern.getText();
265: }
266:
267: public void setURLNegPattern(String pattern) {
268: urlNegPattern.setText(pattern);
269: }
270:
271: public String getURLNegPattern() {
272: return urlNegPattern.getText();
273: }
274:
275: public void setTextPattern(String pattern) {
276: textPattern.setText(pattern);
277: }
278:
279: public String getTextPattern() {
280: return textPattern.getText();
281: }
282:
283: public void setTextNegPattern(String pattern) {
284: textNegPattern.setText(pattern);
285: }
286:
287: public String getTextNegPattern() {
288: return textNegPattern.getText();
289: }
290:
291: public void setHTMLPattern(String pattern) {
292: htmlPattern.setText(pattern);
293: }
294:
295: public String getHTMLPattern() {
296: return htmlPattern.getText();
297: }
298:
299: public void setHTMLNegPattern(String pattern) {
300: htmlNegPattern.setText(pattern);
301: }
302:
303: public String getHTMLNegPattern() {
304: return htmlNegPattern.getText();
305: }
306:
307: public void setLabels(String pattern) {
308: labels.setText(pattern);
309: }
310:
311: public String getLabels() {
312: return labels.getText();
313: }
314:
315: public void setOrTerms(boolean orTerms) {
316: this .orTerms.select(orTerms ? ANY_TERMS : ALL_TERMS);
317: }
318:
319: public boolean getOrTerms() {
320: return orTerms.getSelectedItem().equals(ANY_TERMS);
321: }
322:
323: public void setScript(String script) {
324: this .script.setText(script);
325: }
326:
327: public String getScript() {
328: return script != null ? script.getText() : null;
329: }
330:
331: }
|