001: /*
002: * WebSphinx web-crawling toolkit
003: *
004: * Copyright (c) 1998-2002 Carnegie Mellon University. All rights
005: * reserved.
006: *
007: * Redistribution and use in source and binary forms, with or without
008: * modification, are permitted provided that the following conditions
009: * are met:
010: *
011: * 1. Redistributions of source code must retain the above copyright
012: * notice, this list of conditions and the following disclaimer.
013: *
014: * 2. Redistributions in binary form must reproduce the above copyright
015: * notice, this list of conditions and the following disclaimer in
016: * the documentation and/or other materials provided with the
017: * distribution.
018: *
019: * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND
020: * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
021: * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
022: * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY
023: * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
024: * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
025: * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
026: * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
027: * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
028: * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
029: * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
030: *
031: */
032:
033: package websphinx.workbench;
034:
035: import java.awt.*;
036: import websphinx.*;
037: import rcm.awt.Constrain;
038:
039: public class PagePredicateEditor extends Panel { // FIX: consider implementing java.beans.PropertyEditor
040:
041: PageFeatureChoice choice;
042:
043: /**
044: * Make a PagePredicateEditor.
045: */
046: public PagePredicateEditor() {
047: setLayout(new GridBagLayout());
048: choice = new PageFeatureChoice();
049: Constrain.add(this , choice, Constrain.labelLike(0, 0));
050: Constrain.add(this , choice.getArgs(), Constrain.areaLike(0, 1));
051: setPagePredicate(null);
052: }
053:
054: public void setPagePredicate(PagePredicate pred) {
055: // FIX: handle compound predicates
056: choice.setPagePredicate(pred);
057: }
058:
059: public PagePredicate getPagePredicate() {
060: return choice.getPagePredicate();
061: }
062:
063: }
064:
065: class PageFeatureChoice extends FeatureChoice {
066: PageFeatureArgs args = new PageFeatureArgs();
067:
068: final static String NULL_FEATURE = "all pages";
069: final static String URL_FEATURE = "URL";
070: final static String HTML_FEATURE = "HTML";
071: final static String TEXT_FEATURE = "text";
072: final static String LABEL_FEATURE = "labels";
073: final static String TITLE_FEATURE = "title";
074: final static String SCRIPT_FEATURE = "script";
075:
076: public PageFeatureChoice() {
077: addItem(NULL_FEATURE);
078: addItem(LABEL_FEATURE);
079: addItem(TITLE_FEATURE);
080: addItem(URL_FEATURE);
081: addItem(TEXT_FEATURE);
082: addItem(HTML_FEATURE);
083: addItem(SCRIPT_FEATURE);
084: }
085:
086: public void setPagePredicate(PagePredicate pred) {
087: PagePredicate neg = null;
088: if (pred instanceof DualPredicate) {
089: neg = (PagePredicate) ((DualPredicate) pred)
090: .getNegativePredicate();
091: pred = (PagePredicate) ((DualPredicate) pred)
092: .getPositivePredicate();
093: }
094:
095: if (pred == null) {
096: select(NULL_FEATURE);
097: } else if (pred instanceof URLPredicate) {
098: URLPredicate urlpred = (URLPredicate) pred;
099: URLPredicate urlneg = (URLPredicate) neg;
100: select(URL_FEATURE);
101: args.setURLPattern(urlpred.getPattern().toString());
102: args.setURLNegPattern(urlneg != null ? urlneg.getPattern()
103: .toString() : "");
104: } else if (pred instanceof ContentPredicate) {
105: ContentPredicate contpred = (ContentPredicate) pred;
106: ContentPredicate contneg = (ContentPredicate) neg;
107: if (contpred.getOverHTML()) {
108: select(HTML_FEATURE);
109: args.setHTMLPattern(contpred.getPattern().toString());
110: args.setHTMLNegPattern(contneg != null ? contneg
111: .getPattern().toString() : "");
112: } else {
113: select(TEXT_FEATURE);
114: args.setTextPattern(contpred.getPattern().toString());
115: args.setTextNegPattern(contneg != null ? contneg
116: .getPattern().toString() : "");
117: }
118: } else if (pred instanceof TitlePredicate) {
119: TitlePredicate titlepred = (TitlePredicate) pred;
120: TitlePredicate titleneg = (TitlePredicate) neg;
121: select(TITLE_FEATURE);
122: args.setTitlePattern(titlepred.getPattern().toString());
123: args.setTitleNegPattern(titleneg != null ? titleneg
124: .getPattern().toString() : "");
125: } else if (pred instanceof LabelPredicate) {
126: LabelPredicate labelpred = (LabelPredicate) pred;
127: select(LABEL_FEATURE);
128: args.setOrTerms(labelpred.getOrTerms());
129: args.setLabels(labelpred.getLabels());
130: } else if (pred instanceof Script) {
131: Script script = (Script) pred;
132: select(SCRIPT_FEATURE);
133: args.setScript(script.getScript());
134: } else {
135: select(NULL_FEATURE);
136: }
137: }
138:
139: public Panel getArgs() {
140: return args;
141: }
142:
143: public PagePredicate getPagePredicate() {
144: String feat = getSelectedItem();
145: if (feat.equals(URL_FEATURE))
146: return makeSingleOrDual(new URLPredicate(new Wildcard(args
147: .getURLPattern())), args.getURLNegPattern()
148: .length() == 0 ? null : new URLPredicate(
149: new Wildcard(args.getURLNegPattern())));
150: else if (feat.equals(HTML_FEATURE))
151: return makeSingleOrDual(new ContentPredicate(new Tagexp(
152: args.getHTMLPattern()), true), args
153: .getHTMLNegPattern().length() == 0 ? null
154: : new ContentPredicate(new Tagexp(args
155: .getHTMLNegPattern()), true));
156: else if (feat.equals(TEXT_FEATURE))
157: return makeSingleOrDual(new ContentPredicate(new Regexp(
158: args.getTextPattern()), false), args
159: .getTextNegPattern().length() == 0 ? null
160: : new ContentPredicate(new Regexp(args
161: .getTextNegPattern()), false));
162: else if (feat.equals(TITLE_FEATURE))
163: return makeSingleOrDual(new TitlePredicate(new Regexp(args
164: .getTitlePattern())), args.getTitleNegPattern()
165: .length() == 0 ? null : new TitlePredicate(
166: new Regexp(args.getTitleNegPattern())));
167: else if (feat.equals(LABEL_FEATURE))
168: return new LabelPredicate(args.getLabels(), args
169: .getOrTerms());
170: else if (feat.equals(SCRIPT_FEATURE))
171: return new Script(args.getScript(), false);
172: else
173: return null;
174: }
175:
176: private static PagePredicate makeSingleOrDual(
177: PagePredicate positive, PagePredicate negative) {
178: return negative == null ? positive : new DualPredicate(
179: positive, negative);
180: }
181: }
182:
183: class PageFeatureArgs extends Panel {
184:
185: TextField urlPattern;
186: TextField urlNegPattern;
187: TextField textPattern;
188: TextField textNegPattern;
189: TextField htmlPattern;
190: TextField htmlNegPattern;
191: TextField titlePattern;
192: TextField titleNegPattern;
193: TextField labels;
194: Choice orTerms;
195: TextArea script;
196:
197: final static String ANY_TERMS = "any";
198: final static String ALL_TERMS = "all";
199:
200: public PageFeatureArgs() {
201: Panel panel;
202:
203: setLayout(new CardLayout());
204:
205: add(PageFeatureChoice.NULL_FEATURE, panel = new Panel());
206:
207: add(PageFeatureChoice.URL_FEATURE, panel = Constrain
208: .makeConstrainedPanel(1, 4));
209: Constrain.add(panel, new Label(
210: " matches the wildcard expression "), Constrain
211: .labelLike(0, 0));
212: Constrain.add(panel, urlPattern = new TextField(), Constrain
213: .fieldLike(0, 1));
214: Constrain.add(panel, new Label(" but not the expression "),
215: Constrain.labelLike(0, 2));
216: Constrain.add(panel, urlNegPattern = new TextField(), Constrain
217: .fieldLike(0, 3));
218:
219: add(PageFeatureChoice.HTML_FEATURE, panel = Constrain
220: .makeConstrainedPanel(1, 4));
221: Constrain.add(panel, new Label(
222: " matches the HTML tag expression "), Constrain
223: .labelLike(0, 0));
224: Constrain.add(panel, htmlPattern = new TextField(), Constrain
225: .fieldLike(0, 1));
226: Constrain.add(panel, new Label(" but not the expression "),
227: Constrain.labelLike(0, 2));
228: Constrain.add(panel, htmlNegPattern = new TextField(),
229: Constrain.fieldLike(0, 3));
230:
231: add(PageFeatureChoice.TEXT_FEATURE, panel = Constrain
232: .makeConstrainedPanel(1, 4));
233: Constrain.add(panel, new Label(
234: " matches the regular expression "), Constrain
235: .labelLike(0, 0));
236: Constrain.add(panel, textPattern = new TextField(), Constrain
237: .fieldLike(0, 1));
238: Constrain.add(panel, new Label(" but not the expression "),
239: Constrain.labelLike(0, 2));
240: Constrain.add(panel, textNegPattern = new TextField(),
241: Constrain.fieldLike(0, 3));
242:
243: add(PageFeatureChoice.TITLE_FEATURE, panel = Constrain
244: .makeConstrainedPanel(1, 4));
245: Constrain.add(panel, new Label(
246: " matches the regular expression "), Constrain
247: .labelLike(0, 0));
248: Constrain.add(panel, titlePattern = new TextField(), Constrain
249: .fieldLike(0, 1));
250: Constrain.add(panel, new Label(" but not the expression "),
251: Constrain.labelLike(0, 2));
252: Constrain.add(panel, titleNegPattern = new TextField(),
253: Constrain.fieldLike(0, 3));
254:
255: add(PageFeatureChoice.LABEL_FEATURE, panel = Constrain
256: .makeConstrainedPanel(3, 2));
257: Constrain.add(panel, new Label(" include "), Constrain
258: .labelLike(0, 0));
259: Constrain.add(panel, orTerms = new Choice(), Constrain
260: .labelLike(1, 0));
261: orTerms.addItem(ANY_TERMS);
262: orTerms.addItem(ALL_TERMS);
263: orTerms.select(ANY_TERMS);
264: Constrain.add(panel, new Label(" of the labels "), Constrain
265: .labelLike(2, 0));
266: Constrain.add(panel, labels = new TextField(), Constrain
267: .fieldLike(0, 1, 3));
268:
269: ScriptInterpreter interp = Context.getScriptInterpreter();
270: if (interp != null) {
271: add(PageFeatureChoice.SCRIPT_FEATURE, panel = Constrain
272: .makeConstrainedPanel(1, 2));
273: Constrain.add(panel, new Label(interp.getLanguage()
274: + " Function (crawler, page)"), Constrain
275: .labelLike(0, 0));
276: Constrain.add(panel,
277: script = new TextArea("return true;\n"), Constrain
278: .areaLike(0, 1));
279: } else {
280: add(PageFeatureChoice.SCRIPT_FEATURE, panel = Constrain
281: .makeConstrainedPanel(1, 1));
282: Constrain.add(panel, new Label(
283: "No scripting language is available."), Constrain
284: .labelLike(0, 0));
285: }
286: }
287:
288: public void setURLPattern(String pattern) {
289: urlPattern.setText(pattern);
290: }
291:
292: public String getURLPattern() {
293: return urlPattern.getText();
294: }
295:
296: public void setURLNegPattern(String pattern) {
297: urlNegPattern.setText(pattern);
298: }
299:
300: public String getURLNegPattern() {
301: return urlNegPattern.getText();
302: }
303:
304: public void setTextPattern(String pattern) {
305: textPattern.setText(pattern);
306: }
307:
308: public String getTextPattern() {
309: return textPattern.getText();
310: }
311:
312: public void setTextNegPattern(String pattern) {
313: textNegPattern.setText(pattern);
314: }
315:
316: public String getTextNegPattern() {
317: return textNegPattern.getText();
318: }
319:
320: public void setHTMLPattern(String pattern) {
321: htmlPattern.setText(pattern);
322: }
323:
324: public String getHTMLPattern() {
325: return htmlPattern.getText();
326: }
327:
328: public void setHTMLNegPattern(String pattern) {
329: htmlNegPattern.setText(pattern);
330: }
331:
332: public String getHTMLNegPattern() {
333: return htmlNegPattern.getText();
334: }
335:
336: public void setTitlePattern(String pattern) {
337: titlePattern.setText(pattern);
338: }
339:
340: public String getTitlePattern() {
341: return titlePattern.getText();
342: }
343:
344: public void setTitleNegPattern(String pattern) {
345: titleNegPattern.setText(pattern);
346: }
347:
348: public String getTitleNegPattern() {
349: return titleNegPattern.getText();
350: }
351:
352: public void setLabels(String pattern) {
353: labels.setText(pattern);
354: }
355:
356: public String getLabels() {
357: return labels.getText();
358: }
359:
360: public void setOrTerms(boolean orTerms) {
361: this .orTerms.select(orTerms ? ANY_TERMS : ALL_TERMS);
362: }
363:
364: public boolean getOrTerms() {
365: return orTerms.getSelectedItem().equals(ANY_TERMS);
366: }
367:
368: public void setScript(String script) {
369: this .script.setText(script);
370: }
371:
372: public String getScript() {
373: return script != null ? script.getText() : null;
374: }
375:
376: }
|