001: /*
002: * Copyright 2001 Sun Microsystems, Inc. All rights reserved.
003: * PROPRIETARY/CONFIDENTIAL. Use of this product is subject to license terms.
004: */
005:
006: package com.sun.portal.search.robot;
007:
008: import java.util.*;
009: import java.io.*;
010: import java.net.*;
011:
012: import com.sun.portal.search.util.*;
013:
014: public class RobotConfig {
015:
016: HashMap rules = null;
017: HashMap rulesets = null;
018: int lastRulesetID = 0;
019: public int lastRuleID = 0;
020: public static final int ALLOW = 1;
021: public static final int DENY = 2;
022: public static final int GO_NEXT = 3;
023: public String filterrules_conf = null;
024: public String process_conf = null;
025: StringBuffer frs_header_comment = null;
026: ArrayList pre_comment = null;
027: String[] RulesetIDs = null;
028: String[] RuleIDs = null;
029: String[] RuleNicknames = null;
030: public ProcessConfig processConf = null;
031: ArrayList filterConf = null;
032: int extractSizeNdx = -1;
033: int extractSize = -1;
034: static final int full_text_size = 32000;
035: public ConverterConfig convertConf = null;
036:
037: static public void main(String[] args) {
038: RobotConfig rc = new RobotConfig(args[0]);
039: if (args.length == 3) {
040: rc.newRuleset(args[1], Integer.parseInt(args[2]), false,
041: null);
042: }
043: //rc.processConf.list(System.out);
044: //rc.updateFile();
045: //System.out.println("numRuleset=" + rc.numRuleset());
046: /*String[] ids = rc.getRulsetIDs();
047: for (int i=0; i<ids.length; i++) {
048: System.out.println("Ruleset ID=" + ids[i]);
049: }*/
050: //rc.updateFile();
051: //System.out.println("ExtractSize="+ Integer.toString(rc.getExtractSize()));
052: System.out.println("ROBOT.CONF:\n" + rc.processConf.toString());
053: }
054:
055: public RobotConfig(String conf_dir) {
056: filterrules_conf = conf_dir + File.separator
057: + "filterrules.conf";
058: process_conf = conf_dir + File.separator + "robot.conf";
059: rules = new HashMap();
060: rulesets = new HashMap();
061: parse_filterrules(filterrules_conf);
062: try {
063: processConf = ProcessConfig.parseProcessConf(process_conf);
064: } catch (Exception e) {
065: //ignore for now
066: }
067: parse_filterConf(conf_dir + File.separator + "filter.conf");
068: this .convertConf = new ConverterConfig(conf_dir);
069: }
070:
071: void parse_filterConf(String file) {
072: BufferedReader in = null;
073: String line = null;
074: filterConf = new ArrayList();
075: try {
076: in = new BufferedReader(new FileReader(file));
077: line = in.readLine();
078: while (line != null) {
079: filterConf.add(line);
080: line = in.readLine();
081: }
082: } catch (IOException e) {
083: return;
084: }
085:
086: }
087:
088: int getExtractSize() {
089: if (filterConf == null) {
090: return this .full_text_size;
091: }
092: if (this .extractSize > 0) {
093: return this .extractSize;
094: }
095: for (int i = 0; i < filterConf.size(); i++) {
096: String line = (String) filterConf.get(i);
097: if (line != null && line.startsWith("Generate")) {
098: if (line.indexOf("fn=extract-full-text") > 0) {
099: int ndx = line.indexOf("truncate=");
100: if (ndx > 0) {
101: int end = line.indexOf(' ', ndx + 9);
102: if (end > 0) {
103: String value = line.substring(ndx + 9, end);
104: this .extractSizeNdx = i;
105: return Integer.parseInt(value);
106: }
107: }
108: }
109: }
110: }
111: return this .full_text_size;
112: }
113:
114: void parse_filterrules(String file) {
115: int status = 0;
116: FilterRule curr_fr = null;
117: FilterRuleset curr_frset = null;
118: String line = null;
119: BufferedReader in = null;
120:
121: String ruleTag = "";
122: String rulesetTag = "";
123: try {
124: in = new BufferedReader(new InputStreamReader(
125: new FileInputStream(file), "UTF-8"));
126: line = in.readLine();
127: } catch (IOException e) {
128: return;
129: }
130: boolean afterComment = false;
131: while (line != null) {
132: String nline = line.trim();
133: if (nline.length() == 0) {
134: try {
135: line = in.readLine();
136: } catch (IOException e) {
137: break;
138: }
139: continue;
140: }
141: if (nline.startsWith("#")) {
142: switch (status) {
143: case 0:
144: if (nline.startsWith("#FR#")) {
145: afterComment = true;
146: } else {
147: if (!afterComment) {
148: this .addComment(nline);
149: } else {
150: this .addPreComment(nline);
151: }
152: }
153: break;
154: case 2: //rule comment
155: curr_fr.addComment(line.substring(1));
156: break;
157: case 4: //ruleset comment
158: curr_frset.addComment(line.substring(1));
159: break;
160: }
161:
162: try {
163: line = in.readLine();
164: } catch (IOException e) {
165: break;
166: }
167: continue;
168: }
169:
170: switch (status) {
171: case 0:
172: if (nline.startsWith("<Rule ")) {
173: ruleTag = ruleTag + " " + nline;
174: if (nline.endsWith(">")) {
175: curr_fr = new FilterRule(ruleTag);
176: curr_fr.addPreComment(this .pre_comment);
177: this .pre_comment = null;
178: rules.put(curr_fr.id, curr_fr);
179: if (Integer.parseInt(curr_fr.id) > lastRuleID) {
180: lastRuleID = Integer.parseInt(curr_fr.id);
181: }
182: ruleTag = "";
183: status = 2;
184: } else
185: status = 1;
186: } else if (nline.startsWith("<Ruleset ")) {
187: rulesetTag = rulesetTag + " " + nline;
188: if (nline.endsWith(">")) {
189: curr_frset = new FilterRuleset(rulesetTag);
190: curr_frset.addPreComment(this .pre_comment);
191: this .pre_comment = null;
192: rulesets.put(curr_frset.id, curr_frset);
193: if (Integer.parseInt(curr_frset.id) > lastRulesetID) {
194: lastRulesetID = Integer
195: .parseInt(curr_frset.id);
196: }
197: rulesetTag = "";
198: status = 4;
199: } else {
200: status = 3;
201: }
202: } else {
203: System.out.println("Invalid statement:" + nline);
204: }
205: afterComment = true;
206: break;
207: case 1:
208: ruleTag = ruleTag + " " + nline;
209: if (nline.endsWith(">")) {
210: curr_fr = new FilterRule(ruleTag);
211: curr_fr.addPreComment(this .pre_comment);
212: this .pre_comment = null;
213: rules.put(curr_fr.id, curr_fr);
214: if (Integer.parseInt(curr_fr.id) > lastRuleID) {
215: lastRuleID = Integer.parseInt(curr_fr.id);
216: }
217: ruleTag = "";
218: status = 2;
219: }
220: break;
221: case 2:
222: if (nline.startsWith("</Rule>")) {
223: //System.out.println(curr_fr.toXMLString());
224: status = 0;
225: } else if (nline.startsWith("Filter")) {
226: curr_fr.addFilter(nline);
227: }
228: break;
229: case 3:
230: rulesetTag = rulesetTag + " " + nline;
231: if (nline.endsWith(">")) {
232: curr_frset = new FilterRuleset(rulesetTag);
233: curr_frset.addPreComment(this .pre_comment);
234: this .pre_comment = null;
235:
236: rulesets.put(curr_frset.id, curr_frset);
237: if (Integer.parseInt(curr_frset.id) > lastRulesetID) {
238: lastRulesetID = Integer.parseInt(curr_frset.id);
239: }
240: rulesetTag = "";
241: status = 4;
242: }
243: break;
244: case 4:
245: if (nline.startsWith("</Ruleset>")) {
246: //System.out.println(curr_frset.toXMLString());
247: status = 0;
248: } else if (nline.startsWith("Rule")) {
249: curr_frset.AddFilterbyString(nline);
250: }
251: break;
252:
253: }
254: try {
255: line = in.readLine();
256: } catch (IOException e) {
257: break;
258: }
259: }
260: this .getRuleIDs();
261: this .getRulsetIDs();
262: }
263:
264: public void updateFile() {
265: PrintWriter out = null;
266: try {
267: FileOutputStream fout = new FileOutputStream(
268: this .filterrules_conf);
269: out = new PrintWriter(new BufferedWriter(
270: new OutputStreamWriter(fout, "UTF-8")), true);
271: } catch (Exception e) {
272: System.out.println("[updateFile]Exception:"
273: + e.getMessage());
274: return;
275: }
276: if (this .frs_header_comment != null) {
277: out.print(this .frs_header_comment);
278: }
279: out
280: .print("#FR###########################################################################\n");
281: out.print("#FR# Rules\n");
282: out
283: .print("#FR###########################################################################\n\n");
284: this .printRules(out);
285: out
286: .print("#FR###########################################################################\n");
287: out.print("#FR# Rulesets\n");
288: out
289: .print("#FR###########################################################################\n\n");
290: this .printRuleSets(out);
291: out
292: .print("#FR######################################################\n");
293: out.print("#FR# Nothing after the last </ruleset> is saved.\n");
294: out
295: .print("#FR######################################################\n");
296: out.close();
297: }
298:
299: public String newPathFilterRule(String path) {
300: String pathNick = "pathname " + path;
301: for (int i = 0; i < RuleNicknames.length; i++) {
302: if (pathNick.equals(RuleNicknames[i])) {
303: //FilterRule fr = (FilterRule) rules.get(RuleIDs[i]);
304: return RuleIDs[i];
305: }
306: }
307: String ruleID = Integer.toString(lastRuleID + 1);
308: FilterRule fr = new FilterRule(ruleID, true,
309: "pathname " + path, true, false);
310: if (fr != null) {
311: fr.addFilter(FilterRule.filter_srcs[3],
312: FilterRule.filter_methods[2], path);
313: AddRule(ruleID, fr);
314: }
315: return ruleID;
316: }
317:
318: public String getIDByNick(String nick) {
319: for (int i = 0; i < RuleNicknames.length; i++) {
320: if (nick.equals(RuleNicknames[i])) {
321: return RuleIDs[i];
322: }
323: }
324: return null;
325: }
326:
327: public FilterRuleset newRuleset(String urlText, int depth,
328: boolean isDomain, String ruleID) {
329: FilterRuleset frs = null;
330: try {
331: URL url = new URL(urlText);
332: frs = newRuleset(url, depth, isDomain, ruleID);
333: } catch (Exception e) {
334: }
335: return frs;
336: }
337:
338: public FilterRuleset findFiletrRulesetHasDomain(String domain) {
339: for (int i = 0; i < this .RulesetIDs.length; i++) {
340: FilterRuleset frs = (FilterRuleset) rulesets
341: .get(RulesetIDs[i]);
342: if (frs.getType() == FilterRuleset.TYPE_DOMAINS) {
343: for (int j = 0; j < frs.numOfDomains(); j++) {
344: String domainname = frs.getDomainByIndex(j);
345: if (domain.equals(domainname)) {
346: return frs;
347: }
348: }
349: }
350: }
351: return null;
352: }
353:
354: public FilterRuleset findFiletrRulesetHasStartingPoint(
355: String urlText) {
356: for (int i = 0; i < this .RulesetIDs.length; i++) {
357: FilterRuleset frs = (FilterRuleset) rulesets
358: .get(RulesetIDs[i]);
359: for (int j = 0; j < frs.numOfStartingPoints(); j++) {
360: String sp = frs.getStartingPointByIndex(j);
361: if (sp.startsWith(urlText.trim() + " ")) {
362: return frs;
363: }
364: }
365: }
366: return null;
367: }
368:
369: FilterRuleset findFiletrRulesetFor(URL url) {
370: for (int i = 0; i < this .RulesetIDs.length; i++) {
371: FilterRuleset frs = (FilterRuleset) rulesets
372: .get(RulesetIDs[i]);
373: if (frs.isForMe(url)) {
374: return frs;
375: }
376: }
377: return null;
378: }
379:
380: public String[] getFiletrRulesetIDsForFilterRule(String id) {
381: ArrayList ids = new ArrayList();
382: for (int i = 0; i < this .RulesetIDs.length; i++) {
383: FilterRuleset frs = (FilterRuleset) rulesets
384: .get(RulesetIDs[i]);
385: if (frs.isFilterInUsed(id)) {
386: ids.add(frs.id);
387: }
388: }
389: if (ids.size() > 0) {
390: String a[] = { "" };
391: return (String[]) ids.toArray(a);
392: }
393: return null;
394: }
395:
396: public FilterRuleset newRuleset(URL url, int depth,
397: boolean isDomain, String ruleID) {
398: String s_points = url.toString() + " depth=" + depth;
399: FilterRuleset frs = null;
400: if (!isDomain) {
401: frs = this .findFiletrRulesetFor(url);
402: if (frs != null) {
403: frs.addStartingPoint(url, depth);
404: if (ruleID != null) {
405: FilterRule fr = (FilterRule) this .rules.get(ruleID);
406: if (fr != null && fr.enable) {
407: if (!frs.onExit) { //only need while excluded on exit
408: frs.AddFilter(fr.id, fr.default_ad);
409: }
410: }
411: }
412: return frs;
413: }
414: }
415:
416: // need to create a new site
417:
418: String host = url.getHost();
419: String proto = url.getProtocol();
420: int port = url.getPort();
421: String server = proto + "://" + host
422: + (port > 0 ? ":" + Integer.toString(port) : "") + "/";
423: int newID = lastRulesetID + 1;
424: if (isDomain) {
425: String domain = host;
426: int ndx = host.indexOf('.');
427: if (ndx > 0) {
428: domain = host.substring(ndx + 1);
429: }
430: frs = new FilterRuleset(Integer.toString(newID), true,
431: false, true, domain, domain, "",
432: "http,file,ftp,https", s_points, null);
433: } else {
434: frs = new FilterRuleset(Integer.toString(newID), true,
435: false, (ruleID != null ? false : true), host,
436: server, s_points, null);
437: }
438: if (this .RuleIDs == null) {
439: freshRulesArray();
440: }
441: for (int i = 0; i < this .RuleIDs.length; i++) {
442: FilterRule fr = (FilterRule) this .rules
443: .get(this .RuleIDs[i]);
444: if (fr != null && fr.enable && fr.quickcf) {
445: frs.AddFilter(fr.id, fr.default_ad);
446: }
447: }
448: if (ruleID != null) {
449: FilterRule fr = (FilterRule) this .rules.get(ruleID);
450: if (fr != null && fr.enable) {
451: frs.AddFilter(fr.id, fr.default_ad);
452: }
453: }
454: this .rulesets.put(Integer.toString(newID), frs);
455: RulesetIDs = null;
456: this .getRulsetIDs();
457: this .lastRulesetID = newID;
458: return frs;
459: }
460:
461: /** delete Filter Ruleset by index
462: *
463: * @param index in RulesetIDs
464: */
465: public boolean delRuleset(int index) {
466: if (index < 0 || index >= this .RulesetIDs.length) {
467: return false;
468: }
469: return this .delRuleset(this .RulesetIDs[index]);
470: }
471:
472: void removeAssoicatedPathFilter(String id) {
473: FilterRuleset frs = GetRuleSet(id);
474: if (frs == null) {
475: return;
476: }
477: int numOfFilter = frs.numOfFilters();
478: ArrayList delIDs = new ArrayList();
479:
480: for (int i = 0; i < numOfFilter; i++) {
481: String ruleID = frs.getFiltersIDByIndex(i);
482: FilterRule fr = GetRule(ruleID);
483: if (fr.nickname.startsWith("pathname ")) {
484: String[] frss = getFiletrRulesetIDsForFilterRule(ruleID);
485: if (frss != null && frss.length == 1
486: && frss[0].equals(id)) {
487: delIDs.add(ruleID);
488: }
489: }
490:
491: }
492: for (int i = 0; i < delIDs.size(); i++) {
493: delRule((String) delIDs.get(i));
494: }
495: }
496:
497: /** delete Filter Ruleset by ID
498: *
499: * @param RulesetID
500: */
501: public boolean delRuleset(String id) {
502: if (id == null || this .rulesets == null) {
503: return false;
504: }
505: removeAssoicatedPathFilter(id);
506: if (this .rulesets.remove(id) != null) {
507: RulesetIDs = null;
508: this .getRulsetIDs();
509: return true;
510: }
511: return false;
512: }
513:
514: public boolean delRule(int index) {
515: if (index < 0 || index >= this .RuleIDs.length) {
516: return false;
517: }
518: return this .delRule(this .RuleIDs[index]);
519: }
520:
521: /** delete Filter Rule by ID
522: *
523: * @param RuleID
524: */
525: public boolean delRule(String id) {
526: if (id == null || this .rules == null) {
527: return false;
528: }
529: if (this .rules.remove(id) != null) {
530: freshRulesArray();
531: rmRuleFromRuleset(id);
532: return true;
533: }
534: return false;
535: }
536:
537: void rmRuleFromRuleset(String ruleid) {
538: Iterator it = rulesets.keySet().iterator();
539: while (it.hasNext()) {
540: Object key = it.next();
541: FilterRuleset frs = (FilterRuleset) rulesets.get(key);
542: if (frs != null) {
543: frs.deleteFilterByID(ruleid);
544: }
545: }
546: }
547:
548: public void AddRule(String id, FilterRule fr) {
549: rules.put(id, fr);
550: freshRulesArray();
551: if (Integer.parseInt(id) > lastRuleID) {
552: lastRuleID = Integer.parseInt(id);
553: }
554: }
555:
556: public int numOfRules() {
557: return this .rules.size();
558: }
559:
560: public FilterRule GetRule(String id) {
561: return (FilterRule) rules.get(id);
562: }
563:
564: public FilterRule GetRuleByIndex(int index) {
565: if (this .RuleIDs == null) {
566: this .freshRulesArray();
567: }
568: if (index < 0 || index >= this .RuleIDs.length) {
569: return null;
570: }
571: return (FilterRule) rules.get(this .RuleIDs[index]);
572: }
573:
574: public void AddRuleSet(String key, FilterRuleset frs) {
575: rulesets.put(key, frs);
576: }
577:
578: public FilterRuleset GetRuleSet(String key) {
579: return (FilterRuleset) rulesets.get(key);
580: }
581:
582: public FilterRuleset GetRuleSet(int index) {
583: if (index < 0 || index >= this .RulesetIDs.length) {
584: return null;
585: }
586: return (FilterRuleset) rulesets.get(this .RulesetIDs[index]);
587: }
588:
589: public int numRuleset() {
590: return rulesets.size();
591: }
592:
593: public int numRule() {
594: return rules.size();
595: }
596:
597: public String[] getRulsetIDs() {
598: if (RulesetIDs != null) {
599: return RulesetIDs;
600: }
601: RulesetIDs = new String[rulesets.size()];
602: Iterator it = rulesets.keySet().iterator();
603: int i = 0;
604: while (it.hasNext()) {
605: RulesetIDs[i++] = (String) it.next();
606: }
607: return RulesetIDs;
608: }
609:
610: public void freshRulesArray() {
611: RuleIDs = new String[rules.size()];
612: RuleNicknames = new String[rules.size()];
613: int ids[] = new int[rules.size()];
614: Iterator it = rules.keySet().iterator();
615: int i = 0;
616: while (it.hasNext()) {
617: Object key = it.next();
618: FilterRule fr = (FilterRule) rules.get(key);
619: ids[i++] = Integer.parseInt(fr.id);
620: }
621: java.util.Arrays.sort(ids);
622: for (i = 0; i < ids.length; i++) {
623: String key = Integer.toString(ids[i]);
624: FilterRule fr = (FilterRule) rules.get(key);
625: RuleIDs[i] = key;
626: RuleNicknames[i] = fr.nickname;
627: }
628: }
629:
630: /** get rules DIs in an array
631: *
632: * @param RulesetID
633: */
634: public String[] getRuleIDs() {
635: if (RuleIDs == null || RuleNicknames == null) {
636: freshRulesArray();
637: }
638: return RuleIDs;
639: }
640:
641: public String[] getRuleNicknames() {
642: if (RuleIDs == null || RuleNicknames == null) {
643: freshRulesArray();
644: }
645: return RuleNicknames;
646: }
647:
648: public String getRuleNickname(String ruleID) {
649: if (RuleIDs == null || RuleNicknames == null) {
650: freshRulesArray();
651: }
652: int index = 0;
653: String[] nicks = getRuleNicknames();
654: String[] ruleids = getRuleIDs();
655: for (int i = 0; i < ruleids.length; i++) {
656: if (ruleID.equalsIgnoreCase(ruleids[i])) {
657: index = i;
658: }
659: }
660: return nicks[index];
661: }
662:
663: public boolean FilterURL(String u) {
664: boolean r = false;
665: URL url = null;
666: String ustring;
667: try {
668: url = new URL(u);
669: String portocol = url.getProtocol();
670: String host = url.getHost();
671: String port = Integer.toString(url.getPort());
672:
673: ustring = portocol + "://" + host + ":" + port + "/";
674: System.out.println("Finding FRS for url:" + ustring);
675: //this.GetRuleSet(ustring);
676: } catch (MalformedURLException e) {
677: System.out.println("Invalid URL:<" + u + ">");
678: return true;
679: }
680: FilterRuleset frs = GetRuleSet(ustring);
681: if (frs == null) {
682: return true;
683: }
684: return frs.Filter(u);
685: }
686:
687: public void printRules(PrintWriter out) {
688: for (int i = 0; i < this .RuleIDs.length; i++) {
689: FilterRule fr = (FilterRule) rules.get(RuleIDs[i]);
690: if (fr != null) {
691: out.println();
692: out.print(fr.toConfigString());
693: }
694: }
695: }
696:
697: public int numOfStartingPoint(boolean checkEnable) {
698: Iterator it = this .rulesets.keySet().iterator();
699: int num = 0;
700: while (it.hasNext()) {
701: try {
702: String key = (String) it.next();
703: FilterRuleset frs = (FilterRuleset) rulesets.get(key);
704: if (!checkEnable || frs.enable) {
705: num = num + frs.starting_points.size();
706: }
707: } catch (NoSuchElementException e) {
708: break;
709: }
710: }
711: return num;
712: }
713:
714: public void printRuleSets(PrintWriter out) {
715: Iterator it = rulesets.keySet().iterator();
716: while (it.hasNext()) {
717: try {
718: String key = (String) it.next();
719: FilterRuleset frs = (FilterRuleset) rulesets.get(key);
720: out.print(frs.toConfigString());
721: } catch (NoSuchElementException e) {
722: break;
723: }
724: }
725: }
726:
727: public boolean RuleMatch(String key, String u) {
728: FilterRule fr = (FilterRule) rules.get(key);
729: if (fr == null) {
730: return false;
731: }
732: return fr.Match(u);
733: }
734:
735: public static String ADtoString(boolean b) {
736: if (b) {
737: return "allow";
738: } else {
739: return "deny";
740: }
741: }
742:
743: public static boolean ADStringtoBoolean(String ad) {
744: if (ad.compareToIgnoreCase("allow") == 0) {
745: return true;
746: }
747: return false;
748: }
749:
750: public static String BtoString(boolean b) {
751: if (b) {
752: return "true";
753: } else {
754: return "false";
755: }
756: }
757:
758: public static boolean StringtoBoolean(String s) {
759: if (s.compareToIgnoreCase("true") == 0) {
760: return true;
761: }
762: return false;
763: }
764:
765: public void addComment(String comm) {
766: if (this .frs_header_comment == null) {
767: this .frs_header_comment = new StringBuffer(comm + "\n");
768: } else {
769: this .frs_header_comment.append(comm + "\n");
770: }
771: }
772:
773: public void addPreComment(String comm) {
774: if (this .pre_comment == null) {
775: this .pre_comment = new ArrayList();
776: }
777: this .pre_comment.add(comm.substring(1));
778: }
779:
780: }
|