001: // listManager.java
002: // -------------------------------------
003: // part of YACY
004: //
005: // (C) 2005, 2006 by Alexander Schier
006: // (C) 2007 by Bjoern 'Fuchs' Krombholz; fox.box@gmail.com
007: //
008: // last change: $LastChangedDate: 2008-02-04 21:46:18 +0000 (Mo, 04 Feb 2008) $ by $LastChangedBy: lulabad $
009: // $LastChangedRevision: 4443 $
010: //
011: // This program is free software; you can redistribute it and/or modify
012: // it under the terms of the GNU General Public License as published by
013: // the Free Software Foundation; either version 2 of the License, or
014: // (at your option) any later version.
015: //
016: // This program is distributed in the hope that it will be useful,
017: // but WITHOUT ANY WARRANTY; without even the implied warranty of
018: // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
019: // GNU General Public License for more details.
020: //
021: // You should have received a copy of the GNU General Public License
022: // along with this program; if not, write to the Free Software
023: // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
024: //
025: // Using this software in any meaning (reading, learning, copying, compiling,
026: // running) means that you agree that the Author(s) is (are) not responsible
027: // for cost, loss of data or any harm that may be caused directly or indirectly
028: // by usage of this softare or this documentation. The usage of this software
029: // is on your own risk. The installation and usage (starting/running) of this
030: // software may allow other people or application to access your computer and
031: // any attached devices and is highly dependent on the configuration of the
032: // software which must be done by the user of the software; the author(s) is
033: // (are) also not responsible for proper configuration and usage of the
034: // software, even if provoked by documentation provided together with
035: // the software.
036: //
037: // Any changes to this file according to the GPL as documented in the file
038: // gpl.txt aside this file in the shipment you received can be done to the
039: // lines that follows this copyright notice here, but changes must not be
040: // done inside the copyright notive above. A re-distribution must contain
041: // the intact and unchanged copyright notice.
042: // Contributions and changes to the program code must be marked as such.
043:
044: package de.anomic.data;
045:
046: import java.io.BufferedReader;
047: import java.io.BufferedWriter;
048: import java.io.File;
049: import java.io.FileInputStream;
050: import java.io.FileWriter;
051: import java.io.IOException;
052: import java.io.InputStreamReader;
053: import java.io.PrintWriter;
054: import java.util.ArrayList;
055: import java.util.Arrays;
056: import java.util.Collection;
057: import java.util.HashSet;
058: import java.util.Iterator;
059: import java.util.Set;
060: import java.util.Vector;
061:
062: import de.anomic.plasma.plasmaSwitchboard;
063: import de.anomic.plasma.urlPattern.abstractURLPattern;
064: import de.anomic.plasma.urlPattern.plasmaURLPattern.blacklistFile;
065: import de.anomic.server.serverCore;
066:
067: // The Naming of the functions is a bit strange...
068:
069: public class listManager {
070: public static plasmaSwitchboard switchboard;
071: public static File listsPath;
072:
073: /**
074: * Get ListSet from configuration file and return it as a unified Set.
075: *
076: * <b>Meaning of ListSet</b>: There are various "lists" in YaCy which are
077: * actually disjunct (pairwise unequal) sets which themselves can be seperated
078: * into different subsets. E.g., there can be more than one blacklist of a type.
079: * A ListSet is the set of all those "lists" (subsets) of an equal type.
080: *
081: * @param setName name of the ListSet
082: * @return a ListSet from configuration file
083: */
084: public static Set<String> getListSet(String setName) {
085: return string2set(switchboard.getConfig(setName, ""));
086: }
087:
088: /**
089: * Removes an element from a ListSet and updates the configuration file
090: * accordingly. If the element doesn't exist, then nothing will be changed.
091: *
092: * @param setName name of the ListSet.
093: * @param listName name of the element to remove from the ListSet.
094: */
095: public static void removeFromListSet(String setName, String listName) {
096: Set<String> listSet = getListSet(setName);
097:
098: if (listSet.size() > 0) {
099: listSet.remove(listName);
100: switchboard.setConfig(setName, collection2string(listSet));
101: }
102: }
103:
104: /**
105: * Adds an element to an existing ListSet. If the ListSet doesn't exist yet,
106: * a new one will be added. If the ListSet already contains an identical element,
107: * then nothing happens.
108: *
109: * The new list will be written to the configuartion file.
110: *
111: * @param setName
112: * @param newListName
113: */
114: public static void updateListSet(String setName, String newListName) {
115: Set<String> listSet = getListSet(setName);
116: listSet.add(newListName);
117:
118: switchboard.setConfig(setName, collection2string(listSet));
119: }
120:
121: /**
122: * @param setName ListSet in which to search for an element.
123: * @param listName the element to search for.
124: * @return <code>true</code> if the ListSet "setName" contains an element
125: * "listName", <code>false</code> otherwise.
126: */
127: public static boolean listSetContains(String setName,
128: String listName) {
129: Set<String> Lists = getListSet(setName);
130:
131: return Lists.contains(listName);
132: }
133:
134: //================general Lists==================
135:
136: /**
137: * Read lines of a file into an ArrayList.
138: *
139: * @param listFile the file
140: * @return the resulting array as an ArrayList
141: */
142: public static ArrayList<String> getListArray(File listFile) {
143: String line;
144: ArrayList<String> list = new ArrayList<String>();
145: int count = 0;
146: BufferedReader br = null;
147: try {
148: br = new BufferedReader(new InputStreamReader(
149: new FileInputStream(listFile), "UTF-8"));
150:
151: while ((line = br.readLine()) != null) {
152: list.add(line);
153: count++;
154: }
155: br.close();
156: } catch (IOException e) {
157: // list is empty
158: } finally {
159: if (br != null)
160: try {
161: br.close();
162: } catch (Exception e) {
163: }
164: }
165: return list;
166: }
167:
168: /**
169: * Write a String to a file (used for string representation of lists).
170: *
171: * @param listFile the file to write to
172: * @param out the String to write
173: * @return returns <code>true</code> if successful, <code>false</code> otherwise
174: */
175: public static boolean writeList(File listFile, String out) {
176: BufferedWriter bw = null;
177: try {
178: bw = new BufferedWriter(new PrintWriter(new FileWriter(
179: listFile)));
180: bw.write(out);
181: bw.close();
182: return true;
183: } catch (IOException e) {
184: return false;
185: } finally {
186: if (bw != null)
187: try {
188: bw.close();
189: } catch (Exception e) {
190: }
191: }
192: }
193:
194: /**
195: * Write elements of an Array of Strings to a file (one element per line).
196: *
197: * @param listFile the file to write to
198: * @param list the Array to write
199: * @return returns <code>true</code> if successful, <code>false</code> otherwise
200: */
201: public static boolean writeList(File listFile, String[] list) {
202: StringBuffer out = new StringBuffer();
203: for (int i = 0; i < list.length; i++) {
204: out.append(list[i]).append(serverCore.CRLF_STRING);
205: }
206: return writeList(listFile, new String(out)); //(File, String)
207: }
208:
209: // same as below
210: public static String getListString(String filename,
211: boolean withcomments) {
212: File listFile = new File(listsPath, filename);
213: return getListString(listFile, withcomments);
214: }
215:
216: /**
217: * Read lines of a text file into a String, optionally ignoring comments.
218: *
219: * @param listFile the File to read from.
220: * @param withcomments If <code>false</code> ignore lines starting with '#'.
221: * @return String representation of the file content.
222: */
223: public static String getListString(File listFile,
224: boolean withcomments) {
225: StringBuffer temp = new StringBuffer();
226:
227: BufferedReader br = null;
228: try {
229: br = new BufferedReader(new InputStreamReader(
230: new FileInputStream(listFile)));
231: temp.ensureCapacity((int) listFile.length());
232:
233: // Read the List
234: String line = "";
235: while ((line = br.readLine()) != null) {
236: if ((!line.startsWith("#") || withcomments)
237: || !line.equals("")) {
238: //temp += line + serverCore.CRLF_STRING;
239: temp.append(line).append(serverCore.CRLF_STRING);
240: }
241: }
242: br.close();
243: } catch (IOException e) {
244: } finally {
245: if (br != null)
246: try {
247: br.close();
248: } catch (Exception e) {
249: }
250: }
251:
252: return new String(temp);
253: }
254:
255: // get a Directory Listing as a String Array
256: public static String[] getDirListing(String dirname) {
257: final File dir = new File(dirname);
258: return getDirListing(dir);
259: }
260:
261: /**
262: * Read content of a directory into a String array of file names.
263: *
264: * @param dir The directory to get the file listing from. If it doesn't exist yet,
265: * it will be created.
266: * @return array of file names
267: */
268: public static String[] getDirListing(File dir) {
269: String[] fileListString;
270: File[] fileList;
271:
272: if (dir != null) {
273: if (!dir.exists()) {
274: dir.mkdir();
275: }
276: fileList = dir.listFiles();
277: fileListString = new String[fileList.length];
278: for (int i = 0; i <= fileList.length - 1; i++) {
279: fileListString[i] = fileList[i].getName();
280: }
281: return fileListString;
282: }
283: return null;
284: }
285:
286: // same as below
287: public static ArrayList<File> getDirsRecursive(File dir,
288: String notdir) {
289: return getDirsRecursive(dir, notdir, true);
290: }
291:
292: /**
293: * Returns a List of all dirs and subdirs as File Objects
294: *
295: * Warning: untested
296: */
297: public static ArrayList<File> getDirsRecursive(File dir,
298: String notdir, boolean excludeDotfiles) {
299: final File[] dirList = dir.listFiles();
300: final ArrayList<File> resultList = new ArrayList<File>();
301: ArrayList<File> recursive;
302: Iterator<File> iter;
303: for (int i = 0; i < dirList.length; i++) {
304: if (dirList[i].isDirectory()
305: && (!excludeDotfiles || !dirList[i].getName()
306: .startsWith("."))
307: && !dirList[i].getName().equals(notdir)) {
308: resultList.add(dirList[i]);
309: recursive = getDirsRecursive(dirList[i], notdir,
310: excludeDotfiles);
311: iter = recursive.iterator();
312: while (iter.hasNext()) {
313: resultList.add(iter.next());
314: }
315: }
316: }
317: return resultList;
318: }
319:
320: //================Helper functions for collection conversion==================
321:
322: /**
323: * Simple conversion of a Collection of Strings to a comma separated String.
324: * If the implementing Collection subclass guaranties an order of its elements,
325: * the substrings of the result will have the same order.
326: *
327: * @param col a Collection of Strings.
328: * @return String with elements from set separated by comma.
329: */
330: public static String collection2string(Collection<String> col) {
331: StringBuffer str = new StringBuffer();
332:
333: if (col != null && (col.size() > 0)) {
334: Iterator<String> it = col.iterator();
335: str.append(it.next());
336: while (it.hasNext()) {
337: str.append(",").append(it.next());
338: }
339: }
340:
341: return str.toString();
342: }
343:
344: /**
345: * @see listManager#string2vector(String)
346: */
347: public static ArrayList<String> string2arraylist(String string) {
348: ArrayList<String> l;
349:
350: if (string != null && string.length() > 0) {
351: l = new ArrayList<String>(Arrays.asList(string.split(",")));
352: } else {
353: l = new ArrayList<String>();
354: }
355:
356: return l;
357: }
358:
359: /**
360: * Simple conversion of a comma separated list to a unified Set.
361: *
362: * @param string list of comma separated Strings
363: * @return resulting Set or empty Set if string is <code>null</code>
364: */
365: public static Set<String> string2set(String string) {
366: HashSet<String> set;
367:
368: if (string != null) {
369: set = new HashSet<String>(Arrays.asList(string.split(",")));
370: } else {
371: set = new HashSet<String>();
372: }
373:
374: return set;
375: }
376:
377: /**
378: * Simple conversion of a comma separated list to a Vector containing
379: * the order of the substrings.
380: *
381: * @param string list of comma separated Strings
382: * @return resulting Vector or empty Vector if string is <code>null</code>
383: */
384: public static Vector<String> string2vector(String string) {
385: Vector<String> v;
386:
387: if (string != null) {
388: v = new Vector<String>(Arrays.asList(string.split(",")));
389: } else {
390: v = new Vector<String>();
391: }
392:
393: return v;
394: }
395:
396: //=============Blacklist specific================
397:
398: /**
399: * Load or reload all active Blacklists
400: */
401: public static void reloadBlacklists() {
402: String supportedBlacklistTypesStr = abstractURLPattern.BLACKLIST_TYPES_STRING;
403: String[] supportedBlacklistTypes = supportedBlacklistTypesStr
404: .split(",");
405:
406: ArrayList<blacklistFile> blacklistFiles = new ArrayList<blacklistFile>(
407: supportedBlacklistTypes.length);
408: for (int i = 0; i < supportedBlacklistTypes.length; i++) {
409: blacklistFile blFile = new blacklistFile(switchboard
410: .getConfig(supportedBlacklistTypes[i]
411: + ".BlackLists", switchboard.getConfig(
412: "BlackLists.DefaultList",
413: "url.default.black")),
414: supportedBlacklistTypes[i]);
415: blacklistFiles.add(blFile);
416: }
417:
418: plasmaSwitchboard.urlBlacklist.clear();
419: plasmaSwitchboard.urlBlacklist
420: .loadList(blacklistFiles
421: .toArray(new blacklistFile[blacklistFiles
422: .size()]), "/");
423:
424: // switchboard.urlBlacklist.clear();
425: // if (f != "") switchboard.urlBlacklist.loadLists("black", f, "/");
426: }
427: }
|