001: /*
002: * $Id: Manager.java,v 1.4 2004/10/10 14:19:47 csaltos Exp $
003: *
004: * Copyright 1999 PUCE [http://www.puce.edu.ec]
005: *
006: * Licensed under the Apache License, Version 2.0 (the "License");
007: * you may not use this file except in compliance with the License.
008: * You may obtain a copy of the License at
009: *
010: * http://www.apache.org/licenses/LICENSE-2.0
011: *
012: * Unless required by applicable law or agreed to in writing, software
013: * distributed under the License is distributed on an "AS IS" BASIS,
014: * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015: * See the License for the specific language governing permissions and
016: * limitations under the License.
017: */
018: package org.oxyus.admin;
019:
020: import java.io.BufferedReader;
021: import java.io.File;
022: import java.io.FileInputStream;
023: import java.io.FileNotFoundException;
024: import java.io.FileOutputStream;
025: import java.io.IOException;
026: import java.io.InputStreamReader;
027: import java.io.PrintWriter;
028: import java.net.URL;
029: import java.sql.SQLException;
030: import java.util.Date;
031: import java.util.Enumeration;
032: import java.util.Properties;
033:
034: import org.apache.log4j.Logger;
035: import org.apache.log4j.PropertyConfigurator;
036: import org.oxyus.crawler.Crawler;
037: import org.oxyus.crawler.Page;
038: import org.oxyus.crawler.Scope;
039: import org.oxyus.store.Store;
040: import org.quartz.JobDetail;
041: import org.quartz.Scheduler;
042: import org.quartz.SchedulerException;
043: import org.quartz.SchedulerFactory;
044: import org.quartz.SimpleTrigger;
045: import org.quartz.impl.StdSchedulerFactory;
046:
047: /**
048: * @author Carlos Saltos (csaltos[@]users.sourceforge.net)
049: */
050: public class Manager {
051:
052: /**
053: * Reads the logging configuration from the file specified in
054: * Constants.LOGGER_CONFIGURATION of the Oxyus Configuration directory.
055: */
056: public static void initLogger() throws ConfigurationException {
057: // Get the the log configuration filename
058: String logPropertiesFile = Configuration
059: .getLoggerConfigFilename();
060: // Load the logging properties
061: Properties logProperties = new Properties();
062: try {
063: logProperties.load(new FileInputStream(logPropertiesFile));
064: } catch (IOException ioe) {
065: throw new ConfigurationException(
066: "Unable to load logging configuration", ioe);
067: }
068: // Replace environment variables in the logging properties
069: Enumeration propertyNames = logProperties.propertyNames();
070: while (propertyNames.hasMoreElements()) {
071: String propertyName = (String) propertyNames.nextElement();
072: String value = logProperties.getProperty(propertyName);
073: String parsedValue = Configuration.translateProperty(value);
074: if (!value.equals(parsedValue)) {
075: logProperties.setProperty(propertyName, parsedValue);
076: }
077: }
078: // Configure the logging environment with the loaded properties
079: // TODO: create an Oxyus Logging Configurator for configure and watch
080: PropertyConfigurator.configure(logProperties);
081: // Set true an internal flag to know if the logger is
082: // active to produce logging messages.
083: Configuration.setLoggerActive(true);
084: }
085:
086: /**
087: * Sets environment for crawling an specific page.
088: */
089: public static void setDefaultInitialPage(URL page)
090: throws ConfigurationException {
091: Logger log = Logger.getLogger(Configuration.class);
092: // Add the page host to the scope configuration file
093: String scopeFilename = Configuration.getScopeConfigFilename();
094: try {
095: File scopeFile = new File(scopeFilename);
096: if (scopeFile.exists()) {
097: scopeFile.delete();
098: }
099: PrintWriter out = new PrintWriter(new FileOutputStream(
100: scopeFile));
101: String pattern = Scope.domainToPattern(page.getHost())
102: .pattern();
103: out.print("DOMAIN " + pattern + " ACCEPT");
104: out.flush();
105: out.close();
106: } catch (FileNotFoundException fnfe) {
107: log.error("unable to create scope config file "
108: + scopeFilename, fnfe);
109: throw new ConfigurationException("unable to create scope "
110: + "config file " + scopeFilename, fnfe);
111: }
112: // Add the page to the initial pages configuration file
113: String initialPagesFilename = Configuration
114: .getInitialPagesConfigFilename();
115: try {
116: File initialPagesFile = new File(initialPagesFilename);
117: if (initialPagesFile.exists()) {
118: initialPagesFile.delete();
119: }
120: PrintWriter out = new PrintWriter(new FileOutputStream(
121: initialPagesFile));
122: out.print(page.toExternalForm());
123: out.flush();
124: out.close();
125: } catch (FileNotFoundException fnfe) {
126: log.error("unable to create initial pages config file "
127: + initialPagesFilename, fnfe);
128: throw new ConfigurationException(
129: "unable to create initial " + "pages config file "
130: + initialPagesFilename, fnfe);
131: }
132: }
133:
134: public static void initScheduler() throws SchedulerException,
135: ConfigurationException {
136: SchedulerFactory schedulerFactory = new StdSchedulerFactory();
137: Scheduler scheduler = schedulerFactory.getScheduler();
138: scheduler.start();
139: // schedule crawling if a previous scheduler configuration
140: // exists
141: try {
142: String schedulerConfigFilename = Configuration
143: .getSchedulerConfigFilename();
144: if (new File(schedulerConfigFilename).exists()) {
145: String update = Configuration.getProperty(
146: schedulerConfigFilename,
147: "org.oxyus.scheduler.update");
148: Manager.scheduleCrawling(Long.parseLong(update));
149: }
150: } catch (ConfigurationException ce) {
151: Logger log = Logger.getLogger(Configuration.class);
152: log.warn("Delaying scheduling since no configuration is "
153: + "setted up", ce);
154: }
155: }
156:
157: public static void shutdownScheduler() throws SchedulerException {
158: Scheduler scheduler = StdSchedulerFactory.getDefaultScheduler();
159: if (scheduler != null) {
160: scheduler.shutdown();
161: }
162: }
163:
164: /**
165: * @param hours time between crawling executions in hours
166: */
167: public static void scheduleCrawling(long hours)
168: throws SchedulerException, ConfigurationException {
169: Scheduler scheduler = StdSchedulerFactory.getDefaultScheduler();
170: if (scheduler == null) {
171: Logger log = Logger.getLogger(Configuration.class);
172: log.error("unable to get the scheduler");
173: throw new SchedulerException("unable to get the scheduler");
174: }
175: // lookup if the crawler is already scheduled and remove it
176: // for rescheduling
177: try {
178: scheduler.deleteJob("oxyus", Scheduler.DEFAULT_GROUP);
179: } catch (SchedulerException se) {
180: Logger log = Logger.getLogger(Configuration.class);
181: log
182: .debug("No previous oxyus job was found for rescheduling");
183: }
184: JobDetail oxyusJob = new JobDetail("oxyus",
185: Scheduler.DEFAULT_GROUP, Crawler.class);
186: SimpleTrigger trigger = new SimpleTrigger("oxyustrigger",
187: Scheduler.DEFAULT_GROUP, new Date(), null,
188: SimpleTrigger.REPEAT_INDEFINITELY,
189: hours * 60L * 60L * 1000L);
190: scheduler.scheduleJob(oxyusJob, trigger);
191: // Add the update period to the scheduler configuration file
192: String schedulerConfigFilename = Configuration
193: .getSchedulerConfigFilename();
194: try {
195: File schedulerConfigFile = new File(schedulerConfigFilename);
196: if (schedulerConfigFile.exists()) {
197: schedulerConfigFile.delete();
198: }
199: PrintWriter out = new PrintWriter(new FileOutputStream(
200: schedulerConfigFile));
201: out.print("org.oxyus.scheduler.update=" + hours);
202: out.flush();
203: out.close();
204: } catch (FileNotFoundException fnfe) {
205: Logger log = Logger.getLogger(ConfigurationException.class);
206: log.error("unable to create scheduler config file "
207: + schedulerConfigFilename, fnfe);
208: throw new ConfigurationException(
209: "unable to create scheduler "
210: + "pages config file "
211: + schedulerConfigFilename, fnfe);
212: }
213: }
214:
215: public static void initStore() throws ConfigurationException,
216: IOException, SQLException {
217: // verify if the oxyus database objects are created
218: Store.verifyObjects();
219: }
220:
221: public static void shutdownStore() throws IOException, SQLException {
222: // TODO: shutdown store if using internal database
223: }
224:
225: public static void initEnvironment() throws ConfigurationException {
226: boolean hasErrors = false;
227: // initialize oxyus logging system
228: initLogger();
229: Logger log = Logger.getLogger(Manager.class);
230: // initilize scheduling
231: try {
232: initScheduler();
233: } catch (SchedulerException se) {
234: log.error("Unable to initialize scheduler for oxyus", se);
235: hasErrors = true;
236: }
237: // initialize oxyus store
238: try {
239: initStore();
240: } catch (IOException ioe) {
241: log
242: .error(
243: "Unable to initialize index store for oxyus",
244: ioe);
245: hasErrors = true;
246: } catch (SQLException se) {
247: log.error("Unable to initialize database store for oxyus",
248: se);
249: hasErrors = true;
250: }
251: if (!hasErrors) {
252: log.info("Oxyus successfully initilized");
253: }
254: }
255:
256: public static void shutdownEnvironment() {
257: boolean hasErrors = false;
258: Logger log = Logger.getLogger(Manager.class);
259: try {
260: shutdownScheduler();
261: } catch (SchedulerException se) {
262: log.error("Unable to shutdown scheduler for oxyus", se);
263: hasErrors = true;
264: }
265: try {
266: shutdownStore();
267: } catch (IOException ioe) {
268: log.error("Unable to shutdown index for oxyus", ioe);
269: hasErrors = true;
270: } catch (SQLException sqle) {
271: log.error("Unable to shutdown database store for oxyus",
272: sqle);
273: hasErrors = true;
274: }
275: if (!hasErrors) {
276: log.info("Oxyus shutdown successfully");
277: }
278: }
279:
280: public static void loadInitialPages(Scope scope, Store store)
281: throws ConfigurationException {
282: try {
283: String initPagesFilename = null;
284: initPagesFilename = Configuration
285: .getInitialPagesConfigFilename();
286: BufferedReader initPagesFile = new BufferedReader(
287: new InputStreamReader(new FileInputStream(
288: initPagesFilename)));
289: String addressLine;
290: Page pagina = new Page();
291: pagina.setStore(store);
292: pagina.setScope(scope);
293: while ((addressLine = initPagesFile.readLine()) != null) {
294: addressLine = addressLine.trim();
295: if (addressLine.length() > 0
296: && addressLine.charAt(0) != '#') {
297: // Record the new initial page adding a rule to the scope
298: pagina.recordLink(addressLine, true);
299: }
300: }
301: } catch (FileNotFoundException fnfe) {
302: Logger log = Logger.getLogger(Manager.class);
303: log.error("Unable to load initial pages", fnfe);
304: throw new ConfigurationException(
305: "Unable to load initial pages", fnfe);
306: } catch (IOException ioe) {
307: Logger log = Logger.getLogger(Manager.class);
308: log.error("Unable to load initial pages", ioe);
309: throw new ConfigurationException(
310: "Unable to load initial pages", ioe);
311: }
312: }
313:
314: }
|