0001: /*
0002: * This program is free software; you can redistribute it and/or modify
0003: * it under the terms of the GNU General Public License as published by
0004: * the Free Software Foundation; either version 2 of the License, or
0005: * (at your option) any later version.
0006: *
0007: * This program is distributed in the hope that it will be useful,
0008: * but WITHOUT ANY WARRANTY; without even the implied warranty of
0009: * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
0010: * GNU General Public License for more details.
0011: *
0012: * You should have received a copy of the GNU General Public License
0013: * along with this program; if not, write to the Free Software
0014: * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
0015: */
0016:
0017: /*
0018: * ConverterUtils.java
0019: * Copyright (C) 2000 University of Waikato, Hamilton, New Zealand
0020: *
0021: */
0022:
0023: package weka.core.converters;
0024:
0025: import weka.core.ClassDiscovery;
0026: import weka.core.Instance;
0027: import weka.core.Instances;
0028: import weka.gui.GenericPropertiesCreator;
0029:
0030: import java.io.File;
0031: import java.io.IOException;
0032: import java.io.InputStream;
0033: import java.io.OutputStream;
0034: import java.io.Serializable;
0035: import java.io.StreamTokenizer;
0036: import java.net.URL;
0037: import java.util.Arrays;
0038: import java.util.Collections;
0039: import java.util.Enumeration;
0040: import java.util.Hashtable;
0041: import java.util.Properties;
0042: import java.util.Vector;
0043:
0044: /**
0045: * Utility routines for the converter package.
0046: *
0047: * @author Mark Hall (mhall@cs.waikato.ac.nz)
0048: * @author FracPete (fracpete at waikato dot ac dot nz)
0049: * @version $Revision: 1.13 $
0050: * @see Serializable
0051: */
0052: public class ConverterUtils implements Serializable {
0053:
0054: /** for serialization */
0055: static final long serialVersionUID = -2460855349276148760L;
0056:
0057: /**
0058: * Helper class for loading data from files and URLs. Via the ConverterUtils
0059: * class it determines which converter to use for loading the data into
0060: * memory. If the chosen converter is an incremental one, then the data
0061: * will be loaded incrementally, otherwise as batch. In both cases the
0062: * same interface will be used (<code>hasMoreElements</code>,
0063: * <code>nextElement</code>). Before the
0064: * data can be read again, one has to call the <code>reset</code> method.
0065: * The data source can also be initialized with an Instances object, in
0066: * order to provide a unified interface to files and already loaded datasets.
0067: *
0068: * @author FracPete (fracpete at waikato dot ac dot nz)
0069: * @version $Revision: 1.13 $
0070: * @see #hasMoreElements(Instances)
0071: * @see #nextElement(Instances)
0072: * @see #reset()
0073: * @see DataSink
0074: */
0075: public static class DataSource implements Serializable {
0076:
/** for serialization */
private static final long serialVersionUID = -613122395928757332L;

/** the file to load; null if initialized from a URL, stream or Instances */
protected File m_File;

/** the URL to load; null if initialized from a file, stream or Instances */
protected URL m_URL;

/** the loader used for reading; null if initialized from Instances only */
protected Loader m_Loader;

/** whether the loader is incremental (implements IncrementalConverter) */
protected boolean m_Incremental;

/** the instance counter for the batch case; index into m_BatchBuffer */
protected int m_BatchCounter;

/** the last internally read instance, buffered by hasMoreElements until
 * the caller collects it via nextElement */
protected Instance m_IncrementalBuffer;

/** the batch buffer; holds the complete dataset in non-incremental mode */
protected Instances m_BatchBuffer;
0100:
0101: /**
0102: * Tries to load the data from the file. Can be either a regular file or
0103: * a web location (http://, https://, ftp:// or file://).
0104: *
0105: * @param location the name of the file to load
0106: * @throws Exception if initialization fails
0107: */
0108: public DataSource(String location) throws Exception {
0109: super ();
0110:
0111: // file or URL?
0112: if (location.startsWith("http://")
0113: || location.startsWith("https://")
0114: || location.startsWith("ftp://")
0115: || location.startsWith("file://"))
0116: m_URL = new URL(location);
0117: else
0118: m_File = new File(location);
0119:
0120: // quick check: is it ARFF?
0121: if (isArff(location)) {
0122: m_Loader = new ArffLoader();
0123: } else {
0124: if (m_File != null)
0125: m_Loader = ConverterUtils
0126: .getLoaderForFile(location);
0127: else
0128: m_Loader = ConverterUtils
0129: .getURLLoaderForFile(location);
0130:
0131: // do we have a converter?
0132: if (m_Loader == null)
0133: throw new IllegalArgumentException(
0134: "No suitable converter found for '"
0135: + location + "'!");
0136: }
0137:
0138: // incremental loader?
0139: m_Incremental = (m_Loader instanceof IncrementalConverter);
0140:
0141: reset();
0142: }
0143:
/**
 * Initializes the datasource with the given dataset, providing the
 * unified DataSource interface on top of already loaded data.
 *
 * @param inst the dataset to use
 */
public DataSource(Instances inst) {
  super();

  m_Loader = null;
  m_File = null;
  m_URL = null;
  m_Incremental = false;
  m_BatchBuffer = inst;
}
0158:
/**
 * Initializes the datasource with the given (already configured) Loader.
 *
 * @param loader the Loader to use
 */
public DataSource(Loader loader) {
  super();

  m_File = null;
  m_URL = null;
  m_BatchBuffer = null;
  m_Loader = loader;
  m_Incremental = (loader instanceof IncrementalConverter);

  // batch loaders need their data read up front
  initBatchBuffer();
}
0175:
/**
 * Initializes the datasource with the given input stream. This stream
 * is always interpreted as ARFF.
 *
 * @param stream the stream to use
 */
public DataSource(InputStream stream) {
  super();

  m_BatchBuffer = null;
  m_Loader = new ArffLoader();
  try {
    m_Loader.setSource(stream);
  } catch (Exception e) {
    // report the failure instead of swallowing it silently; the loader
    // is discarded, so subsequent calls will yield no data
    e.printStackTrace();
    m_Loader = null;
  }
  m_File = null;
  m_URL = null;
  m_Incremental = (m_Loader instanceof IncrementalConverter);

  initBatchBuffer();
}
0198:
0199: /**
0200: * initializes the batch buffer if necessary, i.e., for non-incremental
0201: * loaders
0202: */
0203: protected void initBatchBuffer() {
0204: try {
0205: if (!isIncremental())
0206: m_BatchBuffer = m_Loader.getDataSet();
0207: else
0208: m_BatchBuffer = null;
0209: } catch (Exception e) {
0210: e.printStackTrace();
0211: }
0212: }
0213:
0214: /**
0215: * returns whether the extension of the location is likely to be of ARFF
0216: * format, i.e., ending in ".arff" or ".arff.gz" (case-insensitive)
0217: *
0218: * @param location the file location to check
0219: * @return true if the location seems to be of ARFF format
0220: */
0221: public static boolean isArff(String location) {
0222: if (location.toLowerCase().endsWith(
0223: ArffLoader.FILE_EXTENSION.toLowerCase())
0224: || location.toLowerCase().endsWith(
0225: ArffLoader.FILE_EXTENSION_COMPRESSED
0226: .toLowerCase()))
0227: return true;
0228: else
0229: return false;
0230: }
0231:
/**
 * returns whether the loader is an incremental one
 *
 * @return true if the loader is a true incremental one
 */
public boolean isIncremental() {
  return m_Incremental;
}

/**
 * returns the determined loader, null if the DataSource was initialized
 * with data alone and not a file/URL.
 *
 * @return the loader used for retrieving the data
 */
public Loader getLoader() {
  return m_Loader;
}
0250:
0251: /**
0252: * returns the full dataset, can be null in case of an error
0253: *
0254: * @return the full dataset
0255: * @throws Exception if resetting of loader fails
0256: */
0257: public Instances getDataSet() throws Exception {
0258: Instances result;
0259:
0260: result = null;
0261:
0262: // reset the loader
0263: reset();
0264:
0265: try {
0266: if (m_Loader != null)
0267: result = m_Loader.getDataSet();
0268: else
0269: result = m_BatchBuffer;
0270: } catch (Exception e) {
0271: e.printStackTrace();
0272: result = null;
0273: }
0274:
0275: return result;
0276: }
0277:
0278: /**
0279: * returns the full dataset with the specified class index set,
0280: * can be null in case of an error
0281: *
0282: * @param classIndex the class index for the dataset
0283: * @return the full dataset
0284: * @throws Exception if resetting of loader fails
0285: */
0286: public Instances getDataSet(int classIndex) throws Exception {
0287: Instances result;
0288:
0289: result = getDataSet();
0290: if (result != null)
0291: result.setClassIndex(classIndex);
0292:
0293: return result;
0294: }
0295:
/**
 * resets the loader, i.e., makes the data readable from the start again;
 * a no-op for datasources initialized from an Instances object
 *
 * @throws Exception if resetting fails
 */
public void reset() throws Exception {
  // re-point the loader at its source; the file/URL constructor only
  // ever assigns a loader of the matching type, hence the casts
  if (m_File != null)
    ((AbstractFileLoader) m_Loader).setFile(m_File);
  else if (m_URL != null)
    ((URLSourcedLoader) m_Loader).setURL(m_URL.toString());
  else if (m_Loader != null)
    m_Loader.reset();

  // restart iteration state
  m_BatchCounter = 0;
  m_IncrementalBuffer = null;

  // batch loaders re-read the complete dataset into the buffer
  if (m_Loader != null) {
    if (!isIncremental())
      m_BatchBuffer = m_Loader.getDataSet();
    else
      m_BatchBuffer = null;
  }
}
0319:
0320: /**
0321: * returns the structure of the data
0322: *
0323: * @return the structure of the data
0324: * @throws Exception if something goes wrong
0325: */
0326: public Instances getStructure() throws Exception {
0327: if (m_Loader != null)
0328: return m_Loader.getStructure();
0329: else
0330: return new Instances(m_BatchBuffer, 0);
0331: }
0332:
0333: /**
0334: * returns the structure of the data, with the defined class index
0335: *
0336: * @param classIndex the class index for the dataset
0337: * @return the structure of the data
0338: * @throws Exception if something goes wrong
0339: */
0340: public Instances getStructure(int classIndex) throws Exception {
0341: Instances result;
0342:
0343: result = getStructure();
0344: if (result != null)
0345: result.setClassIndex(classIndex);
0346:
0347: return result;
0348: }
0349:
/**
 * returns whether there are more Instance objects in the data
 *
 * @param structure the structure of the dataset, handed to the
 * incremental loader for reading ahead (ignored in batch mode)
 * @return true if there are more Instance objects
 * available
 * @see #nextElement(Instances)
 */
public boolean hasMoreElements(Instances structure) {
  boolean result;

  result = false;

  if (isIncremental()) {
    // user still hasn't collected the last one?
    if (m_IncrementalBuffer != null) {
      result = true;
    } else {
      try {
        // read ahead; the instance is kept for the next nextElement call
        m_IncrementalBuffer = m_Loader
            .getNextInstance(structure);
        result = (m_IncrementalBuffer != null);
      } catch (Exception e) {
        e.printStackTrace();
        result = false;
      }
    }
  } else {
    // NOTE(review): assumes m_BatchBuffer is non-null in batch mode -
    // holds for all constructors unless stream initialization failed
    result = (m_BatchCounter < m_BatchBuffer.numInstances());
  }

  return result;
}
0382:
0383: /**
0384: * returns the next element and sets the specified dataset, null if
0385: * none available
0386: *
0387: * @param dataset the dataset to set for the instance
0388: * @return the next Instance
0389: */
0390: public Instance nextElement(Instances dataset) {
0391: Instance result;
0392:
0393: result = null;
0394:
0395: if (isIncremental()) {
0396: // is there still an instance in the buffer?
0397: if (m_IncrementalBuffer != null) {
0398: result = m_IncrementalBuffer;
0399: m_IncrementalBuffer = null;
0400: } else {
0401: try {
0402: result = m_Loader.getNextInstance(dataset);
0403: } catch (Exception e) {
0404: e.printStackTrace();
0405: result = null;
0406: }
0407: }
0408: } else {
0409: if (m_BatchCounter < m_BatchBuffer.numInstances()) {
0410: result = m_BatchBuffer.instance(m_BatchCounter);
0411: m_BatchCounter++;
0412: }
0413: }
0414:
0415: result.setDataset(dataset);
0416:
0417: return result;
0418: }
0419:
0420: /**
0421: * convencience method for loading a dataset in batch mode
0422: *
0423: * @param location the dataset to load
0424: * @return the dataset
0425: * @throws Exception if loading fails
0426: * @see #DataSource(String)
0427: */
0428: public static Instances read(String location) throws Exception {
0429: DataSource source;
0430: Instances result;
0431:
0432: source = new DataSource(location);
0433: result = source.getDataSet();
0434:
0435: return result;
0436: }
0437:
0438: /**
0439: * convencience method for loading a dataset in batch mode from a stream
0440: *
0441: * @param stream the stream to load the dataset from
0442: * @return the dataset
0443: * @throws Exception if loading fails
0444: * @see #DataSource(InputStream)
0445: */
0446: public static Instances read(InputStream stream)
0447: throws Exception {
0448: DataSource source;
0449: Instances result;
0450:
0451: source = new DataSource(stream);
0452: result = source.getDataSet();
0453:
0454: return result;
0455: }
0456:
0457: /**
0458: * convencience method for loading a dataset in batch mode
0459: *
0460: * @param loader the loader to get the dataset from
0461: * @return the dataset
0462: * @throws Exception if loading fails
0463: * @see #DataSource(Loader)
0464: */
0465: public static Instances read(Loader loader) throws Exception {
0466: DataSource source;
0467: Instances result;
0468:
0469: source = new DataSource(loader);
0470: result = source.getDataSet();
0471:
0472: return result;
0473: }
0474:
0475: /**
0476: * for testing only - takes a data file as input
0477: *
0478: * @param args the commandline arguments
0479: * @throws Exception if something goes wrong
0480: */
0481: public static void main(String[] args) throws Exception {
0482: if (args.length != 1) {
0483: System.out.println("\nUsage: "
0484: + DataSource.class.getName() + " <file>\n");
0485: System.exit(1);
0486: }
0487:
0488: DataSource loader = new DataSource(args[0]);
0489:
0490: System.out
0491: .println("Incremental? " + loader.isIncremental());
0492: System.out.println("Loader: "
0493: + loader.getLoader().getClass().getName());
0494: System.out.println("Data:\n");
0495: Instances structure = loader.getStructure();
0496: System.out.println(structure);
0497: while (loader.hasMoreElements(structure))
0498: System.out.println(loader.nextElement(structure));
0499:
0500: Instances inst = loader.getDataSet();
0501: loader = new DataSource(inst);
0502: System.out.println("\n\nProxy-Data:\n");
0503: System.out.println(loader.getStructure());
0504: while (loader.hasMoreElements(structure))
0505: System.out.println(loader.nextElement(inst));
0506: }
0507: }
0508:
0509: /**
0510: * Helper class for saving data to files. Via the ConverterUtils
0511: * class it determines which converter to use for saving the data.
0512: * It is the logical counterpart to <code>DataSource</code>.
0513: *
0514: * @author FracPete (fracpete at waikato dot ac dot nz)
0515: * @version $Revision: 1.13 $
0516: * @see DataSource
0517: */
0518: public static class DataSink implements Serializable {
0519:
0520: /** for serialization */
0521: private static final long serialVersionUID = -1504966891136411204L;
0522:
0523: /** the saver to use for storing the data */
0524: protected Saver m_Saver = null;
0525:
0526: /** the stream to store the data in (always in ARFF format) */
0527: protected OutputStream m_Stream = null;
0528:
/**
 * initializes the sink to save the data to the given file
 *
 * @param filename the file to save data to
 * @throws Exception if set of saver fails
 * @throws IllegalArgumentException if no converter can be found for the
 * file's extension
 */
public DataSink(String filename) throws Exception {
  m_Stream = null;

  if (DataSource.isArff(filename))
    m_Saver = new ArffSaver();
  else
    m_Saver = getSaverForFile(filename);

  // bug fix: fail with a clear message (consistent with DataSource)
  // instead of a NullPointerException on the cast below
  if (m_Saver == null)
    throw new IllegalArgumentException(
        "No suitable converter found for '" + filename + "'!");

  ((AbstractFileSaver) m_Saver).setFile(new File(filename));
}
0545:
/**
 * initializes the sink to save the data to the given Saver (expected to be
 * fully configured)
 *
 * @param saver the saver to use for saving the data
 */
public DataSink(Saver saver) {
  m_Stream = null;
  m_Saver = saver;
}

/**
 * initializes the sink to save the data in the stream (always in ARFF
 * format)
 *
 * @param stream the output stream to use for storing the data in ARFF
 * format
 */
public DataSink(OutputStream stream) {
  m_Stream = stream;
  m_Saver = null;
}
0568:
/**
 * writes the given data either via the saver or to the defined
 * output stream (depending on the constructor). In case of the stream,
 * the stream is only flushed, but not closed.
 *
 * @param data the data to save
 * @throws Exception if saving fails
 */
public void write(Instances data) throws Exception {
  if (m_Saver != null) {
    m_Saver.setInstances(data);
    m_Saver.writeBatch();
  } else {
    // NOTE(review): getBytes() uses the platform default charset -
    // confirm this matches the expected encoding for ARFF output
    m_Stream.write(data.toString().getBytes());
    m_Stream.flush();
  }
}
0586:
0587: /**
0588: * writes the data to the given file
0589: *
0590: * @param filename the file to write the data to
0591: * @param data the data to store
0592: * @throws Exception if writing fails
0593: */
0594: public static void write(String filename, Instances data)
0595: throws Exception {
0596: DataSink sink;
0597:
0598: sink = new DataSink(filename);
0599: sink.write(data);
0600: }
0601:
0602: /**
0603: * writes the data via the given saver
0604: *
0605: * @param saver the saver to use for writing the data
0606: * @param data the data to store
0607: * @throws Exception if writing fails
0608: */
0609: public static void write(Saver saver, Instances data)
0610: throws Exception {
0611: DataSink sink;
0612:
0613: sink = new DataSink(saver);
0614: sink.write(data);
0615: }
0616:
0617: /**
0618: * writes the data to the given stream (always in ARFF format)
0619: *
0620: * @param stream the stream to write the data to (ARFF format)
0621: * @param data the data to store
0622: * @throws Exception if writing fails
0623: */
0624: public static void write(OutputStream stream, Instances data)
0625: throws Exception {
0626: DataSink sink;
0627:
0628: sink = new DataSink(stream);
0629: sink.write(data);
0630: }
0631:
0632: /**
0633: * for testing only - takes a data file as input and a data file for the
0634: * output
0635: *
0636: * @param args the commandline arguments
0637: * @throws Exception if something goes wrong
0638: */
0639: public static void main(String[] args) throws Exception {
0640: if (args.length != 2) {
0641: System.out.println("\nUsage: "
0642: + DataSource.class.getName()
0643: + " <input-file> <output-file>\n");
0644: System.exit(1);
0645: }
0646:
0647: // load data
0648: Instances data = DataSource.read(args[0]);
0649:
0650: // save data
0651: DataSink.write(args[1], data);
0652: }
0653: }
0654:
/** the core loaders - hardcoded list necessary for RMI/Remote Experiments
 * (comma-separated list); entries that do not implement
 * FileSourcedConverter are filtered out when the extension tables are
 * built */
public final static String CORE_FILE_LOADERS = weka.core.converters.ArffLoader.class
    .getName()
    + ","
    + weka.core.converters.C45Loader.class.getName()
    + ","
    + weka.core.converters.CSVLoader.class.getName()
    + ","
    + weka.core.converters.DatabaseConverter.class.getName()
    + ","
    + weka.core.converters.LibSVMLoader.class.getName()
    + ","
    + weka.core.converters.SerializedInstancesLoader.class
        .getName()
    + ","
    + weka.core.converters.TextDirectoryLoader.class.getName()
    + "," + weka.core.converters.XRFFLoader.class.getName();

/** the core savers - hardcoded list necessary for RMI/Remote Experiments
 * (comma-separated list) */
public final static String CORE_FILE_SAVERS = weka.core.converters.ArffSaver.class
    .getName()
    + ","
    + weka.core.converters.C45Saver.class.getName()
    + ","
    + weka.core.converters.CSVSaver.class.getName()
    + ","
    + weka.core.converters.DatabaseConverter.class.getName()
    + ","
    + weka.core.converters.LibSVMSaver.class.getName()
    + ","
    + weka.core.converters.SerializedInstancesSaver.class
        .getName()
    + ","
    + weka.core.converters.XRFFSaver.class.getName();

/** all available loaders (extension <-> classname); filled by the
 * static initializer below */
protected static Hashtable<String, String> m_FileLoaders;

/** all available URL loaders (extension <-> classname) */
protected static Hashtable<String, String> m_URLFileLoaders;

/** all available savers (extension <-> classname) */
protected static Hashtable<String, String> m_FileSavers;
0700:
0701: // determine all loaders/savers
0702: static {
0703: Vector classnames;
0704:
0705: try {
0706: // generate properties
0707: // Note: does NOT work with RMI, hence m_FileLoadersCore/m_FileSaversCore
0708: GenericPropertiesCreator creator = new GenericPropertiesCreator();
0709: creator.execute(false);
0710: Properties props = creator.getOutputProperties();
0711:
0712: // init
0713: m_FileLoaders = new Hashtable<String, String>();
0714: m_URLFileLoaders = new Hashtable<String, String>();
0715: m_FileSavers = new Hashtable<String, String>();
0716:
0717: // loaders
0718: m_FileLoaders = getFileConverters(
0719: props.getProperty(Loader.class.getName(),
0720: CORE_FILE_LOADERS),
0721: new String[] { FileSourcedConverter.class.getName() });
0722:
0723: // URL loaders
0724: m_URLFileLoaders = getFileConverters(props.getProperty(
0725: Loader.class.getName(), CORE_FILE_LOADERS),
0726: new String[] {
0727: FileSourcedConverter.class.getName(),
0728: URLSourcedLoader.class.getName() });
0729:
0730: // savers
0731: m_FileSavers = getFileConverters(
0732: props.getProperty(Saver.class.getName(),
0733: CORE_FILE_SAVERS),
0734: new String[] { FileSourcedConverter.class.getName() });
0735: } catch (Exception e) {
0736: // ignore
0737: } finally {
0738: // loaders
0739: if (m_FileLoaders.size() == 0) {
0740: classnames = ClassDiscovery
0741: .find(AbstractFileLoader.class,
0742: AbstractFileLoader.class.getPackage()
0743: .getName());
0744: if (classnames.size() > 0)
0745: m_FileLoaders = getFileConverters(classnames,
0746: new String[] { FileSourcedConverter.class
0747: .getName() });
0748: else
0749: m_FileLoaders = getFileConverters(
0750: CORE_FILE_LOADERS,
0751: new String[] { FileSourcedConverter.class
0752: .getName() });
0753: }
0754:
0755: // URL loaders
0756: if (m_URLFileLoaders.size() == 0) {
0757: classnames = ClassDiscovery
0758: .find(AbstractFileLoader.class,
0759: AbstractFileLoader.class.getPackage()
0760: .getName());
0761: if (classnames.size() > 0)
0762: m_URLFileLoaders = getFileConverters(classnames,
0763: new String[] {
0764: FileSourcedConverter.class
0765: .getName(),
0766: URLSourcedLoader.class.getName() });
0767: else
0768: m_URLFileLoaders = getFileConverters(
0769: CORE_FILE_LOADERS, new String[] {
0770: FileSourcedConverter.class
0771: .getName(),
0772: URLSourcedLoader.class.getName() });
0773: }
0774:
0775: // savers
0776: if (m_FileSavers.size() == 0) {
0777: classnames = ClassDiscovery.find(
0778: AbstractFileSaver.class,
0779: AbstractFileSaver.class.getPackage().getName());
0780: if (classnames.size() > 0)
0781: m_FileSavers = getFileConverters(classnames,
0782: new String[] { FileSourcedConverter.class
0783: .getName() });
0784: else
0785: m_FileSavers = getFileConverters(CORE_FILE_SAVERS,
0786: new String[] { FileSourcedConverter.class
0787: .getName() });
0788: }
0789: }
0790: }
0791:
0792: /**
0793: * returns a hashtable with the association
0794: * "file extension <-> converter classname" for the comma-separated list
0795: * of converter classnames.
0796: *
0797: * @param classnames comma-separated list of converter classnames
0798: * @param intf interfaces the converters have to implement
0799: * @return hashtable with ExtensionFileFilters
0800: */
0801: protected static Hashtable<String, String> getFileConverters(
0802: String classnames, String[] intf) {
0803: Vector list;
0804: String[] names;
0805: int i;
0806:
0807: list = new Vector();
0808: names = classnames.split(",");
0809: for (i = 0; i < names.length; i++)
0810: list.add(names[i]);
0811:
0812: return getFileConverters(list, intf);
0813: }
0814:
0815: /**
0816: * returns a hashtable with the association
0817: * "file extension <-> converter classname" for the list of converter
0818: * classnames.
0819: *
0820: * @param classnames list of converter classnames
0821: * @param intf interfaces the converters have to implement
0822: * @return hashtable with ExtensionFileFilters
0823: */
0824: protected static Hashtable<String, String> getFileConverters(
0825: Vector classnames, String[] intf) {
0826: Hashtable<String, String> result;
0827: String classname;
0828: Class cls;
0829: String[] ext;
0830: FileSourcedConverter converter;
0831: int i;
0832: int n;
0833:
0834: result = new Hashtable<String, String>();
0835:
0836: for (i = 0; i < classnames.size(); i++) {
0837: classname = (String) classnames.get(i);
0838:
0839: // all necessary interfaces implemented?
0840: for (n = 0; n < intf.length; n++) {
0841: if (!ClassDiscovery.hasInterface(intf[n], classname))
0842: continue;
0843: }
0844:
0845: // get data from converter
0846: try {
0847: cls = Class.forName(classname);
0848: converter = (FileSourcedConverter) cls.newInstance();
0849: ext = converter.getFileExtensions();
0850: } catch (Exception e) {
0851: cls = null;
0852: converter = null;
0853: ext = new String[0];
0854: }
0855:
0856: if (converter == null)
0857: continue;
0858:
0859: for (n = 0; n < ext.length; n++)
0860: result.put(ext[n], classname);
0861: }
0862:
0863: return result;
0864: }
0865:
0866: /**
0867: * Gets token, skipping empty lines.
0868: *
0869: * @param tokenizer the stream tokenizer
0870: * @throws IOException if reading the next token fails
0871: */
0872: public static void getFirstToken(StreamTokenizer tokenizer)
0873: throws IOException {
0874:
0875: while (tokenizer.nextToken() == StreamTokenizer.TT_EOL) {
0876: }
0877: ;
0878: if ((tokenizer.ttype == '\'') || (tokenizer.ttype == '"')) {
0879: tokenizer.ttype = StreamTokenizer.TT_WORD;
0880: } else if ((tokenizer.ttype == StreamTokenizer.TT_WORD)
0881: && (tokenizer.sval.equals("?"))) {
0882: tokenizer.ttype = '?';
0883: }
0884: }
0885:
0886: /**
0887: * Gets token.
0888: *
0889: * @param tokenizer the stream tokenizer
0890: * @throws IOException if reading the next token fails
0891: */
0892: public static void getToken(StreamTokenizer tokenizer)
0893: throws IOException {
0894:
0895: tokenizer.nextToken();
0896: if (tokenizer.ttype == StreamTokenizer.TT_EOL) {
0897: return;
0898: }
0899:
0900: if ((tokenizer.ttype == '\'') || (tokenizer.ttype == '"')) {
0901: tokenizer.ttype = StreamTokenizer.TT_WORD;
0902: } else if ((tokenizer.ttype == StreamTokenizer.TT_WORD)
0903: && (tokenizer.sval.equals("?"))) {
0904: tokenizer.ttype = '?';
0905: }
0906: }
0907:
0908: /**
0909: * Throws error message with line number and last token read.
0910: *
0911: * @param theMsg the error message to be thrown
0912: * @param tokenizer the stream tokenizer
0913: * @throws IOException containing the error message
0914: */
0915: public static void errms(StreamTokenizer tokenizer, String theMsg)
0916: throws IOException {
0917:
0918: throw new IOException(theMsg + ", read " + tokenizer.toString());
0919: }
0920:
0921: /**
0922: * returns a vector with the classnames of all the loaders from the
0923: * given hashtable
0924: *
0925: * @param ht the hashtable with the extension/converter relation
0926: * @return the classnames of the loaders
0927: */
0928: protected static Vector<String> getConverters(
0929: Hashtable<String, String> ht) {
0930: Vector<String> result;
0931: Enumeration<String> enm;
0932: String converter;
0933:
0934: result = new Vector<String>();
0935:
0936: // get all classnames
0937: enm = ht.elements();
0938: while (enm.hasMoreElements()) {
0939: converter = enm.nextElement();
0940: if (!result.contains(converter))
0941: result.add(converter);
0942: }
0943:
0944: // sort names
0945: Collections.sort(result);
0946:
0947: return result;
0948: }
0949:
0950: /**
0951: * tries to determine the converter to use for this kind of file, returns
0952: * null if none can be found in the given hashtable
0953: *
0954: * @param filename the file to return a converter for
0955: * @param ht the hashtable with the relation extension/converter
0956: * @return the converter if one was found, null otherwise
0957: */
0958: protected static Object getConverterForFile(String filename,
0959: Hashtable<String, String> ht) {
0960: Object result;
0961: String extension;
0962: int index;
0963:
0964: result = null;
0965:
0966: index = filename.lastIndexOf('.');
0967: if (index > -1) {
0968: extension = filename.substring(index).toLowerCase();
0969: result = getConverterForExtension(extension, ht);
0970: // is it a compressed format?
0971: if (extension.equals(".gz") && result == null) {
0972: index = filename.lastIndexOf('.', index - 1);
0973: extension = filename.substring(index).toLowerCase();
0974: result = getConverterForExtension(extension, ht);
0975: }
0976: }
0977:
0978: return result;
0979: }
0980:
0981: /**
0982: * tries to determine the loader to use for this kind of extension, returns
0983: * null if none can be found
0984: *
0985: * @param extension the file extension to return a converter for
0986: * @param ht the hashtable with the relation extension/converter
0987: * @return the converter if one was found, null otherwise
0988: */
0989: protected static Object getConverterForExtension(String extension,
0990: Hashtable<String, String> ht) {
0991: Object result;
0992: String classname;
0993:
0994: result = null;
0995: classname = (String) ht.get(extension);
0996: if (classname != null) {
0997: try {
0998: result = Class.forName(classname).newInstance();
0999: } catch (Exception e) {
1000: result = null;
1001: e.printStackTrace();
1002: }
1003: }
1004:
1005: return result;
1006: }
1007:
1008: /**
1009: * checks whether the given class is one of the hardcoded core file loaders
1010: *
1011: * @param classname the class to check
1012: * @return true if the class is one of the core loaders
1013: * @see #CORE_FILE_LOADERS
1014: */
1015: public static boolean isCoreFileLoader(String classname) {
1016: boolean result;
1017: String[] classnames;
1018:
1019: classnames = CORE_FILE_LOADERS.split(",");
1020: result = (Arrays.binarySearch(classnames, classname) >= 0);
1021:
1022: return result;
1023: }
1024:
/**
 * returns a sorted vector with the classnames of all the file loaders
 *
 * @return the classnames of the loaders
 */
public static Vector<String> getFileLoaders() {
  return getConverters(m_FileLoaders);
}

/**
 * tries to determine the loader to use for this kind of file, returns
 * null if none can be found
 *
 * @param filename the file to return a converter for
 * @return the converter if one was found, null otherwise
 */
public static AbstractFileLoader getLoaderForFile(String filename) {
  return (AbstractFileLoader) getConverterForFile(filename,
      m_FileLoaders);
}

/**
 * tries to determine the loader to use for this kind of file, returns
 * null if none can be found
 *
 * @param file the file to return a converter for
 * @return the converter if one was found, null otherwise
 */
public static AbstractFileLoader getLoaderForFile(File file) {
  return getLoaderForFile(file.getAbsolutePath());
}

/**
 * tries to determine the loader to use for this kind of extension, returns
 * null if none can be found
 *
 * @param extension the file extension to return a converter for
 * (including the leading dot, e.g. ".csv")
 * @return the converter if one was found, null otherwise
 */
public static AbstractFileLoader getLoaderForExtension(
    String extension) {
  return (AbstractFileLoader) getConverterForExtension(extension,
      m_FileLoaders);
}
1069:
/**
 * returns a sorted vector with the classnames of all the URL file loaders
 *
 * @return the classnames of the loaders
 */
public static Vector<String> getURLFileLoaders() {
  return getConverters(m_URLFileLoaders);
}

/**
 * tries to determine the URL loader to use for this kind of file, returns
 * null if none can be found
 *
 * @param filename the file to return a URL converter for
 * @return the converter if one was found, null otherwise
 */
public static AbstractFileLoader getURLLoaderForFile(String filename) {
  return (AbstractFileLoader) getConverterForFile(filename,
      m_URLFileLoaders);
}

/**
 * tries to determine the URL loader to use for this kind of file, returns
 * null if none can be found
 *
 * @param file the file to return a URL converter for
 * @return the converter if one was found, null otherwise
 */
public static AbstractFileLoader getURLLoaderForFile(File file) {
  return getURLLoaderForFile(file.getAbsolutePath());
}

/**
 * tries to determine the URL loader to use for this kind of extension, returns
 * null if none can be found
 *
 * @param extension the file extension to return a URL converter for
 * (including the leading dot, e.g. ".csv")
 * @return the converter if one was found, null otherwise
 */
public static AbstractFileLoader getURLLoaderForExtension(
    String extension) {
  return (AbstractFileLoader) getConverterForExtension(extension,
      m_URLFileLoaders);
}
1114:
1115: /**
1116: * checks whether the given class is one of the hardcoded core file savers
1117: *
1118: * @param classname the class to check
1119: * @return true if the class is one of the core savers
1120: * @see #CORE_FILE_SAVERS
1121: */
1122: public static boolean isCoreFileSaver(String classname) {
1123: boolean result;
1124: String[] classnames;
1125:
1126: classnames = CORE_FILE_SAVERS.split(",");
1127: result = (Arrays.binarySearch(classnames, classname) >= 0);
1128:
1129: return result;
1130: }
1131:
/**
 * returns a sorted vector with the classnames of all the file savers
 *
 * @return the classnames of the savers
 */
public static Vector<String> getFileSavers() {
  return getConverters(m_FileSavers);
}

/**
 * tries to determine the saver to use for this kind of file, returns
 * null if none can be found
 *
 * @param filename the file to return a converter for
 * @return the converter if one was found, null otherwise
 */
public static AbstractFileSaver getSaverForFile(String filename) {
  return (AbstractFileSaver) getConverterForFile(filename,
      m_FileSavers);
}

/**
 * tries to determine the saver to use for this kind of file, returns
 * null if none can be found
 *
 * @param file the file to return a converter for
 * @return the converter if one was found, null otherwise
 */
public static AbstractFileSaver getSaverForFile(File file) {
  return getSaverForFile(file.getAbsolutePath());
}

/**
 * tries to determine the saver to use for this kind of extension, returns
 * null if none can be found
 *
 * @param extension the file extension to return a converter for
 * (including the leading dot, e.g. ".csv")
 * @return the converter if one was found, null otherwise
 */
public static AbstractFileSaver getSaverForExtension(
    String extension) {
  return (AbstractFileSaver) getConverterForExtension(extension,
      m_FileSavers);
}
1176: }
|