001: /*
002: * (c) Copyright 2002, 2003, 2004, 2005, 2006, 2007, 2008 Hewlett-Packard Development Company, LP
003: * [See end of file]
004: */
005:
006: package com.hp.hpl.jena.util;
007:
008: import java.io.*;
009: import java.net.URL;
010: import java.nio.charset.Charset;
011:
012: import org.apache.commons.logging.LogFactory;
013:
014: import com.hp.hpl.jena.JenaRuntime;
015: import com.hp.hpl.jena.shared.JenaException;
016: import com.hp.hpl.jena.shared.WrappedIOException;
017:
018: public class FileUtils {
019: public static final String langXML = "RDF/XML";
020: public static final String langXMLAbbrev = "RDF/XML-ABBREV";
021: public static final String langNTriple = "N-TRIPLE";
022: public static final String langN3 = "N3";
023: public static final String langTurtle = "TURTLE";
024: // Non-standard
025: public static final String langBDB = "RDF/BDB";
026: public static final String langSQL = "RDF/SQL";
027:
028: /** Java name for UTF-8 encoding */
029: public static final String encodingUTF8 = "utf-8";
030:
031: static Charset utf8 = null;
032: static {
033: try {
034: utf8 = Charset.forName(encodingUTF8);
035: } catch (Throwable ex) {
036: LogFactory.getLog(FileUtils.class).warn(
037: "Failed to get charset for UTF-8");
038: }
039: }
040:
041: /** Create a reader that uses UTF-8 encoding */
042:
043: static public Reader asUTF8(InputStream in) {
044: if (JenaRuntime.runUnder(JenaRuntime.featureNoCharset))
045: return new InputStreamReader(in);
046: // Not ,utf8 -- GNUClassPath (0.20) apparently fails on passing in a charset
047: // but if passed not the decoder or the name of the charset.
048: // Reported and fixed.
049: return new InputStreamReader(in, utf8.newDecoder());
050: }
051:
052: /** Create a buffered reader that uses UTF-8 encoding */
053:
054: static public BufferedReader asBufferedUTF8(InputStream in) {
055: return new BufferedReader(asUTF8(in));
056: }
057:
058: /** Create a writer that uses UTF-8 encoding */
059:
060: static public Writer asUTF8(OutputStream out) {
061: if (JenaRuntime.runUnder(JenaRuntime.featureNoCharset))
062: return new OutputStreamWriter(out);
063: return new OutputStreamWriter(out, utf8.newEncoder());
064: }
065:
066: /** Create a print writer that uses UTF-8 encoding */
067:
068: static public PrintWriter asPrintWriterUTF8(OutputStream out) {
069: return new PrintWriter(asUTF8(out));
070: }
071:
072: /** Guess the language/type of model data. Updated by Chris, hived off the
073: * model-suffix part to FileUtils as part of unifying it with similar code in FileGraph.
074: *
075: * <ul>
076: * <li> If the URI of the model starts jdbc: it is assumed to be an RDB model</li>
077: * <li> If the URI ends ".rdf", it is assumed to be RDF/XML</li>
078: * <li> If the URI end .nt, it is assumed to be N-Triples</li>
079: * <li> If the URI end .bdb, it is assumed to be BerkeleyDB model [suppressed at present]</li>
080: * </ul>
081: * @param name URL to base the guess on
082: * @param otherwise Default guess
083: * @return String Guessed syntax - or the default supplied
084: */
085:
086: public static String guessLang(String name, String otherwise) {
087: if (name.startsWith("jdbc:") || name.startsWith("JDBC:"))
088: return langSQL;
089:
090: String suffix = getFilenameExt(name);
091: if (suffix.equals("n3"))
092: return langN3;
093: if (suffix.equals("nt"))
094: return langNTriple;
095: if (suffix.equals("ttl"))
096: return langTurtle;
097: if (suffix.equals("rdf"))
098: return langXML;
099: if (suffix.equals("owl"))
100: return langXML;
101: return otherwise;
102: }
103:
104: /** Guess the language/type of model data
105: *
106: * <ul>
107: * <li> If the URI of the model starts jdbc: it is assumed to be an RDB model</li>
108: * <li> If the URI ends .rdf, it is assumed to be RDF/XML</li>
109: * <li> If the URI ends .n3, it is assumed to be N3</li>
110: * <li> If the URI ends .nt, it is assumed to be N-Triples</li>
111: * <li> If the URI ends .bdb, it is assumed to be BerkeleyDB model</li>
112: * </ul>
113: * @param urlStr URL to base the guess on
114: * @return String Guessed syntax - default is RDF/XML
115: */
116:
117: public static String guessLang(String urlStr) {
118: return guessLang(urlStr, langXML);
119: }
120:
121: /** Turn a file: URL or file name into a plain file name */
122:
123: public static String toFilename(String filenameOrURI) {
124: // Requirements of windows and Linux differ slightly here
125: // Windows wants "file:///c:/foo" => "c:/foo"
126: // but Linux only wants "file:///foo" => "/foo"
127: // Pragmatically, a path of "/c:/foo", or "/foo" works everywhere.
128: // but not "//c:/foo" or "///c:/foo"
129: // else IKVM thinks its a network path on Windows.
130:
131: // If it's a a file: we apply %-decoding.
132: // If there is no scheme name, we don't.
133:
134: if (!isFile(filenameOrURI))
135: return null;
136: // No scheme of file:
137: String fn = filenameOrURI;
138:
139: if (!fn.startsWith("file:"))
140: return fn;
141:
142: // file:
143: // Convert absolute file names
144: if (fn.startsWith("file:///"))
145: fn = fn.substring("file://".length());
146: else if (fn.startsWith("file://localhost/"))
147: // NB Leaves the leading slash on.
148: fn = fn.substring("file://localhost".length());
149: else
150: // Just trim off the file:
151: fn = fn.substring("file:".length());
152:
153: return decodeFileName(fn);
154: }
155:
156: public static String decodeFileName(String s) {
157: if (s.indexOf('%') < 0)
158: return s;
159: int len = s.length();
160: StringBuffer sbuff = new StringBuffer(len);
161:
162: // This is URIRef.decode()? Is that code used?
163: // Just decode % escapes.
164: // Not http://www.daml.org/2001/03/daml+oil
165: for (int i = 0; i < len; i++) {
166: char c = s.charAt(i);
167: switch (c) {
168: case '%':
169: int codepoint = Integer.parseInt(s.substring(i + 1,
170: i + 3), 16);
171: char ch = (char) codepoint;
172: sbuff.append(ch);
173: i = i + 2;
174: break;
175: default:
176: sbuff.append(c);
177: }
178: }
179: return sbuff.toString();
180: }
181:
182: /** Turn a plain filename into a "file:" URL */
183: public static String toURL(String filename) {
184: if (filename.length() > 5
185: && filename.substring(0, 5).equalsIgnoreCase("file:"))
186: return filename;
187:
188: /**
189: * Convert a File, note java.net.URI appears to do the right thing.
190: * viz:
191: * Convert to absolute path.
192: * Convert all % to %25.
193: * then convert all ' ' to %20.
194: * It quite probably does more e.g. ? #
195: * But has bug in only having one / not three at beginning
196:
197: */
198: return "file://"
199: + new File(filename).toURI().toString().substring(5);
200: }
201:
202: /**
203: *
204: * @deprecated Broken: use toURL()
205: */
206: public static String encodeFileName(String s) {
207: int len = s.length();
208: StringBuffer sbuff = new StringBuffer(len);
209:
210: // Convert a few charcaters that occur in filenames into a safe form.
211: for (int i = 0; i < len; i++) {
212: char c = s.charAt(i);
213: switch (c) {
214: case ' ':
215: case '~':
216: sbuff.append('%');
217: sbuff.append(Integer.toHexString(c).toUpperCase());
218: break;
219: default:
220: sbuff.append(c);
221: }
222: }
223: return sbuff.toString();
224: }
225:
226: /** Check whether 'name' is possibly a file reference
227: *
228: * @param name
229: * @return boolean False if clearly not a filename.
230: */
231: public static boolean isFile(String name) {
232: String scheme = getScheme(name);
233:
234: if (scheme == null)
235: // No URI scheme - treat as filename
236: return true;
237:
238: if (scheme.equals("file"))
239: // file: URI scheme
240: return true;
241:
242: // Windows: "c:" etc
243: if (scheme.length() == 1)
244: // file: URI scheme
245: return true;
246:
247: return false;
248: }
249:
250: /** Check whether a name is an absolute URI (has a scheme name)
251: *
252: * @param name
253: * @return boolean True if there is a scheme name
254: */
255: public static boolean isURI(String name) {
256: return (getScheme(name) != null);
257: }
258:
259: public static String getScheme(String uri) {
260: // Find "[^/:]*:.*"
261: for (int i = 0; i < uri.length(); i++) {
262: char ch = uri.charAt(i);
263: if (ch == ':')
264: return uri.substring(0, i);
265: if (!isASCIILetter(ch))
266: // Some illegal character before the ':'
267: break;
268: }
269: return null;
270: }
271:
272: private static boolean isASCIILetter(char ch) {
273: return (ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z');
274: }
275:
276: /**
277: * Get the directory part of a filename
278: * @param filename
279: * @return Directory name
280: */
281: public static String getDirname(String filename) {
282: File f = new File(filename);
283: return f.getParent();
284: }
285:
286: /** Get the basename of a filename
287: *
288: * @param filename
289: * @return Base filename.
290: */
291: public static String getBasename(String filename) {
292: File f = new File(filename);
293: return f.getName();
294: }
295:
296: /**
297: Get the suffix part of a file name or a URL in file-like format.
298: */
299: public static String getFilenameExt(String filename) {
300: int iSlash = filename.lastIndexOf('/');
301: int iBack = filename.lastIndexOf('\\');
302: int iExt = filename.lastIndexOf('.');
303: if (iBack > iSlash)
304: iSlash = iBack;
305: return iExt > iSlash ? filename.substring(iExt + 1)
306: .toLowerCase() : "";
307: }
308:
309: /**
310: create a temporary file that will be deleted on exit, and do something
311: sensible with any IO exceptions - namely, throw them up wrapped in
312: a JenaException.
313:
314: @param prefix the prefix for File.createTempFile
315: @param suffix the suffix for File.createTempFile
316: @return the temporary File
317: */
318: public static File tempFileName(String prefix, String suffix) {
319: File result = new File(getTempDirectory(), prefix
320: + randomNumber() + suffix);
321: if (result.exists())
322: return tempFileName(prefix, suffix);
323: result.deleteOnExit();
324: return result;
325: }
326:
327: /**
328: Answer a File naming a freshly-created directory in the temporary directory. This
329: directory should be deleted on exit.
330: TODO handle threading issues, mkdir failure, and better cleanup
331:
332: @param prefix the prefix for the directory name
333: @return a File naming the new directory
334: */
335: public static File getScratchDirectory(String prefix) {
336: File result = new File(getTempDirectory(), prefix
337: + randomNumber());
338: if (result.exists())
339: return getScratchDirectory(prefix);
340: if (result.mkdir() == false)
341: throw new JenaException("mkdir failed on " + result);
342: result.deleteOnExit();
343: return result;
344: }
345:
346: public static String getTempDirectory() {
347: return JenaRuntime.getSystemProperty("java.io.tmpdir");
348: }
349:
350: private static int counter = 0;
351:
352: private static int randomNumber() {
353: return ++counter;
354: }
355:
356: // TODO Replace with a FileManager
357: /**
358: Answer a BufferedReader than reads from the named resource file as
359: UTF-8, possibly throwing WrappedIOExceptions.
360: */
361: public static BufferedReader openResourceFile(String filename) {
362: try {
363: InputStream is = FileUtils
364: .openResourceFileAsStream(filename);
365: return new BufferedReader(
366: new InputStreamReader(is, "UTF-8"));
367: } catch (IOException e) {
368: throw new WrappedIOException(e);
369: }
370: }
371:
372: /**
373: * Open an resource file for reading.
374: */
375: public static InputStream openResourceFileAsStream(String filename)
376: throws FileNotFoundException {
377: InputStream is = ClassLoader
378: .getSystemResourceAsStream(filename);
379: if (is == null) {
380: // Try local loader with absolute path
381: is = FileUtils.class.getResourceAsStream("/" + filename);
382: if (is == null) {
383: // Try local loader, relative, just in case
384: is = FileUtils.class.getResourceAsStream(filename);
385: if (is == null) {
386: // Can't find it on classpath, so try relative to current directory
387: // Will throw security exception under and applet but there's not other choice left
388: is = new FileInputStream(filename);
389: }
390: }
391: }
392: return is;
393: }
394:
395: // TODO Replace with FileManager
396: public static BufferedReader readerFromURL(String urlStr) {
397: try {
398: return asBufferedUTF8(new URL(urlStr).openStream());
399: } catch (java.net.MalformedURLException e) { // Try as a plain filename.
400: try {
401: return asBufferedUTF8(new FileInputStream(urlStr));
402: } catch (FileNotFoundException f) {
403: throw new WrappedIOException(f);
404: }
405: } catch (IOException e) {
406: throw new WrappedIOException(e);
407: }
408: }
409:
410: /** Read a whole file as UTF-8
411: * @param filename
412: * @return String
413: * @throws IOException
414: */
415:
416: public static String readWholeFileAsUTF8(String filename)
417: throws IOException {
418: InputStream in = new FileInputStream(filename);
419: return readWholeFileAsUTF8(in);
420: }
421:
422: /** Read a whole stream as UTF-8
423: *
424: * @param in InputStream to be read
425: * @return String
426: * @throws IOException
427: */
428: public static String readWholeFileAsUTF8(InputStream in)
429: throws IOException {
430: Reader r = new BufferedReader(asUTF8(in), 1024);
431: return readWholeFileAsUTF8(r);
432: }
433:
434: /** Read a whole file as UTF-8
435: *
436: * @param r
437: * @return String The whole file
438: * @throws IOException
439: */
440:
441: // Private worker as we are trying to force UTF-8.
442: private static String readWholeFileAsUTF8(Reader r)
443: throws IOException {
444: StringWriter sw = new StringWriter(1024);
445: char buff[] = new char[1024];
446: while (r.ready()) {
447: int l = r.read(buff);
448: if (l <= 0)
449: break;
450: sw.write(buff, 0, l);
451: }
452: r.close();
453: sw.close();
454: return sw.toString();
455: }
456:
457: }
458:
459: /*
460: * (c) Copyright 2002, 2003, 2004, 2005, 2006, 2007, 2008 Hewlett-Packard Development Company, LP
461: * All rights reserved.
462: *
463: * Redistribution and use in source and binary forms, with or without
464: * modification, are permitted provided that the following conditions
465: * are met:
466: * 1. Redistributions of source code must retain the above copyright
467: * notice, this list of conditions and the following disclaimer.
468: * 2. Redistributions in binary form must reproduce the above copyright
469: * notice, this list of conditions and the following disclaimer in the
470: * documentation and/or other materials provided with the distribution.
471: * 3. The name of the author may not be used to endorse or promote products
472: * derived from this software without specific prior written permission.
473: *
474: * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
475: * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
476: * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
477: * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
478: * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
479: * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
480: * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
481: * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
482: * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
483: * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
484: */
|