001: /* FileUtils
002: *
003: * $Id: FileUtils.java 4936 2007-02-23 02:19:00Z gojomo $
004: *
005: * Created on Feb 2, 2004
006: *
007: * Copyright (C) 2004 Internet Archive.
008: *
009: * This file is part of the Heritrix web crawler (crawler.archive.org).
010: *
011: * Heritrix is free software; you can redistribute it and/or modify
012: * it under the terms of the GNU Lesser Public License as published by
013: * the Free Software Foundation; either version 2.1 of the License, or
014: * any later version.
015: *
016: * Heritrix is distributed in the hope that it will be useful,
017: * but WITHOUT ANY WARRANTY; without even the implied warranty of
018: * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
019: * GNU Lesser Public License for more details.
020: *
021: * You should have received a copy of the GNU Lesser Public License
022: * along with Heritrix; if not, write to the Free Software
023: * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
024: */
025: package org.archive.util;
026:
027: import java.io.BufferedReader;
028: import java.io.File;
029: import java.io.FileFilter;
030: import java.io.FileInputStream;
031: import java.io.FileNotFoundException;
032: import java.io.FileOutputStream;
033: import java.io.FilenameFilter;
034: import java.io.IOException;
035: import java.io.InputStreamReader;
036: import java.nio.channels.FileChannel;
037: import java.util.Arrays;
038: import java.util.HashSet;
039: import java.util.Iterator;
040: import java.util.List;
041: import java.util.Set;
042: import java.util.logging.Level;
043: import java.util.logging.Logger;
044: import java.util.regex.Pattern;
045:
/**
 * Utility methods for manipulating files and directories.
 *
 * <p>All methods are static; the class cannot be instantiated.
 *
 * @author John Erik Halse
 */
public class FileUtils {
    private static final Logger LOGGER = Logger
            .getLogger(FileUtils.class.getName());

    /**
     * Temporary directory, taken from the <code>java.io.tmpdir</code>
     * system property (falls back to <code>/tmp</code> if it is unset).
     */
    public static final File TMPDIR = new File(System.getProperty(
            "java.io.tmpdir", "/tmp"));

    /** Overwrite policy used by the copyFile overloads that take no flag. */
    private static final boolean DEFAULT_OVERWRITE = true;

    /** Buffer size, in bytes, used by the stream-based fallback copy. */
    private static final int COPY_BUFFER_SIZE = 4096;

    /**
     * Constructor made private because all methods of this class are static.
     */
    private FileUtils() {
        super();
    }

    /**
     * Copy the named files from one directory to another.
     *
     * @param srcDir Directory the named files live in.
     * @param srcFile Names (Strings) of the files in <code>srcDir</code>
     * to copy.
     * @param dest Directory to copy the files into.
     * @return Number of names processed.
     * @throws IOException If a copy fails.
     * @throws ClassCastException If the set holds a non-String element.
     */
    public static int copyFiles(final File srcDir, Set<?> srcFile,
            final File dest) throws IOException {
        int count = 0;
        for (Object entry : srcFile) {
            String name = (String) entry;
            File src = new File(srcDir, name);
            File tgt = new File(dest, name);
            if (LOGGER.isLoggable(Level.FINE)) {
                LOGGER.fine("Before " + src.getAbsolutePath() + " "
                        + src.exists() + ", " + tgt.getAbsolutePath()
                        + " " + tgt.exists());
            }
            copyFiles(src, tgt);
            if (LOGGER.isLoggable(Level.FINE)) {
                LOGGER.fine("After " + src.getAbsolutePath() + " "
                        + src.exists() + ", " + tgt.getAbsolutePath()
                        + " " + tgt.exists());
            }
            count++;
        }
        return count;
    }

    /**
     * Recursively copy all files from one directory to another.
     * Existing target files are overwritten; no filter is applied and no
     * particular copy order is guaranteed.
     *
     * @param src file or directory to copy from.
     * @param dest file or directory to copy to.
     * @throws IOException If a directory cannot be listed or created, or a
     * file copy fails.
     */
    public static void copyFiles(File src, File dest)
            throws IOException {
        copyFiles(src, null, dest, false, true);
    }

    /**
     * List the names in a directory in natural (String) sort order.
     *
     * @param src Directory of files to fetch.
     * @param filter Filter to apply to filenames. May be null.
     * @return Names in the directory, sorted, or null if <code>src</code>
     * does not exist, is not a directory, or cannot be listed.
     */
    public static String[] getSortedDirContent(final File src,
            final FilenameFilter filter) {
        if (!src.exists()) {
            if (LOGGER.isLoggable(Level.FINE)) {
                LOGGER.fine(src.getAbsolutePath() + " does not exist");
            }
            return null;
        }

        if (!src.isDirectory()) {
            if (LOGGER.isLoggable(Level.FINE)) {
                LOGGER.fine(src.getAbsolutePath()
                        + " is not directory.");
            }
            return null;
        }
        // Go through the contents of the directory
        String[] list = (filter == null) ? src.list() : src
                .list(filter);
        if (list != null) {
            Arrays.sort(list);
        }
        return list;
    }

    /**
     * Recursively copy all files from one directory to another.
     *
     * <p>A source that does not exist is silently skipped. The filter is
     * applied at every directory level, so a subdirectory whose name is
     * rejected by the filter is not descended into.
     *
     * @param src File or directory to copy from.
     * @param filter Filename filter to apply to src. May be null if no
     * filtering wanted.
     * @param dest File or directory to copy to.
     * @param inSortedOrder Copy in order of natural sort.
     * @param overwrite If target file already exits, and this parameter is
     * true, overwrite target file (We do this by first deleting the target
     * file before we begin the copy).
     * @throws IOException If a directory cannot be created or listed, or a
     * file copy fails.
     */
    public static void copyFiles(final File src,
            final FilenameFilter filter, final File dest,
            final boolean inSortedOrder, final boolean overwrite)
            throws IOException {
        if (!src.exists()) {
            if (LOGGER.isLoggable(Level.FINE)) {
                LOGGER.fine(src.getAbsolutePath() + " does not exist");
            }
            return;
        }

        if (!src.isDirectory()) {
            copyFile(src, dest, overwrite);
            return;
        }

        if (LOGGER.isLoggable(Level.FINE)) {
            LOGGER.fine(src.getAbsolutePath() + " is a directory.");
        }
        // Create destination directory. mkdirs() also returns false when
        // someone else created the directory in the meantime, so re-check
        // before declaring failure.
        if (!dest.exists() && !dest.mkdirs() && !dest.isDirectory()) {
            throw new IOException("Failed to create directory "
                    + dest.getAbsolutePath());
        }
        // Go through the contents of the directory
        String[] list = listOrFail(src, filter);
        if (inSortedOrder) {
            Arrays.sort(list);
        }
        for (int i = 0; i < list.length; i++) {
            copyFiles(new File(src, list[i]), filter, new File(dest,
                    list[i]), inSortedOrder, overwrite);
        }
    }

    /**
     * Copy the src file to the destination, overwriting any existing
     * destination file.
     *
     * @param src File to copy from.
     * @param dest File to copy to.
     * @return True if the whole source file was copied.
     * @throws FileNotFoundException If src cannot be opened for reading or
     * dest cannot be opened for writing.
     * @throws IOException If the copy fails.
     */
    public static boolean copyFile(final File src, final File dest)
            throws FileNotFoundException, IOException {
        return copyFile(src, dest, -1, DEFAULT_OVERWRITE);
    }

    /**
     * Copy the src file to the destination.
     *
     * @param src File to copy from.
     * @param dest File to copy to.
     * @param overwrite If target file already exits, and this parameter is
     * true, overwrite target file (We do this by first deleting the target
     * file before we begin the copy).
     * @return True if the whole source file was copied; false if the
     * target existed and <code>overwrite</code> was false.
     * @throws FileNotFoundException If src cannot be opened for reading or
     * dest cannot be opened for writing.
     * @throws IOException If the copy fails.
     */
    public static boolean copyFile(final File src, final File dest,
            final boolean overwrite) throws FileNotFoundException,
            IOException {
        return copyFile(src, dest, -1, overwrite);
    }

    /**
     * Copy up to extent bytes of the source file to the destination,
     * overwriting any existing destination file.
     *
     * @param src File to copy from.
     * @param dest File to copy to.
     * @param extent Maximum number of bytes to copy; negative means the
     * whole file.
     * @return True if <code>extent</code> bytes were copied; false if the
     * source held fewer than <code>extent</code> bytes.
     * @throws FileNotFoundException If src cannot be opened for reading or
     * dest cannot be opened for writing.
     * @throws IOException If the copy fails.
     */
    public static boolean copyFile(final File src, final File dest,
            long extent) throws FileNotFoundException, IOException {
        return copyFile(src, dest, extent, DEFAULT_OVERWRITE);
    }

    /**
     * Copy up to extent bytes of the source file to the destination.
     *
     * @param src File to copy from.
     * @param dest File to copy to.
     * @param extent Maximum number of bytes to copy; negative means the
     * whole file.
     * @param overwrite If target file already exits, and this parameter is
     * true, overwrite target file (We do this by first deleting the target
     * file before we begin the copy).
     * @return True if <code>extent</code> bytes (or, for a negative extent,
     * the whole file) were copied. False if fewer bytes were available in
     * the source, or if the target existed and <code>overwrite</code> was
     * false so nothing was copied.
     * @throws FileNotFoundException If src cannot be opened for reading or
     * dest cannot be opened for writing.
     * @throws IOException If the copy fails.
     */
    public static boolean copyFile(final File src, final File dest,
            long extent, final boolean overwrite)
            throws FileNotFoundException, IOException {
        if (LOGGER.isLoggable(Level.FINE)) {
            LOGGER.fine("Copying file " + src + " to " + dest
                    + " extent " + extent + " exists " + dest.exists());
        }
        if (dest.exists()) {
            if (!overwrite) {
                // Already in place and we're not to overwrite. Return.
                return false;
            }
            if (dest.delete()) {
                LOGGER.finer(dest.getAbsolutePath()
                        + " removed before copy.");
            } else {
                // Not fatal: opening the output stream truncates it.
                LOGGER.warning(dest.getAbsolutePath()
                        + " could not be removed before copy.");
            }
        }

        FileInputStream fis = null;
        FileOutputStream fos = null;
        FileChannel fcin = null;
        FileChannel fcout = null;
        long copied = 0;
        boolean useWorkaround = false;
        try {
            // Get channels
            fis = new FileInputStream(src);
            fos = new FileOutputStream(dest);
            fcin = fis.getChannel();
            fcout = fos.getChannel();
            if (extent < 0) {
                extent = fcin.size();
            }

            // Do the file copy. transferTo() is allowed to move fewer
            // bytes than asked for, so keep going until the extent is
            // reached or the source is exhausted (zero bytes moved).
            while (copied < extent) {
                long moved = fcin.transferTo(copied, extent - copied,
                        fcout);
                if (moved <= 0) {
                    break;
                }
                copied += moved;
            }
            // Close the output here, not only in finally, so that a
            // failure to flush is reported instead of swallowed.
            fos.close();
        } catch (FileNotFoundException e) {
            // Keep the declared, more specific exception type.
            FileNotFoundException fnfe = new FileNotFoundException(
                    copyFailureMessage(src, dest, extent, e));
            fnfe.initCause(e);
            throw fnfe;
        } catch (IOException e) {
            // Add more info to the exception. Preserve the original as
            // cause. We get 'Invalid argument' on some file copies. See
            // http://intellij.net/forums/thread.jsp?forum=13&thread=63027&message=853123
            // for related issue.
            String message = copyFailureMessage(src, dest, extent, e);
            if ("Invalid argument".equals(e.getMessage())) {
                LOGGER.severe("Failed copy, trying workaround: "
                        + message);
                // Run the workaround only after the channels below have
                // been closed.
                useWorkaround = true;
            } else {
                IOException newE = new IOException(message);
                newE.initCause(e);
                throw newE;
            }
        } finally {
            // finish up; every resource gets its close attempt
            closeQuietly(fcin);
            closeQuietly(fcout);
            closeQuietly(fis);
            closeQuietly(fos);
        }

        if (useWorkaround) {
            copied = streamCopy(src, dest, extent);
        }
        // extent can only still be negative here if the channel copy
        // failed before the size was read; the fallback then copied the
        // whole file.
        return extent < 0 || copied >= extent;
    }

    /**
     * Copy the whole of src to dest using plain stream reads and writes.
     * Used when the channel-based copy fails.
     *
     * @param src File to copy from.
     * @param dest File to copy to.
     * @throws IOException If reading, writing, or closing the destination
     * fails.
     */
    protected static void workaroundCopyFile(final File src,
            final File dest) throws IOException {
        streamCopy(src, dest, -1);
    }

    /**
     * Stream-copy up to extent bytes (all bytes if extent is negative).
     *
     * @return Number of bytes copied.
     */
    private static long streamCopy(final File src, final File dest,
            final long extent) throws IOException {
        FileInputStream from = null;
        FileOutputStream to = null;
        long total = 0;
        try {
            from = new FileInputStream(src);
            to = new FileOutputStream(dest);
            byte[] buffer = new byte[COPY_BUFFER_SIZE];
            while (extent < 0 || total < extent) {
                int want = buffer.length;
                if (extent >= 0 && extent - total < want) {
                    want = (int) (extent - total);
                }
                int bytesRead = from.read(buffer, 0, want);
                if (bytesRead == -1) {
                    break;
                }
                to.write(buffer, 0, bytesRead);
                total += bytesRead;
            }
            // Report close failures on the output; they can mean data
            // never reached the disk.
            to.close();
        } finally {
            closeQuietly(from);
            closeQuietly(to);
        }
        return total;
    }

    /** Build the message used when a file copy fails. */
    private static String copyFailureMessage(final File src,
            final File dest, final long extent, final IOException e) {
        return "Copying " + src.getAbsolutePath() + " to "
                + dest.getAbsolutePath() + " with extent " + extent
                + " got IOE: " + e.getMessage();
    }

    /** Close a resource, logging rather than throwing on failure. */
    private static void closeQuietly(final java.io.Closeable resource) {
        if (resource == null) {
            return;
        }
        try {
            resource.close();
        } catch (IOException e) {
            LOGGER.log(Level.WARNING, "Failed close", e);
        }
    }

    /** List names in dir, failing loudly instead of returning null. */
    private static String[] listOrFail(final File dir,
            final FilenameFilter filter) throws IOException {
        // A null filter accepts all names.
        String[] names = dir.list(filter);
        if (names == null) {
            throw new IOException("Unable to list directory "
                    + dir.getAbsolutePath());
        }
        return names;
    }

    /**
     * Deletes all files and subdirectories under dir, then dir itself.
     *
     * @param dir File or directory to delete.
     * @return true if all deletions were successful. If a deletion fails, the
     * method stops attempting to delete and returns false.
     */
    public static boolean deleteDir(File dir) {
        if (dir.isDirectory()) {
            // list() returns null when the directory cannot be read; in
            // that case fall through and let delete() report the outcome.
            String[] children = dir.list();
            if (children != null) {
                for (int i = 0; i < children.length; i++) {
                    if (!deleteDir(new File(dir, children[i]))) {
                        return false;
                    }
                }
            }
        }
        // The directory is now empty so delete it
        return dir.delete();
    }

    /**
     * Utility method to read an entire file as a String.
     *
     * <p>The file is decoded with the platform default charset. Every line
     * terminator is normalized to <code>\n</code> and the result always
     * ends with <code>\n</code> if the file had at least one line.
     *
     * @param file File to read.
     * @return File as String.
     * @throws IOException If the file cannot be opened or read.
     */
    public static String readFileAsString(File file) throws IOException {
        // Clamp so a file over 2GB cannot yield a negative capacity.
        int capacity = (int) Math.min(file.length(),
                (long) Integer.MAX_VALUE);
        StringBuilder sb = new StringBuilder(capacity);
        BufferedReader br = new BufferedReader(new InputStreamReader(
                new FileInputStream(file)));
        try {
            String line = br.readLine();
            while (line != null) {
                sb.append(line);
                sb.append("\n");
                line = br.readLine();
            }
        } finally {
            closeQuietly(br);
        }
        return sb.toString();
    }

    /**
     * Get a list of all files in directory that have passed prefix.
     *
     * @param dir Dir to look in.
     * @param prefix Basename of files to look for. Compare is case insensitive.
     *
     * @return List of files in dir that start w/ passed basename, or null
     * if <code>dir</code> is not a directory or cannot be listed.
     */
    public static File[] getFilesWithPrefix(File dir,
            final String prefix) {
        // Lower-case the prefix once rather than once per candidate.
        final String lowerPrefix = prefix.toLowerCase();
        FileFilter prefixFilter = new FileFilter() {
            public boolean accept(File pathname) {
                return pathname.getName().toLowerCase().startsWith(
                        lowerPrefix);
            }
        };
        return dir.listFiles(prefixFilter);
    }

    /**
     * Get a {@link java.io.FileFilter} that filters files based on a
     * regular expression. The expression must match the whole file name
     * (not the path).
     *
     * @param regexp the regular expression the files must match.
     * @return the newly created filter.
     */
    public static FileFilter getRegexpFileFilter(String regexp) {
        // Inner class defining the RegexpFileFilter
        class RegexpFileFilter implements FileFilter {
            Pattern pattern;

            protected RegexpFileFilter(String re) {
                pattern = Pattern.compile(re);
            }

            public boolean accept(File pathname) {
                return pattern.matcher(pathname.getName()).matches();
            }
        }

        return new RegexpFileFilter(regexp);
    }

    /**
     * Use for case where files are being added to src. Will break off copy
     * when tgt is same as src.
     *
     * <p>Repeatedly copies every name present in src but missing from tgt
     * until a pass finds nothing left to copy.
     *
     * @param src Source directory to copy from.
     * @param tgt Target to copy to.
     * @param filter Filter to apply to files to copy.
     * @throws IOException If either directory cannot be listed or a copy
     * fails.
     */
    public static void syncDirectories(final File src,
            final FilenameFilter filter, final File tgt)
            throws IOException {
        Set<String> srcFilenames;
        do {
            srcFilenames = new HashSet<String>(Arrays.asList(listOrFail(
                    src, filter)));
            List<String> tgtFilenames = Arrays.asList(listOrFail(tgt,
                    filter));
            srcFilenames.removeAll(tgtFilenames);
            if (srcFilenames.size() > 0) {
                int count = FileUtils.copyFiles(src, srcFilenames, tgt);
                if (LOGGER.isLoggable(Level.FINE)) {
                    LOGGER.fine("Copied " + count);
                }
            }
        } while (srcFilenames.size() > 0);
    }

    /**
     * Test file exists and is readable.
     *
     * @param f File to test.
     * @return The passed file, for call chaining.
     * @exception IOException If file does not exist or is not readable.
     */
    public static File isReadable(final File f) throws IOException {
        if (!f.exists()) {
            throw new FileNotFoundException(f.getAbsolutePath()
                    + " does not exist.");
        }

        if (!f.canRead()) {
            throw new FileNotFoundException(f.getAbsolutePath()
                    + " is not readable.");
        }

        return f;
    }

    /**
     * Test that a file is readable, has the given extension, and starts
     * with the given magic string.
     *
     * @param f File to test.
     * @param uncompressedExtension Extension the lower-cased file name
     * must end with; pass it in lower case.
     * @param magic String expected at the very start of the file. Compared
     * case-insensitively against the leading bytes read as ISO-8859-1.
     * @return True if file is readable, has uncompressed extension,
     * and magic string at file start.
     * @exception IOException If file does not exist or is not readable.
     */
    public static boolean isReadableWithExtensionAndMagic(final File f,
            final String uncompressedExtension, final String magic)
            throws IOException {
        FileUtils.isReadable(f);
        if (!f.getName().toLowerCase().endsWith(uncompressedExtension)) {
            return false;
        }

        byte[] b = new byte[magic.length()];
        int filled = 0;
        FileInputStream fis = new FileInputStream(f);
        try {
            // A single read() may return fewer bytes than requested;
            // loop until the buffer is full or the file ends.
            while (filled < b.length) {
                int n = fis.read(b, filled, b.length - filled);
                if (n < 0) {
                    break;
                }
                filled += n;
            }
        } finally {
            closeQuietly(fis);
        }

        if (filled != b.length) {
            return false;
        }
        StringBuilder beginStr = new StringBuilder(b.length);
        for (int i = 0; i < b.length; i++) {
            // Mask to avoid sign extension of bytes >= 0x80.
            beginStr.append((char) (b[i] & 0xff));
        }
        return beginStr.toString().equalsIgnoreCase(magic);
    }

    /**
     * Turn path into a File, relative to context (which may be ignored
     * if path is absolute).
     *
     * @param context File context if path is relative
     * @param path String path to make into a File
     * @return File created
     */
    public static File maybeRelative(File context, String path) {
        File f = new File(path);
        if (f.isAbsolute()) {
            return f;
        }
        return new File(context, path);
    }
}
|