0001: /*
0002: * Licensed to the Apache Software Foundation (ASF) under one or more
0003: * contributor license agreements. See the NOTICE file distributed with
0004: * this work for additional information regarding copyright ownership.
0005: * The ASF licenses this file to You under the Apache License, Version 2.0
0006: * (the "License"); you may not use this file except in compliance with
0007: * the License. You may obtain a copy of the License at
0008: *
0009: * http://www.apache.org/licenses/LICENSE-2.0
0010: *
0011: * Unless required by applicable law or agreed to in writing, software
0012: * distributed under the License is distributed on an "AS IS" BASIS,
0013: * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
0014: * See the License for the specific language governing permissions and
0015: * limitations under the License.
0016: */
0017: package org.apache.commons.io;
0018:
0019: import java.io.File;
0020: import java.util.ArrayList;
0021: import java.util.Collection;
0022: import java.util.Iterator;
0023: import java.util.Stack;
0024:
0025: /**
0026: * General filename and filepath manipulation utilities.
0027: * <p>
0028: * When dealing with filenames you can hit problems when moving from a Windows
0029: * based development machine to a Unix based production machine.
0030: * This class aims to help avoid those problems.
0031: * <p>
0032: * <b>NOTE</b>: You may be able to avoid using this class entirely simply by
0033: * using JDK {@link java.io.File File} objects and the two argument constructor
0034: * {@link java.io.File#File(java.io.File, java.lang.String) File(File,String)}.
0035: * <p>
0036: * Most methods on this class are designed to work the same on both Unix and Windows.
0037: * Those that don't include 'System', 'Unix' or 'Windows' in their name.
0038: * <p>
0039: * Most methods recognise both separators (forward and back), and both
0040: * sets of prefixes. See the javadoc of each method for details.
0041: * <p>
0042: * This class defines six components within a filename
0043: * (example C:\dev\project\file.txt):
0044: * <ul>
0045: * <li>the prefix - C:\</li>
0046: * <li>the path - dev\project\</li>
0047: * <li>the full path - C:\dev\project\</li>
0048: * <li>the name - file.txt</li>
0049: * <li>the base name - file</li>
0050: * <li>the extension - txt</li>
0051: * </ul>
0052: * Note that this class works best if directory filenames end with a separator.
0053: * If you omit the last separator, it is impossible to determine if the filename
0054: * corresponds to a file or a directory. As a result, we have chosen to say
0055: * it corresponds to a file.
0056: * <p>
0057: * This class only supports Unix and Windows style names.
0058: * Prefixes are matched as follows:
0059: * <pre>
0060: * Windows:
0061: * a\b\c.txt --> "" --> relative
0062: * \a\b\c.txt --> "\" --> current drive absolute
0063: * C:a\b\c.txt --> "C:" --> drive relative
0064: * C:\a\b\c.txt --> "C:\" --> absolute
0065: * \\server\a\b\c.txt --> "\\server\" --> UNC
0066: *
0067: * Unix:
0068: * a/b/c.txt --> "" --> relative
0069: * /a/b/c.txt --> "/" --> absolute
0070: * ~/a/b/c.txt --> "~/" --> current user
0071: * ~ --> "~/" --> current user (slash added)
0072: * ~user/a/b/c.txt --> "~user/" --> named user
0073: * ~user --> "~user/" --> named user (slash added)
0074: * </pre>
0075: * Both prefix styles are matched always, irrespective of the machine that you are
0076: * currently running on.
0077: * <p>
0078: * Origin of code: Excalibur, Alexandria, Tomcat, Commons-Utils.
0079: *
0080: * @author <a href="mailto:burton@relativity.yi.org">Kevin A. Burton</A>
0081: * @author <a href="mailto:sanders@apache.org">Scott Sanders</a>
0082: * @author <a href="mailto:dlr@finemaltcoding.com">Daniel Rall</a>
0083: * @author <a href="mailto:Christoph.Reck@dlr.de">Christoph.Reck</a>
0084: * @author <a href="mailto:peter@apache.org">Peter Donald</a>
0085: * @author <a href="mailto:jefft@apache.org">Jeff Turner</a>
0086: * @author Matthew Hawthorne
0087: * @author Martin Cooper
0088: * @author <a href="mailto:jeremias@apache.org">Jeremias Maerki</a>
0089: * @author Stephen Colebourne
0090: * @version $Id: FilenameUtils.java 490424 2006-12-27 01:20:43Z bayard $
0091: * @since Commons IO 1.1
0092: */
0093: public class FilenameUtils {
0094:
0095: /**
0096: * The extension separator character.
0097: */
0098: private static final char EXTENSION_SEPARATOR = '.';
0099:
0100: /**
0101: * The Unix separator character.
0102: */
0103: private static final char UNIX_SEPARATOR = '/';
0104:
0105: /**
0106: * The Windows separator character.
0107: */
0108: private static final char WINDOWS_SEPARATOR = '\\';
0109:
0110: /**
0111: * The system separator character.
0112: */
0113: private static final char SYSTEM_SEPARATOR = File.separatorChar;
0114:
0115: /**
0116: * The separator character that is the opposite of the system separator.
0117: */
0118: private static final char OTHER_SEPARATOR;
0119: static {
0120: if (isSystemWindows()) {
0121: OTHER_SEPARATOR = UNIX_SEPARATOR;
0122: } else {
0123: OTHER_SEPARATOR = WINDOWS_SEPARATOR;
0124: }
0125: }
0126:
0127: /**
0128: * Instances should NOT be constructed in standard programming.
0129: */
0130: public FilenameUtils() {
0131: super ();
0132: }
0133:
0134: //-----------------------------------------------------------------------
0135: /**
0136: * Determines if Windows file system is in use.
0137: *
0138: * @return true if the system is Windows
0139: */
0140: static boolean isSystemWindows() {
0141: return SYSTEM_SEPARATOR == WINDOWS_SEPARATOR;
0142: }
0143:
0144: //-----------------------------------------------------------------------
0145: /**
0146: * Checks if the character is a separator.
0147: *
0148: * @param ch the character to check
0149: * @return true if it is a separator character
0150: */
0151: private static boolean isSeparator(char ch) {
0152: return (ch == UNIX_SEPARATOR) || (ch == WINDOWS_SEPARATOR);
0153: }
0154:
0155: //-----------------------------------------------------------------------
0156: /**
0157: * Normalizes a path, removing double and single dot path steps.
0158: * <p>
0159: * This method normalizes a path to a standard format.
0160: * The input may contain separators in either Unix or Windows format.
0161: * The output will contain separators in the format of the system.
0162: * <p>
0163: * A trailing slash will be retained.
0164: * A double slash will be merged to a single slash (but UNC names are handled).
0165: * A single dot path segment will be removed.
0166: * A double dot will cause that path segment and the one before to be removed.
0167: * If the double dot has no parent path segment to work with, <code>null</code>
0168: * is returned.
0169: * <p>
0170: * The output will be the same on both Unix and Windows except
0171: * for the separator character.
0172: * <pre>
0173: * /foo// --> /foo/
0174: * /foo/./ --> /foo/
0175: * /foo/../bar --> /bar
0176: * /foo/../bar/ --> /bar/
0177: * /foo/../bar/../baz --> /baz
0178: * //foo//./bar --> /foo/bar
0179: * /../ --> null
0180: * ../foo --> null
0181: * foo/bar/.. --> foo/
0182: * foo/../../bar --> null
0183: * foo/../bar --> bar
0184: * //server/foo/../bar --> //server/bar
0185: * //server/../bar --> null
0186: * C:\foo\..\bar --> C:\bar
0187: * C:\..\bar --> null
0188: * ~/foo/../bar/ --> ~/bar/
0189: * ~/../bar --> null
0190: * </pre>
0191: * (Note the file separator returned will be correct for Windows/Unix)
0192: *
0193: * @param filename the filename to normalize, null returns null
0194: * @return the normalized filename, or null if invalid
0195: */
0196: public static String normalize(String filename) {
0197: return doNormalize(filename, true);
0198: }
0199:
0200: //-----------------------------------------------------------------------
0201: /**
0202: * Normalizes a path, removing double and single dot path steps,
0203: * and removing any final directory separator.
0204: * <p>
0205: * This method normalizes a path to a standard format.
0206: * The input may contain separators in either Unix or Windows format.
0207: * The output will contain separators in the format of the system.
0208: * <p>
0209: * A trailing slash will be removed.
0210: * A double slash will be merged to a single slash (but UNC names are handled).
0211: * A single dot path segment will be removed.
0212: * A double dot will cause that path segment and the one before to be removed.
0213: * If the double dot has no parent path segment to work with, <code>null</code>
0214: * is returned.
0215: * <p>
0216: * The output will be the same on both Unix and Windows except
0217: * for the separator character.
0218: * <pre>
0219: * /foo// --> /foo
0220: * /foo/./ --> /foo
0221: * /foo/../bar --> /bar
0222: * /foo/../bar/ --> /bar
0223: * /foo/../bar/../baz --> /baz
0224: * //foo//./bar --> /foo/bar
0225: * /../ --> null
0226: * ../foo --> null
0227: * foo/bar/.. --> foo
0228: * foo/../../bar --> null
0229: * foo/../bar --> bar
0230: * //server/foo/../bar --> //server/bar
0231: * //server/../bar --> null
0232: * C:\foo\..\bar --> C:\bar
0233: * C:\..\bar --> null
0234: * ~/foo/../bar/ --> ~/bar
0235: * ~/../bar --> null
0236: * </pre>
0237: * (Note the file separator returned will be correct for Windows/Unix)
0238: *
0239: * @param filename the filename to normalize, null returns null
0240: * @return the normalized filename, or null if invalid
0241: */
0242: public static String normalizeNoEndSeparator(String filename) {
0243: return doNormalize(filename, false);
0244: }
0245:
0246: /**
0247: * Internal method to perform the normalization.
0248: *
0249: * @param filename the filename
0250: * @param keepSeparator true to keep the final separator
0251: * @return the normalized filename
0252: */
0253: private static String doNormalize(String filename,
0254: boolean keepSeparator) {
0255: if (filename == null) {
0256: return null;
0257: }
0258: int size = filename.length();
0259: if (size == 0) {
0260: return filename;
0261: }
0262: int prefix = getPrefixLength(filename);
0263: if (prefix < 0) {
0264: return null;
0265: }
0266:
0267: char[] array = new char[size + 2]; // +1 for possible extra slash, +2 for arraycopy
0268: filename.getChars(0, filename.length(), array, 0);
0269:
0270: // fix separators throughout
0271: for (int i = 0; i < array.length; i++) {
0272: if (array[i] == OTHER_SEPARATOR) {
0273: array[i] = SYSTEM_SEPARATOR;
0274: }
0275: }
0276:
0277: // add extra separator on the end to simplify code below
0278: boolean lastIsDirectory = true;
0279: if (array[size - 1] != SYSTEM_SEPARATOR) {
0280: array[size++] = SYSTEM_SEPARATOR;
0281: lastIsDirectory = false;
0282: }
0283:
0284: // adjoining slashes
0285: for (int i = prefix + 1; i < size; i++) {
0286: if (array[i] == SYSTEM_SEPARATOR
0287: && array[i - 1] == SYSTEM_SEPARATOR) {
0288: System.arraycopy(array, i, array, i - 1, size - i);
0289: size--;
0290: i--;
0291: }
0292: }
0293:
0294: // dot slash
0295: for (int i = prefix + 1; i < size; i++) {
0296: if (array[i] == SYSTEM_SEPARATOR
0297: && array[i - 1] == '.'
0298: && (i == prefix + 1 || array[i - 2] == SYSTEM_SEPARATOR)) {
0299: if (i == size - 1) {
0300: lastIsDirectory = true;
0301: }
0302: System.arraycopy(array, i + 1, array, i - 1, size - i);
0303: size -= 2;
0304: i--;
0305: }
0306: }
0307:
0308: // double dot slash
0309: outer: for (int i = prefix + 2; i < size; i++) {
0310: if (array[i] == SYSTEM_SEPARATOR
0311: && array[i - 1] == '.'
0312: && array[i - 2] == '.'
0313: && (i == prefix + 2 || array[i - 3] == SYSTEM_SEPARATOR)) {
0314: if (i == prefix + 2) {
0315: return null;
0316: }
0317: if (i == size - 1) {
0318: lastIsDirectory = true;
0319: }
0320: int j;
0321: for (j = i - 4; j >= prefix; j--) {
0322: if (array[j] == SYSTEM_SEPARATOR) {
0323: // remove b/../ from a/b/../c
0324: System.arraycopy(array, i + 1, array, j + 1,
0325: size - i);
0326: size -= (i - j);
0327: i = j + 1;
0328: continue outer;
0329: }
0330: }
0331: // remove a/../ from a/../c
0332: System.arraycopy(array, i + 1, array, prefix, size - i);
0333: size -= (i + 1 - prefix);
0334: i = prefix + 1;
0335: }
0336: }
0337:
0338: if (size <= 0) { // should never be less than 0
0339: return "";
0340: }
0341: if (size <= prefix) { // should never be less than prefix
0342: return new String(array, 0, size);
0343: }
0344: if (lastIsDirectory && keepSeparator) {
0345: return new String(array, 0, size); // keep trailing separator
0346: }
0347: return new String(array, 0, size - 1); // lose trailing separator
0348: }
0349:
0350: //-----------------------------------------------------------------------
0351: /**
0352: * Concatenates a filename to a base path using normal command line style rules.
0353: * <p>
0354: * The effect is equivalent to resultant directory after changing
0355: * directory to the first argument, followed by changing directory to
0356: * the second argument.
0357: * <p>
0358: * The first argument is the base path, the second is the path to concatenate.
0359: * The returned path is always normalized via {@link #normalize(String)},
0360: * thus <code>..</code> is handled.
0361: * <p>
0362: * If <code>pathToAdd</code> is absolute (has an absolute prefix), then
0363: * it will be normalized and returned.
0364: * Otherwise, the paths will be joined, normalized and returned.
0365: * <p>
0366: * The output will be the same on both Unix and Windows except
0367: * for the separator character.
0368: * <pre>
0369: * /foo/ + bar --> /foo/bar
0370: * /foo + bar --> /foo/bar
0371: * /foo + /bar --> /bar
0372: * /foo + C:/bar --> C:/bar
0373: * /foo + C:bar --> C:bar (*)
0374: * /foo/a/ + ../bar --> foo/bar
0375: * /foo/ + ../../bar --> null
0376: * /foo/ + /bar --> /bar
0377: * /foo/.. + /bar --> /bar
0378: * /foo + bar/c.txt --> /foo/bar/c.txt
0379: * /foo/c.txt + bar --> /foo/c.txt/bar (!)
0380: * </pre>
0381: * (*) Note that the Windows relative drive prefix is unreliable when
0382: * used with this method.
0383: * (!) Note that the first parameter must be a path. If it ends with a name, then
0384: * the name will be built into the concatenated path. If this might be a problem,
0385: * use {@link #getFullPath(String)} on the base path argument.
0386: *
0387: * @param basePath the base path to attach to, always treated as a path
0388: * @param fullFilenameToAdd the filename (or path) to attach to the base
0389: * @return the concatenated path, or null if invalid
0390: */
0391: public static String concat(String basePath,
0392: String fullFilenameToAdd) {
0393: int prefix = getPrefixLength(fullFilenameToAdd);
0394: if (prefix < 0) {
0395: return null;
0396: }
0397: if (prefix > 0) {
0398: return normalize(fullFilenameToAdd);
0399: }
0400: if (basePath == null) {
0401: return null;
0402: }
0403: int len = basePath.length();
0404: if (len == 0) {
0405: return normalize(fullFilenameToAdd);
0406: }
0407: char ch = basePath.charAt(len - 1);
0408: if (isSeparator(ch)) {
0409: return normalize(basePath + fullFilenameToAdd);
0410: } else {
0411: return normalize(basePath + '/' + fullFilenameToAdd);
0412: }
0413: }
0414:
0415: //-----------------------------------------------------------------------
0416: /**
0417: * Converts all separators to the Unix separator of forward slash.
0418: *
0419: * @param path the path to be changed, null ignored
0420: * @return the updated path
0421: */
0422: public static String separatorsToUnix(String path) {
0423: if (path == null || path.indexOf(WINDOWS_SEPARATOR) == -1) {
0424: return path;
0425: }
0426: return path.replace(WINDOWS_SEPARATOR, UNIX_SEPARATOR);
0427: }
0428:
0429: /**
0430: * Converts all separators to the Windows separator of backslash.
0431: *
0432: * @param path the path to be changed, null ignored
0433: * @return the updated path
0434: */
0435: public static String separatorsToWindows(String path) {
0436: if (path == null || path.indexOf(UNIX_SEPARATOR) == -1) {
0437: return path;
0438: }
0439: return path.replace(UNIX_SEPARATOR, WINDOWS_SEPARATOR);
0440: }
0441:
0442: /**
0443: * Converts all separators to the system separator.
0444: *
0445: * @param path the path to be changed, null ignored
0446: * @return the updated path
0447: */
0448: public static String separatorsToSystem(String path) {
0449: if (path == null) {
0450: return null;
0451: }
0452: if (isSystemWindows()) {
0453: return separatorsToWindows(path);
0454: } else {
0455: return separatorsToUnix(path);
0456: }
0457: }
0458:
0459: //-----------------------------------------------------------------------
0460: /**
0461: * Returns the length of the filename prefix, such as <code>C:/</code> or <code>~/</code>.
0462: * <p>
0463: * This method will handle a file in either Unix or Windows format.
0464: * <p>
0465: * The prefix length includes the first slash in the full filename
0466: * if applicable. Thus, it is possible that the length returned is greater
0467: * than the length of the input string.
0468: * <pre>
0469: * Windows:
0470: * a\b\c.txt --> "" --> relative
0471: * \a\b\c.txt --> "\" --> current drive absolute
0472: * C:a\b\c.txt --> "C:" --> drive relative
0473: * C:\a\b\c.txt --> "C:\" --> absolute
0474: * \\server\a\b\c.txt --> "\\server\" --> UNC
0475: *
0476: * Unix:
0477: * a/b/c.txt --> "" --> relative
0478: * /a/b/c.txt --> "/" --> absolute
0479: * ~/a/b/c.txt --> "~/" --> current user
0480: * ~ --> "~/" --> current user (slash added)
0481: * ~user/a/b/c.txt --> "~user/" --> named user
0482: * ~user --> "~user/" --> named user (slash added)
0483: * </pre>
0484: * <p>
0485: * The output will be the same irrespective of the machine that the code is running on.
0486: * ie. both Unix and Windows prefixes are matched regardless.
0487: *
0488: * @param filename the filename to find the prefix in, null returns -1
0489: * @return the length of the prefix, -1 if invalid or null
0490: */
0491: public static int getPrefixLength(String filename) {
0492: if (filename == null) {
0493: return -1;
0494: }
0495: int len = filename.length();
0496: if (len == 0) {
0497: return 0;
0498: }
0499: char ch0 = filename.charAt(0);
0500: if (ch0 == ':') {
0501: return -1;
0502: }
0503: if (len == 1) {
0504: if (ch0 == '~') {
0505: return 2; // return a length greater than the input
0506: }
0507: return (isSeparator(ch0) ? 1 : 0);
0508: } else {
0509: if (ch0 == '~') {
0510: int posUnix = filename.indexOf(UNIX_SEPARATOR, 1);
0511: int posWin = filename.indexOf(WINDOWS_SEPARATOR, 1);
0512: if (posUnix == -1 && posWin == -1) {
0513: return len + 1; // return a length greater than the input
0514: }
0515: posUnix = (posUnix == -1 ? posWin : posUnix);
0516: posWin = (posWin == -1 ? posUnix : posWin);
0517: return Math.min(posUnix, posWin) + 1;
0518: }
0519: char ch1 = filename.charAt(1);
0520: if (ch1 == ':') {
0521: ch0 = Character.toUpperCase(ch0);
0522: if (ch0 >= 'A' && ch0 <= 'Z') {
0523: if (len == 2
0524: || isSeparator(filename.charAt(2)) == false) {
0525: return 2;
0526: }
0527: return 3;
0528: }
0529: return -1;
0530:
0531: } else if (isSeparator(ch0) && isSeparator(ch1)) {
0532: int posUnix = filename.indexOf(UNIX_SEPARATOR, 2);
0533: int posWin = filename.indexOf(WINDOWS_SEPARATOR, 2);
0534: if ((posUnix == -1 && posWin == -1) || posUnix == 2
0535: || posWin == 2) {
0536: return -1;
0537: }
0538: posUnix = (posUnix == -1 ? posWin : posUnix);
0539: posWin = (posWin == -1 ? posUnix : posWin);
0540: return Math.min(posUnix, posWin) + 1;
0541: } else {
0542: return (isSeparator(ch0) ? 1 : 0);
0543: }
0544: }
0545: }
0546:
0547: /**
0548: * Returns the index of the last directory separator character.
0549: * <p>
0550: * This method will handle a file in either Unix or Windows format.
0551: * The position of the last forward or backslash is returned.
0552: * <p>
0553: * The output will be the same irrespective of the machine that the code is running on.
0554: *
0555: * @param filename the filename to find the last path separator in, null returns -1
0556: * @return the index of the last separator character, or -1 if there
0557: * is no such character
0558: */
0559: public static int indexOfLastSeparator(String filename) {
0560: if (filename == null) {
0561: return -1;
0562: }
0563: int lastUnixPos = filename.lastIndexOf(UNIX_SEPARATOR);
0564: int lastWindowsPos = filename.lastIndexOf(WINDOWS_SEPARATOR);
0565: return Math.max(lastUnixPos, lastWindowsPos);
0566: }
0567:
0568: /**
0569: * Returns the index of the last extension separator character, which is a dot.
0570: * <p>
0571: * This method also checks that there is no directory separator after the last dot.
0572: * To do this it uses {@link #indexOfLastSeparator(String)} which will
0573: * handle a file in either Unix or Windows format.
0574: * <p>
0575: * The output will be the same irrespective of the machine that the code is running on.
0576: *
0577: * @param filename the filename to find the last path separator in, null returns -1
0578: * @return the index of the last separator character, or -1 if there
0579: * is no such character
0580: */
0581: public static int indexOfExtension(String filename) {
0582: if (filename == null) {
0583: return -1;
0584: }
0585: int extensionPos = filename.lastIndexOf(EXTENSION_SEPARATOR);
0586: int lastSeparator = indexOfLastSeparator(filename);
0587: return (lastSeparator > extensionPos ? -1 : extensionPos);
0588: }
0589:
0590: //-----------------------------------------------------------------------
0591: /**
0592: * Gets the prefix from a full filename, such as <code>C:/</code>
0593: * or <code>~/</code>.
0594: * <p>
0595: * This method will handle a file in either Unix or Windows format.
0596: * The prefix includes the first slash in the full filename where applicable.
0597: * <pre>
0598: * Windows:
0599: * a\b\c.txt --> "" --> relative
0600: * \a\b\c.txt --> "\" --> current drive absolute
0601: * C:a\b\c.txt --> "C:" --> drive relative
0602: * C:\a\b\c.txt --> "C:\" --> absolute
0603: * \\server\a\b\c.txt --> "\\server\" --> UNC
0604: *
0605: * Unix:
0606: * a/b/c.txt --> "" --> relative
0607: * /a/b/c.txt --> "/" --> absolute
0608: * ~/a/b/c.txt --> "~/" --> current user
0609: * ~ --> "~/" --> current user (slash added)
0610: * ~user/a/b/c.txt --> "~user/" --> named user
0611: * ~user --> "~user/" --> named user (slash added)
0612: * </pre>
0613: * <p>
0614: * The output will be the same irrespective of the machine that the code is running on.
0615: * ie. both Unix and Windows prefixes are matched regardless.
0616: *
0617: * @param filename the filename to query, null returns null
0618: * @return the prefix of the file, null if invalid
0619: */
0620: public static String getPrefix(String filename) {
0621: if (filename == null) {
0622: return null;
0623: }
0624: int len = getPrefixLength(filename);
0625: if (len < 0) {
0626: return null;
0627: }
0628: if (len > filename.length()) {
0629: return filename + UNIX_SEPARATOR; // we know this only happens for unix
0630: }
0631: return filename.substring(0, len);
0632: }
0633:
0634: /**
0635: * Gets the path from a full filename, which excludes the prefix.
0636: * <p>
0637: * This method will handle a file in either Unix or Windows format.
0638: * The method is entirely text based, and returns the text before and
0639: * including the last forward or backslash.
0640: * <pre>
0641: * C:\a\b\c.txt --> a\b\
0642: * ~/a/b/c.txt --> a/b/
0643: * a.txt --> ""
0644: * a/b/c --> a/b/
0645: * a/b/c/ --> a/b/c/
0646: * </pre>
0647: * <p>
0648: * The output will be the same irrespective of the machine that the code is running on.
0649: * <p>
0650: * This method drops the prefix from the result.
0651: * See {@link #getFullPath(String)} for the method that retains the prefix.
0652: *
0653: * @param filename the filename to query, null returns null
0654: * @return the path of the file, an empty string if none exists, null if invalid
0655: */
0656: public static String getPath(String filename) {
0657: return doGetPath(filename, 1);
0658: }
0659:
0660: /**
0661: * Gets the path from a full filename, which excludes the prefix, and
0662: * also excluding the final directory separator.
0663: * <p>
0664: * This method will handle a file in either Unix or Windows format.
0665: * The method is entirely text based, and returns the text before the
0666: * last forward or backslash.
0667: * <pre>
0668: * C:\a\b\c.txt --> a\b
0669: * ~/a/b/c.txt --> a/b
0670: * a.txt --> ""
0671: * a/b/c --> a/b
0672: * a/b/c/ --> a/b/c
0673: * </pre>
0674: * <p>
0675: * The output will be the same irrespective of the machine that the code is running on.
0676: * <p>
0677: * This method drops the prefix from the result.
0678: * See {@link #getFullPathNoEndSeparator(String)} for the method that retains the prefix.
0679: *
0680: * @param filename the filename to query, null returns null
0681: * @return the path of the file, an empty string if none exists, null if invalid
0682: */
0683: public static String getPathNoEndSeparator(String filename) {
0684: return doGetPath(filename, 0);
0685: }
0686:
0687: /**
0688: * Does the work of getting the path.
0689: *
0690: * @param filename the filename
0691: * @param separatorAdd 0 to omit the end separator, 1 to return it
0692: * @return the path
0693: */
0694: private static String doGetPath(String filename, int separatorAdd) {
0695: if (filename == null) {
0696: return null;
0697: }
0698: int prefix = getPrefixLength(filename);
0699: if (prefix < 0) {
0700: return null;
0701: }
0702: int index = indexOfLastSeparator(filename);
0703: if (prefix >= filename.length() || index < 0) {
0704: return "";
0705: }
0706: return filename.substring(prefix, index + separatorAdd);
0707: }
0708:
0709: /**
0710: * Gets the full path from a full filename, which is the prefix + path.
0711: * <p>
0712: * This method will handle a file in either Unix or Windows format.
0713: * The method is entirely text based, and returns the text before and
0714: * including the last forward or backslash.
0715: * <pre>
0716: * C:\a\b\c.txt --> C:\a\b\
0717: * ~/a/b/c.txt --> ~/a/b/
0718: * a.txt --> ""
0719: * a/b/c --> a/b/
0720: * a/b/c/ --> a/b/c/
0721: * C: --> C:
0722: * C:\ --> C:\
0723: * ~ --> ~/
0724: * ~/ --> ~/
0725: * ~user --> ~user/
0726: * ~user/ --> ~user/
0727: * </pre>
0728: * <p>
0729: * The output will be the same irrespective of the machine that the code is running on.
0730: *
0731: * @param filename the filename to query, null returns null
0732: * @return the path of the file, an empty string if none exists, null if invalid
0733: */
0734: public static String getFullPath(String filename) {
0735: return doGetFullPath(filename, true);
0736: }
0737:
0738: /**
0739: * Gets the full path from a full filename, which is the prefix + path,
0740: * and also excluding the final directory separator.
0741: * <p>
0742: * This method will handle a file in either Unix or Windows format.
0743: * The method is entirely text based, and returns the text before the
0744: * last forward or backslash.
0745: * <pre>
0746: * C:\a\b\c.txt --> C:\a\b
0747: * ~/a/b/c.txt --> ~/a/b
0748: * a.txt --> ""
0749: * a/b/c --> a/b
0750: * a/b/c/ --> a/b/c
0751: * C: --> C:
0752: * C:\ --> C:\
0753: * ~ --> ~
0754: * ~/ --> ~
0755: * ~user --> ~user
0756: * ~user/ --> ~user
0757: * </pre>
0758: * <p>
0759: * The output will be the same irrespective of the machine that the code is running on.
0760: *
0761: * @param filename the filename to query, null returns null
0762: * @return the path of the file, an empty string if none exists, null if invalid
0763: */
0764: public static String getFullPathNoEndSeparator(String filename) {
0765: return doGetFullPath(filename, false);
0766: }
0767:
0768: /**
0769: * Does the work of getting the path.
0770: *
0771: * @param filename the filename
0772: * @param includeSeparator true to include the end separator
0773: * @return the path
0774: */
0775: private static String doGetFullPath(String filename,
0776: boolean includeSeparator) {
0777: if (filename == null) {
0778: return null;
0779: }
0780: int prefix = getPrefixLength(filename);
0781: if (prefix < 0) {
0782: return null;
0783: }
0784: if (prefix >= filename.length()) {
0785: if (includeSeparator) {
0786: return getPrefix(filename); // add end slash if necessary
0787: } else {
0788: return filename;
0789: }
0790: }
0791: int index = indexOfLastSeparator(filename);
0792: if (index < 0) {
0793: return filename.substring(0, prefix);
0794: }
0795: int end = index + (includeSeparator ? 1 : 0);
0796: return filename.substring(0, end);
0797: }
0798:
0799: /**
0800: * Gets the name minus the path from a full filename.
0801: * <p>
0802: * This method will handle a file in either Unix or Windows format.
0803: * The text after the last forward or backslash is returned.
0804: * <pre>
0805: * a/b/c.txt --> c.txt
0806: * a.txt --> a.txt
0807: * a/b/c --> c
0808: * a/b/c/ --> ""
0809: * </pre>
0810: * <p>
0811: * The output will be the same irrespective of the machine that the code is running on.
0812: *
0813: * @param filename the filename to query, null returns null
0814: * @return the name of the file without the path, or an empty string if none exists
0815: */
0816: public static String getName(String filename) {
0817: if (filename == null) {
0818: return null;
0819: }
0820: int index = indexOfLastSeparator(filename);
0821: return filename.substring(index + 1);
0822: }
0823:
0824: /**
0825: * Gets the base name, minus the full path and extension, from a full filename.
0826: * <p>
0827: * This method will handle a file in either Unix or Windows format.
0828: * The text after the last forward or backslash and before the last dot is returned.
0829: * <pre>
0830: * a/b/c.txt --> c
0831: * a.txt --> a
0832: * a/b/c --> c
0833: * a/b/c/ --> ""
0834: * </pre>
0835: * <p>
0836: * The output will be the same irrespective of the machine that the code is running on.
0837: *
0838: * @param filename the filename to query, null returns null
0839: * @return the name of the file without the path, or an empty string if none exists
0840: */
0841: public static String getBaseName(String filename) {
0842: return removeExtension(getName(filename));
0843: }
0844:
0845: /**
0846: * Gets the extension of a filename.
0847: * <p>
0848: * This method returns the textual part of the filename after the last dot.
0849: * There must be no directory separator after the dot.
0850: * <pre>
0851: * foo.txt --> "txt"
0852: * a/b/c.jpg --> "jpg"
0853: * a/b.txt/c --> ""
0854: * a/b/c --> ""
0855: * </pre>
0856: * <p>
0857: * The output will be the same irrespective of the machine that the code is running on.
0858: *
0859: * @param filename the filename to retrieve the extension of.
0860: * @return the extension of the file or an empty string if none exists.
0861: */
0862: public static String getExtension(String filename) {
0863: if (filename == null) {
0864: return null;
0865: }
0866: int index = indexOfExtension(filename);
0867: if (index == -1) {
0868: return "";
0869: } else {
0870: return filename.substring(index + 1);
0871: }
0872: }
0873:
0874: //-----------------------------------------------------------------------
0875: /**
0876: * Removes the extension from a filename.
0877: * <p>
0878: * This method returns the textual part of the filename before the last dot.
0879: * There must be no directory separator after the dot.
0880: * <pre>
0881: * foo.txt --> foo
0882: * a\b\c.jpg --> a\b\c
0883: * a\b\c --> a\b\c
0884: * a.b\c --> a.b\c
0885: * </pre>
0886: * <p>
0887: * The output will be the same irrespective of the machine that the code is running on.
0888: *
0889: * @param filename the filename to query, null returns null
0890: * @return the filename minus the extension
0891: */
0892: public static String removeExtension(String filename) {
0893: if (filename == null) {
0894: return null;
0895: }
0896: int index = indexOfExtension(filename);
0897: if (index == -1) {
0898: return filename;
0899: } else {
0900: return filename.substring(0, index);
0901: }
0902: }
0903:
0904: //-----------------------------------------------------------------------
0905: /**
0906: * Checks whether two filenames are equal exactly.
0907: * <p>
0908: * No processing is performed on the filenames other than comparison,
0909: * thus this is merely a null-safe case-sensitive equals.
0910: *
0911: * @param filename1 the first filename to query, may be null
0912: * @param filename2 the second filename to query, may be null
0913: * @return true if the filenames are equal, null equals null
0914: * @see IOCase#SENSITIVE
0915: */
0916: public static boolean equals(String filename1, String filename2) {
0917: return equals(filename1, filename2, false, IOCase.SENSITIVE);
0918: }
0919:
0920: /**
0921: * Checks whether two filenames are equal using the case rules of the system.
0922: * <p>
0923: * No processing is performed on the filenames other than comparison.
0924: * The check is case-sensitive on Unix and case-insensitive on Windows.
0925: *
0926: * @param filename1 the first filename to query, may be null
0927: * @param filename2 the second filename to query, may be null
0928: * @return true if the filenames are equal, null equals null
0929: * @see IOCase#SYSTEM
0930: */
0931: public static boolean equalsOnSystem(String filename1,
0932: String filename2) {
0933: return equals(filename1, filename2, false, IOCase.SYSTEM);
0934: }
0935:
0936: //-----------------------------------------------------------------------
0937: /**
0938: * Checks whether two filenames are equal after both have been normalized.
0939: * <p>
0940: * Both filenames are first passed to {@link #normalize(String)}.
0941: * The check is then performed in a case-sensitive manner.
0942: *
0943: * @param filename1 the first filename to query, may be null
0944: * @param filename2 the second filename to query, may be null
0945: * @return true if the filenames are equal, null equals null
0946: * @see IOCase#SENSITIVE
0947: */
0948: public static boolean equalsNormalized(String filename1,
0949: String filename2) {
0950: return equals(filename1, filename2, true, IOCase.SENSITIVE);
0951: }
0952:
0953: /**
0954: * Checks whether two filenames are equal after both have been normalized
0955: * and using the case rules of the system.
0956: * <p>
0957: * Both filenames are first passed to {@link #normalize(String)}.
0958: * The check is then performed case-sensitive on Unix and
0959: * case-insensitive on Windows.
0960: *
0961: * @param filename1 the first filename to query, may be null
0962: * @param filename2 the second filename to query, may be null
0963: * @return true if the filenames are equal, null equals null
0964: * @see IOCase#SYSTEM
0965: */
0966: public static boolean equalsNormalizedOnSystem(String filename1,
0967: String filename2) {
0968: return equals(filename1, filename2, true, IOCase.SYSTEM);
0969: }
0970:
0971: /**
0972: * Checks whether two filenames are equal, optionally normalizing and providing
0973: * control over the case-sensitivity.
0974: *
0975: * @param filename1 the first filename to query, may be null
0976: * @param filename2 the second filename to query, may be null
0977: * @param normalized whether to normalize the filenames
0978: * @param caseSensitivity what case sensitivity rule to use, null means case-sensitive
0979: * @return true if the filenames are equal, null equals null
0980: * @since Commons IO 1.3
0981: */
0982: public static boolean equals(String filename1, String filename2,
0983: boolean normalized, IOCase caseSensitivity) {
0984:
0985: if (filename1 == null || filename2 == null) {
0986: return filename1 == filename2;
0987: }
0988: if (normalized) {
0989: filename1 = normalize(filename1);
0990: filename2 = normalize(filename2);
0991: }
0992: if (caseSensitivity == null) {
0993: caseSensitivity = IOCase.SENSITIVE;
0994: }
0995: return caseSensitivity.checkEquals(filename1, filename2);
0996: }
0997:
0998: //-----------------------------------------------------------------------
0999: /**
1000: * Checks whether the extension of the filename is that specified.
1001: * <p>
1002: * This method obtains the extension as the textual part of the filename
1003: * after the last dot. There must be no directory separator after the dot.
1004: * The extension check is case-sensitive on all platforms.
1005: *
1006: * @param filename the filename to query, null returns false
1007: * @param extension the extension to check for, null or empty checks for no extension
1008: * @return true if the filename has the specified extension
1009: */
1010: public static boolean isExtension(String filename, String extension) {
1011: if (filename == null) {
1012: return false;
1013: }
1014: if (extension == null || extension.length() == 0) {
1015: return (indexOfExtension(filename) == -1);
1016: }
1017: String fileExt = getExtension(filename);
1018: return fileExt.equals(extension);
1019: }
1020:
1021: /**
1022: * Checks whether the extension of the filename is one of those specified.
1023: * <p>
1024: * This method obtains the extension as the textual part of the filename
1025: * after the last dot. There must be no directory separator after the dot.
1026: * The extension check is case-sensitive on all platforms.
1027: *
1028: * @param filename the filename to query, null returns false
1029: * @param extensions the extensions to check for, null checks for no extension
1030: * @return true if the filename is one of the extensions
1031: */
1032: public static boolean isExtension(String filename,
1033: String[] extensions) {
1034: if (filename == null) {
1035: return false;
1036: }
1037: if (extensions == null || extensions.length == 0) {
1038: return (indexOfExtension(filename) == -1);
1039: }
1040: String fileExt = getExtension(filename);
1041: for (int i = 0; i < extensions.length; i++) {
1042: if (fileExt.equals(extensions[i])) {
1043: return true;
1044: }
1045: }
1046: return false;
1047: }
1048:
1049: /**
1050: * Checks whether the extension of the filename is one of those specified.
1051: * <p>
1052: * This method obtains the extension as the textual part of the filename
1053: * after the last dot. There must be no directory separator after the dot.
1054: * The extension check is case-sensitive on all platforms.
1055: *
1056: * @param filename the filename to query, null returns false
1057: * @param extensions the extensions to check for, null checks for no extension
1058: * @return true if the filename is one of the extensions
1059: */
1060: public static boolean isExtension(String filename,
1061: Collection extensions) {
1062: if (filename == null) {
1063: return false;
1064: }
1065: if (extensions == null || extensions.isEmpty()) {
1066: return (indexOfExtension(filename) == -1);
1067: }
1068: String fileExt = getExtension(filename);
1069: for (Iterator it = extensions.iterator(); it.hasNext();) {
1070: if (fileExt.equals(it.next())) {
1071: return true;
1072: }
1073: }
1074: return false;
1075: }
1076:
1077: //-----------------------------------------------------------------------
1078: /**
1079: * Checks a filename to see if it matches the specified wildcard matcher,
1080: * always testing case-sensitive.
1081: * <p>
1082: * The wildcard matcher uses the characters '?' and '*' to represent a
1083: * single or multiple wildcard characters.
1084: * This is the same as often found on Dos/Unix command lines.
1085: * The check is case-sensitive always.
1086: * <pre>
1087: * wildcardMatch("c.txt", "*.txt") --> true
1088: * wildcardMatch("c.txt", "*.jpg") --> false
1089: * wildcardMatch("a/b/c.txt", "a/b/*") --> true
1090: * wildcardMatch("c.txt", "*.???") --> true
1091: * wildcardMatch("c.txt", "*.????") --> false
1092: * </pre>
1093: *
1094: * @param filename the filename to match on
1095: * @param wildcardMatcher the wildcard string to match against
1096: * @return true if the filename matches the wilcard string
1097: * @see IOCase#SENSITIVE
1098: */
1099: public static boolean wildcardMatch(String filename,
1100: String wildcardMatcher) {
1101: return wildcardMatch(filename, wildcardMatcher,
1102: IOCase.SENSITIVE);
1103: }
1104:
1105: /**
1106: * Checks a filename to see if it matches the specified wildcard matcher
1107: * using the case rules of the system.
1108: * <p>
1109: * The wildcard matcher uses the characters '?' and '*' to represent a
1110: * single or multiple wildcard characters.
1111: * This is the same as often found on Dos/Unix command lines.
1112: * The check is case-sensitive on Unix and case-insensitive on Windows.
1113: * <pre>
1114: * wildcardMatch("c.txt", "*.txt") --> true
1115: * wildcardMatch("c.txt", "*.jpg") --> false
1116: * wildcardMatch("a/b/c.txt", "a/b/*") --> true
1117: * wildcardMatch("c.txt", "*.???") --> true
1118: * wildcardMatch("c.txt", "*.????") --> false
1119: * </pre>
1120: *
1121: * @param filename the filename to match on
1122: * @param wildcardMatcher the wildcard string to match against
1123: * @return true if the filename matches the wilcard string
1124: * @see IOCase#SYSTEM
1125: */
1126: public static boolean wildcardMatchOnSystem(String filename,
1127: String wildcardMatcher) {
1128: return wildcardMatch(filename, wildcardMatcher, IOCase.SYSTEM);
1129: }
1130:
1131: /**
1132: * Checks a filename to see if it matches the specified wildcard matcher
1133: * allowing control over case-sensitivity.
1134: * <p>
1135: * The wildcard matcher uses the characters '?' and '*' to represent a
1136: * single or multiple wildcard characters.
1137: *
1138: * @param filename the filename to match on
1139: * @param wildcardMatcher the wildcard string to match against
1140: * @param caseSensitivity what case sensitivity rule to use, null means case-sensitive
1141: * @return true if the filename matches the wilcard string
1142: * @since Commons IO 1.3
1143: */
1144: public static boolean wildcardMatch(String filename,
1145: String wildcardMatcher, IOCase caseSensitivity) {
1146: if (filename == null && wildcardMatcher == null) {
1147: return true;
1148: }
1149: if (filename == null || wildcardMatcher == null) {
1150: return false;
1151: }
1152: if (caseSensitivity == null) {
1153: caseSensitivity = IOCase.SENSITIVE;
1154: }
1155: filename = caseSensitivity.convertCase(filename);
1156: wildcardMatcher = caseSensitivity.convertCase(wildcardMatcher);
1157: String[] wcs = splitOnTokens(wildcardMatcher);
1158: boolean anyChars = false;
1159: int textIdx = 0;
1160: int wcsIdx = 0;
1161: Stack backtrack = new Stack();
1162:
1163: // loop around a backtrack stack, to handle complex * matching
1164: do {
1165: if (backtrack.size() > 0) {
1166: int[] array = (int[]) backtrack.pop();
1167: wcsIdx = array[0];
1168: textIdx = array[1];
1169: anyChars = true;
1170: }
1171:
1172: // loop whilst tokens and text left to process
1173: while (wcsIdx < wcs.length) {
1174:
1175: if (wcs[wcsIdx].equals("?")) {
1176: // ? so move to next text char
1177: textIdx++;
1178: anyChars = false;
1179:
1180: } else if (wcs[wcsIdx].equals("*")) {
1181: // set any chars status
1182: anyChars = true;
1183: if (wcsIdx == wcs.length - 1) {
1184: textIdx = filename.length();
1185: }
1186:
1187: } else {
1188: // matching text token
1189: if (anyChars) {
1190: // any chars then try to locate text token
1191: textIdx = filename
1192: .indexOf(wcs[wcsIdx], textIdx);
1193: if (textIdx == -1) {
1194: // token not found
1195: break;
1196: }
1197: int repeat = filename.indexOf(wcs[wcsIdx],
1198: textIdx + 1);
1199: if (repeat >= 0) {
1200: backtrack
1201: .push(new int[] { wcsIdx, repeat });
1202: }
1203: } else {
1204: // matching from current position
1205: if (!filename.startsWith(wcs[wcsIdx], textIdx)) {
1206: // couldnt match token
1207: break;
1208: }
1209: }
1210:
1211: // matched text token, move text index to end of matched token
1212: textIdx += wcs[wcsIdx].length();
1213: anyChars = false;
1214: }
1215:
1216: wcsIdx++;
1217: }
1218:
1219: // full match
1220: if (wcsIdx == wcs.length && textIdx == filename.length()) {
1221: return true;
1222: }
1223:
1224: } while (backtrack.size() > 0);
1225:
1226: return false;
1227: }
1228:
1229: /**
1230: * Splits a string into a number of tokens.
1231: *
1232: * @param text the text to split
1233: * @return the tokens, never null
1234: */
1235: static String[] splitOnTokens(String text) {
1236: // used by wildcardMatch
1237: // package level so a unit test may run on this
1238:
1239: if (text.indexOf("?") == -1 && text.indexOf("*") == -1) {
1240: return new String[] { text };
1241: }
1242:
1243: char[] array = text.toCharArray();
1244: ArrayList list = new ArrayList();
1245: StringBuffer buffer = new StringBuffer();
1246: for (int i = 0; i < array.length; i++) {
1247: if (array[i] == '?' || array[i] == '*') {
1248: if (buffer.length() != 0) {
1249: list.add(buffer.toString());
1250: buffer.setLength(0);
1251: }
1252: if (array[i] == '?') {
1253: list.add("?");
1254: } else if (list.size() == 0
1255: || (i > 0 && list.get(list.size() - 1).equals(
1256: "*") == false)) {
1257: list.add("*");
1258: }
1259: } else {
1260: buffer.append(array[i]);
1261: }
1262: }
1263: if (buffer.length() != 0) {
1264: list.add(buffer.toString());
1265: }
1266:
1267: return (String[]) list.toArray(new String[list.size()]);
1268: }
1269:
1270: }
|