// URL utilities class that makes it easy to create new URLs based off of old URLs without having to assemble or parse them yourself : Utilities « Network Protocol


 



/*

 * Copyright (c) 2002-2009 Gargoyle Software Inc.

 *

 * Licensed under the Apache License, Version 2.0 (the "License");

 * you may not use this file except in compliance with the License.

 * You may obtain a copy of the License at

 * http://www.apache.org/licenses/LICENSE-2.0

 *

 * Unless required by applicable law or agreed to in writing, software

 * distributed under the License is distributed on an "AS IS" BASIS,

 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.

 * See the License for the specific language governing permissions and

 * limitations under the License.

 */



import java.net.MalformedURLException;

import java.net.URL;

import java.util.List;



/**
 * URL utilities class that makes it easy to create new URLs based off of old URLs
 * without having to assemble or parse them yourself.
 *
 * <p>Besides the {@code getUrlWithNewXxx} family, which rebuilds a {@link URL} with a single
 * component replaced, this class offers {@code resolveUrl}, which resolves a relative URL
 * against a base URL following the algorithm of
 * <a href="http://www.faqs.org/rfcs/rfc1808.html">RFC1808</a>, with one browser-like deviation:
 * leading ".." segments that would climb above the root are dropped.
 *
 * @version $Revision: 4387 $
 * @author Daniel Gredler
 * @author Martin Tamme
 * @author Sudhan Moghe
 */
public final class UrlUtils {

    /**
     * Disallow instantiation of this class.
     */
    private UrlUtils() {
        // Empty.
    }

    /**
     * Creates and returns a new URL identical to the specified URL, except using the specified protocol.
     * @param u the URL on which to base the returned URL
     * @param newProtocol the new protocol to use in the returned URL
     * @return a new URL identical to the specified URL, except using the specified protocol
     * @throws MalformedURLException if there is a problem creating the new URL
     */
    public static URL getUrlWithNewProtocol(final URL u, final String newProtocol) throws MalformedURLException {
        return createNewUrl(newProtocol, u.getHost(), u.getPort(), u.getPath(), u.getRef(), u.getQuery());
    }

    /**
     * Creates and returns a new URL identical to the specified URL, except using the specified host.
     * @param u the URL on which to base the returned URL
     * @param newHost the new host to use in the returned URL
     * @return a new URL identical to the specified URL, except using the specified host
     * @throws MalformedURLException if there is a problem creating the new URL
     */
    public static URL getUrlWithNewHost(final URL u, final String newHost) throws MalformedURLException {
        return createNewUrl(u.getProtocol(), newHost, u.getPort(), u.getPath(), u.getRef(), u.getQuery());
    }

    /**
     * Creates and returns a new URL identical to the specified URL, except using the specified port.
     * @param u the URL on which to base the returned URL
     * @param newPort the new port to use in the returned URL ({@code -1} omits the port)
     * @return a new URL identical to the specified URL, except using the specified port
     * @throws MalformedURLException if there is a problem creating the new URL
     */
    public static URL getUrlWithNewPort(final URL u, final int newPort) throws MalformedURLException {
        return createNewUrl(u.getProtocol(), u.getHost(), newPort, u.getPath(), u.getRef(), u.getQuery());
    }

    /**
     * Creates and returns a new URL identical to the specified URL, except using the specified path.
     * @param u the URL on which to base the returned URL
     * @param newPath the new path to use in the returned URL
     * @return a new URL identical to the specified URL, except using the specified path
     * @throws MalformedURLException if there is a problem creating the new URL
     */
    public static URL getUrlWithNewPath(final URL u, final String newPath) throws MalformedURLException {
        return createNewUrl(u.getProtocol(), u.getHost(), u.getPort(), newPath, u.getRef(), u.getQuery());
    }

    /**
     * Creates and returns a new URL identical to the specified URL, except using the specified reference.
     * @param u the URL on which to base the returned URL
     * @param newRef the new reference to use in the returned URL
     * @return a new URL identical to the specified URL, except using the specified reference
     * @throws MalformedURLException if there is a problem creating the new URL
     */
    public static URL getUrlWithNewRef(final URL u, final String newRef) throws MalformedURLException {
        return createNewUrl(u.getProtocol(), u.getHost(), u.getPort(), u.getPath(), newRef, u.getQuery());
    }

    /**
     * Creates and returns a new URL identical to the specified URL, except using the specified query string.
     * @param u the URL on which to base the returned URL
     * @param newQuery the new query string to use in the returned URL
     * @return a new URL identical to the specified URL, except using the specified query string
     * @throws MalformedURLException if there is a problem creating the new URL
     */
    public static URL getUrlWithNewQuery(final URL u, final String newQuery) throws MalformedURLException {
        return createNewUrl(u.getProtocol(), u.getHost(), u.getPort(), u.getPath(), u.getRef(), newQuery);
    }

    /**
     * Creates a new URL based on the specified fragments.
     * @param protocol the protocol to use (may not be {@code null})
     * @param host the host to use (may not be {@code null})
     * @param port the port to use (may be {@code -1} if no port is specified)
     * @param path the path to use (may be {@code null} and may omit the initial {@code '/'})
     * @param ref the reference to use (may be {@code null}; a {@code '#'} is prepended only
     *        when the value does not already start with one)
     * @param query the query to use (may be {@code null} and must not include the {@code '?'})
     * @return a new URL based on the specified fragments
     * @throws MalformedURLException if there is a problem creating the new URL
     */
    private static URL createNewUrl(final String protocol, final String host, final int port,
            final String path, final String ref, final String query) throws MalformedURLException {
        final StringBuilder s = new StringBuilder();
        s.append(protocol).append("://").append(host);
        if (port != -1) {
            s.append(":").append(port);
        }
        if (path != null && path.length() > 0) {
            if (!path.startsWith("/")) {
                s.append("/");
            }
            s.append(path);
        }
        // The query precedes the reference in the external form of a URL.
        if (query != null) {
            s.append("?").append(query);
        }
        if (ref != null) {
            if (!ref.startsWith("#")) {
                s.append("#");
            }
            s.append(ref);
        }
        return new URL(s.toString());
    }

    /**
     * Resolves a given relative URL against a base URL. See
     * <a href="http://www.faqs.org/rfcs/rfc1808.html">RFC1808</a>
     * Section 4 for more details. Both arguments are trimmed before being processed.
     *
     * @param baseUrl     The base URL in which to resolve the specification.
     * @param relativeUrl The relative URL to resolve against the base URL.
     * @return the resolved specification.
     * @throws IllegalArgumentException if either argument is {@code null}
     */
    public static String resolveUrl(final String baseUrl, final String relativeUrl) {
        if (baseUrl == null) {
            throw new IllegalArgumentException("Base URL must not be null");
        }
        if (relativeUrl == null) {
            throw new IllegalArgumentException("Relative URL must not be null");
        }
        final Url url = resolveUrl(parseUrl(baseUrl.trim()), relativeUrl.trim());

        return url.toString();
    }

    /**
     * Resolves a given relative URL against a base URL. See
     * <a href="http://www.faqs.org/rfcs/rfc1808.html">RFC1808</a>
     * Section 4 for more details. The base URL is converted with
     * {@link URL#toExternalForm()} and handed to {@link #resolveUrl(String, String)}.
     *
     * @param baseUrl     The base URL in which to resolve the specification.
     * @param relativeUrl The relative URL to resolve against the base URL.
     * @return the resolved specification.
     * @throws IllegalArgumentException if either argument is {@code null}
     */
    public static String resolveUrl(final URL baseUrl, final String relativeUrl) {
        if (baseUrl == null) {
            throw new IllegalArgumentException("Base URL must not be null");
        }
        return resolveUrl(baseUrl.toExternalForm(), relativeUrl);
    }

    /**
     * Parses a given specification using the algorithm depicted in
     * <a href="http://www.faqs.org/rfcs/rfc1808.html">RFC1808</a>:
     *
     * Section 2.4: Parsing a URL
     *
     *   An accepted method for parsing URLs is useful to clarify the
     *   generic-RL syntax of Section 2.2 and to describe the algorithm for
     *   resolving relative URLs presented in Section 4. This section
     *   describes the parsing rules for breaking down a URL (relative or
     *   absolute) into the component parts described in Section 2.1.  The
     *   rules assume that the URL has already been separated from any
     *   surrounding text and copied to a "parse string". The rules are
     *   listed in the order in which they would be applied by the parser.
     *
     * Instead of physically shrinking a parse string, the implementation narrows the
     * window {@code [startIndex, endIndex)} over {@code spec} as components are consumed.
     *
     * @param spec The specification to parse.
     * @return the parsed specification.
     */
    private static Url parseUrl(final String spec) {
        final Url url = new Url();
        int startIndex = 0;
        int endIndex = spec.length();

        // Section 2.4.1: Parsing the Fragment Identifier
        //
        //   If the parse string contains a crosshatch "#" character, then the
        //   substring after the first (left-most) crosshatch "#" and up to the
        //   end of the parse string is the <fragment> identifier. If the
        //   crosshatch is the last character, or no crosshatch is present, then
        //   the fragment identifier is empty. The matched substring, including
        //   the crosshatch character, is removed from the parse string before
        //   continuing.
        //
        //   Note that the fragment identifier is not considered part of the URL.
        //   However, since it is often attached to the URL, parsers must be able
        //   to recognize and set aside fragment identifiers as part of the
        //   process.
        final int crosshatchIndex = indexOfChar(spec, '#', startIndex, endIndex);

        if (crosshatchIndex >= 0) {
            url.fragment_ = spec.substring(crosshatchIndex + 1, endIndex);
            endIndex = crosshatchIndex;
        }
        // Section 2.4.2: Parsing the Scheme
        //
        //   If the parse string contains a colon ":" after the first character
        //   and before any characters not allowed as part of a scheme name (i.e.,
        //   any not an alphanumeric, plus "+", period ".", or hyphen "-"), the
        //   <scheme> of the URL is the substring of characters up to but not
        //   including the first colon. These characters and the colon are then
        //   removed from the parse string before continuing.
        final int colonIndex = indexOfChar(spec, ':', startIndex, endIndex);

        if (colonIndex > 0) {
            final String scheme = spec.substring(startIndex, colonIndex);
            if (isValidScheme(scheme)) {
                url.scheme_ = scheme;
                startIndex = colonIndex + 1;
            }
        }
        // Section 2.4.3: Parsing the Network Location/Login
        //
        //   If the parse string begins with a double-slash "//", then the
        //   substring of characters after the double-slash and up to, but not
        //   including, the next slash "/" character is the network location/login
        //   (<net_loc>) of the URL. If no trailing slash "/" is present, the
        //   entire remaining parse string is assigned to <net_loc>. The double-
        //   slash and <net_loc> are removed from the parse string before
        //   continuing.
        //
        // Note: We also accept a question mark "?" or a semicolon ";" character as
        //       delimiters for the network location/login (<net_loc>) of the URL.
        final int locationStartIndex;
        int locationEndIndex;

        if (spec.startsWith("//", startIndex)) {
            locationStartIndex = startIndex + 2;
            locationEndIndex = indexOfChar(spec, '/', locationStartIndex, endIndex);
            if (locationEndIndex >= 0) {
                startIndex = locationEndIndex;
            }
        }
        else {
            locationStartIndex = -1;
            locationEndIndex = -1;
        }
        // Section 2.4.4: Parsing the Query Information
        //
        //   If the parse string contains a question mark "?" character, then the
        //   substring after the first (left-most) question mark "?" and up to the
        //   end of the parse string is the <query> information. If the question
        //   mark is the last character, or no question mark is present, then the
        //   query information is empty. The matched substring, including the
        //   question mark character, is removed from the parse string before
        //   continuing.
        final int questionMarkIndex = indexOfChar(spec, '?', startIndex, endIndex);

        if (questionMarkIndex >= 0) {
            if ((locationStartIndex >= 0) && (locationEndIndex < 0)) {
                // The substring of characters after the double-slash and up to, but not
                // including, the question mark "?" character is the network location/login
                // (<net_loc>) of the URL.
                locationEndIndex = questionMarkIndex;
                startIndex = questionMarkIndex;
            }
            url.query_ = spec.substring(questionMarkIndex + 1, endIndex);
            endIndex = questionMarkIndex;
        }
        // Section 2.4.5: Parsing the Parameters
        //
        //   If the parse string contains a semicolon ";" character, then the
        //   substring after the first (left-most) semicolon ";" and up to the end
        //   of the parse string is the parameters (<params>). If the semicolon
        //   is the last character, or no semicolon is present, then <params> is
        //   empty. The matched substring, including the semicolon character, is
        //   removed from the parse string before continuing.
        final int semicolonIndex = indexOfChar(spec, ';', startIndex, endIndex);

        if (semicolonIndex >= 0) {
            if ((locationStartIndex >= 0) && (locationEndIndex < 0)) {
                // The substring of characters after the double-slash and up to, but not
                // including, the semicolon ";" character is the network location/login
                // (<net_loc>) of the URL.
                locationEndIndex = semicolonIndex;
                startIndex = semicolonIndex;
            }
            url.parameters_ = spec.substring(semicolonIndex + 1, endIndex);
            endIndex = semicolonIndex;
        }
        // Section 2.4.6: Parsing the Path
        //
        //   After the above steps, all that is left of the parse string is the
        //   URL <path> and the slash "/" that may precede it. Even though the
        //   initial slash is not part of the URL path, the parser must remember
        //   whether or not it was present so that later processes can
        //   differentiate between relative and absolute paths. Often this is
        //   done by simply storing the preceding slash along with the path.
        if ((locationStartIndex >= 0) && (locationEndIndex < 0)) {
            // The entire remaining parse string is assigned to the network
            // location/login (<net_loc>) of the URL.
            locationEndIndex = endIndex;
        }
        else if (startIndex < endIndex) {
            url.path_ = spec.substring(startIndex, endIndex);
        }
        // Set the network location/login (<net_loc>) of the URL.
        if ((locationStartIndex >= 0) && (locationEndIndex >= 0)) {
            url.location_ = spec.substring(locationStartIndex, locationEndIndex);
        }
        return url;
    }

    /**
     * Returns the index of the first occurrence of {@code searchChar} in {@code s} within
     * the window {@code [beginIndex, endIndex)}, or {@code -1} if the window does not contain it.
     *
     * @param s          the string to search
     * @param searchChar the character to look for
     * @param beginIndex the index to start the search from (inclusive)
     * @param endIndex   the index to stop the search at (exclusive)
     * @return the index of the first occurrence inside the window, or {@code -1}
     */
    private static int indexOfChar(final String s, final char searchChar,
            final int beginIndex, final int endIndex) {
        final int index = s.indexOf(searchChar, beginIndex);
        // A hit at or beyond endIndex lies outside the window and does not count.
        return (index >= 0 && index < endIndex) ? index : -1;
    }

    /**
     * Returns {@code true} if the specified string is a valid scheme name: it must be
     * non-empty, start with a letter, and continue with letters, digits, ".", "+" or "-".
     *
     * @param scheme the candidate scheme name (without the trailing colon)
     * @return {@code true} if the string is acceptable as a scheme name
     */
    private static boolean isValidScheme(final String scheme) {
        final int length = scheme.length();
        if (length < 1) {
            return false;
        }
        char c = scheme.charAt(0);
        if (!Character.isLetter(c)) {
            return false;
        }
        for (int i = 1; i < length; i++) {
            c = scheme.charAt(i);
            if (!Character.isLetterOrDigit(c) && c != '.' && c != '+' && c != '-') {
                return false;
            }
        }
        return true;
    }

    /**
     * Resolves a given relative URL against a base URL using the algorithm
     * depicted in <a href="http://www.faqs.org/rfcs/rfc1808.html">RFC1808</a>:
     *
     * Section 4: Resolving Relative URLs
     *
     *   This section describes an example algorithm for resolving URLs within
     *   a context in which the URLs may be relative, such that the result is
     *   always a URL in absolute form. Although this algorithm cannot
     *   guarantee that the resulting URL will equal that intended by the
     *   original author, it does guarantee that any valid URL (relative or
     *   absolute) can be consistently transformed to an absolute form given a
     *   valid base URL.
     *
     * @param baseUrl     The base URL in which to resolve the specification.
     * @param relativeUrl The relative URL to resolve against the base URL.
     * @return the resolved specification.
     */
    private static Url resolveUrl(final Url baseUrl, final String relativeUrl) {
        final Url url = parseUrl(relativeUrl);
        // Step 1: The base URL is established according to the rules of
        //         Section 3.  If the base URL is the empty string (unknown),
        //         the embedded URL is interpreted as an absolute URL and
        //         we are done.
        if (baseUrl == null) {
            return url;
        }
        // Step 2: Both the base and embedded URLs are parsed into their
        //         component parts as described in Section 2.4.
        //      a) If the embedded URL is entirely empty, it inherits the
        //         entire base URL (i.e., is set equal to the base URL)
        //         and we are done.
        if (relativeUrl.length() == 0) {
            return new Url(baseUrl);
        }
        //      b) If the embedded URL starts with a scheme name, it is
        //         interpreted as an absolute URL and we are done.
        if (url.scheme_ != null) {
            return url;
        }
        //      c) Otherwise, the embedded URL inherits the scheme of
        //         the base URL.
        url.scheme_ = baseUrl.scheme_;
        // Step 3: If the embedded URL's <net_loc> is non-empty, we skip to
        //         Step 7.  Otherwise, the embedded URL inherits the <net_loc>
        //         (if any) of the base URL.
        if (url.location_ != null) {
            return url;
        }
        url.location_ = baseUrl.location_;
        // Step 4: If the embedded URL path is preceded by a slash "/", the
        //         path is not relative and we skip to Step 7.
        if ((url.path_ != null) && url.path_.startsWith("/")) {
            url.path_ = removeLeadingSlashPoints(url.path_);
            return url;
        }
        // Step 5: If the embedded URL path is empty (and not preceded by a
        //         slash), then the embedded URL inherits the base URL path,
        //         and
        if (url.path_ == null) {
            url.path_ = baseUrl.path_;
            //  a) if the embedded URL's <params> is non-empty, we skip to
            //     step 7; otherwise, it inherits the <params> of the base
            //     URL (if any) and
            if (url.parameters_ != null) {
                return url;
            }
            url.parameters_ = baseUrl.parameters_;
            //  b) if the embedded URL's <query> is non-empty, we skip to
            //     step 7; otherwise, it inherits the <query> of the base
            //     URL (if any) and we skip to step 7.
            if (url.query_ != null) {
                return url;
            }
            url.query_ = baseUrl.query_;
            return url;
        }
        // Step 6: The base path and the embedded path are merged and the dot
        //         segments are resolved (see mergePaths); leading "/.." segments
        //         that remain are then dropped as browsers do.
        url.path_ = removeLeadingSlashPoints(mergePaths(baseUrl.path_, url.path_));
        // Step 7: The resulting URL components, including any inherited from
        //         the base URL, are recombined to give the absolute form of
        //         the embedded URL.
        return url;
    }

    /**
     * Implements Step 6 of RFC1808 Section 4: the last segment of the base URL's path
     * (anything following the rightmost slash "/", or the entire path if no slash is
     * present) is removed and the embedded URL's path is appended in its place; the
     * "." and ".." segments of the result are then resolved.
     *
     * @param basePath     the path of the base URL (may be {@code null}, in which case
     *                     the embedded path is appended to "/")
     * @param relativePath the relative path of the embedded URL (must not be {@code null})
     * @return the merged path; ".." segments without a preceding segment to cancel are kept
     */
    private static String mergePaths(final String basePath, final String relativePath) {
        final String directory;
        if (basePath == null) {
            directory = "/";
        }
        else {
            // lastIndexOf yields -1 when the base path has no slash, giving the empty string.
            directory = basePath.substring(0, basePath.lastIndexOf('/') + 1);
        }
        String path = directory.concat(relativePath);

        //      a) All occurrences of "./", where "." is a complete path
        //         segment, are removed.
        int index;
        while ((index = path.indexOf("/./")) >= 0) {
            path = path.substring(0, index + 1).concat(path.substring(index + 3));
        }
        //      b) If the path ends with "." as a complete path segment,
        //         that "." is removed.
        if (path.endsWith("/.")) {
            path = path.substring(0, path.length() - 1);
        }
        //      c) All occurrences of "<segment>/../", where <segment> is a
        //         complete path segment not equal to "..", are removed.
        //         Removal of these path segments is performed iteratively,
        //         removing the leftmost matching pattern on each iteration,
        //         until no matching pattern remains.
        int searchFrom = 0;
        while ((index = path.indexOf("/../", searchFrom)) >= 0) {
            // Start of the segment that precedes the "/../" (0 when there is no earlier slash).
            final int segmentStart = path.lastIndexOf('/', index - 1) + 1;
            final boolean removable = index > 0 && !"..".equals(path.substring(segmentStart, index));
            if (removable) {
                path = path.substring(0, segmentStart).concat(path.substring(index + 4));
                // The removal can only create a new match starting at the slash just
                // before the removed segment, so resume the search from there.
                searchFrom = Math.max(segmentStart - 1, 0);
            }
            else {
                // Nothing to cancel here (leading "/../" or "../../"): step over this match so
                // that the loop always makes progress. Its trailing slash may start the next one.
                searchFrom = index + 3;
            }
        }
        //      d) If the path ends with "<segment>/..", where <segment> is a
        //         complete path segment not equal to "..", that
        //         "<segment>/.." is removed.
        if (path.endsWith("/..")) {
            final int separatorIndex = path.length() - 3;
            final int segmentStart = path.lastIndexOf('/', separatorIndex - 1) + 1;
            if (separatorIndex > 0 && !"..".equals(path.substring(segmentStart, separatorIndex))) {
                path = path.substring(0, segmentStart);
            }
        }
        return path;
    }

    /**
     * Removes leading "/.." path segments, as browsers do (this is not part of the RFC).
     * Only complete ".." segments are removed: a path such as "/..g" is returned unchanged.
     * If the whole path consists of "/.." segments, the root path "/" is returned.
     *
     * @param path the path to clean up
     * @return the path without leading "/.." segments
     */
    private static String removeLeadingSlashPoints(final String path) {
        int offset = 0;
        while (path.startsWith("/..", offset)
                && (path.length() == offset + 3 || path.charAt(offset + 3) == '/')) {
            offset += 3;
        }
        if (offset == 0) {
            return path;
        }
        if (offset == path.length()) {
            // Everything was "/.." segments; keep the root slash.
            return "/";
        }
        return path.substring(offset);
    }

    /**
     * Class {@code Url} represents a Uniform Resource Locator split into the components
     * named by RFC1808. A component that is absent is {@code null}.
     *
     * @author Martin Tamme
     */
    private static class Url {

        /** The scheme, without the trailing ":". */
        private String scheme_;
        /** The network location/login, without the leading "//". */
        private String location_;
        /** The path, including its leading "/" when present. */
        private String path_;
        /** The parameters, without the leading ";". */
        private String parameters_;
        /** The query, without the leading "?". */
        private String query_;
        /** The fragment identifier, without the leading "#". */
        private String fragment_;

        /**
         * Creates a {@code Url} object with all components absent.
         */
        public Url() {
        }

        /**
         * Creates a {@code Url} object as a copy of the specified {@code Url} object.
         *
         * @param url a {@code Url} object.
         */
        public Url(final Url url) {
            scheme_ = url.scheme_;
            location_ = url.location_;
            path_ = url.path_;
            parameters_ = url.parameters_;
            query_ = url.query_;
            fragment_ = url.fragment_;
        }

        /**
         * Returns a string representation of the {@code Url} object, recombining the
         * components that are present with their delimiters.
         *
         * @return a string representation of the {@code Url} object.
         */
        @Override
        public String toString() {
            final StringBuilder sb = new StringBuilder();

            if (scheme_ != null) {
                sb.append(scheme_);
                sb.append(':');
            }
            if (location_ != null) {
                sb.append("//");
                sb.append(location_);
            }
            if (path_ != null) {
                sb.append(path_);
            }
            if (parameters_ != null) {
                sb.append(';');
                sb.append(parameters_);
            }
            if (query_ != null) {
                sb.append('?');
                sb.append(query_);
            }
            if (fragment_ != null) {
                sb.append('#');
                sb.append(fragment_);
            }
            return sb.toString();
        }
    }
}



/*

 * Copyright (c) 2002-2009 Gargoyle Software Inc.

 *

 * Licensed under the Apache License, Version 2.0 (the "License");

 * you may not use this file except in compliance with the License.

 * You may obtain a copy of the License at

 * http://www.apache.org/licenses/LICENSE-2.0

 *

 * Unless required by applicable law or agreed to in writing, software

 * distributed under the License is distributed on an "AS IS" BASIS,

 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.

 * See the License for the specific language governing permissions and

 * limitations under the License.

 */





/**
 * String utilities class for utility functions not covered by third party libraries.
 *
 * @version $Revision: 4002 $
 * @author Daniel Gredler
 * @author Ahmed Ashour
 * @author Martin Tamme
 */
final class StringUtils {

    /**
     * Disallow instantiation of this class.
     */
    private StringUtils() {
        // Empty.
    }

    /**
     * Escapes the characters {@code '<'}, {@code '>'} and {@code '&'} into their XML entity
     * equivalents. Note that sometimes we have to use this method instead of
     * {@code org.apache.commons.lang.StringEscapeUtils#escapeXml(String)} or
     * {@code org.apache.commons.lang.StringEscapeUtils#escapeHtml(String)} because those methods
     * escape some unicode characters as well.
     *
     * @param s the string to escape
     * @return the escaped form of the specified string
     */
    public static String escapeXmlChars(final String s) {
        // '&' must be replaced first, otherwise the ampersands of the other entities get escaped.
        return s.replace("&", "&amp;").replace("<", "&lt;").replace(">", "&gt;");
    }

    /**
     * Returns {@code true} if the specified string contains whitespace, {@code false} otherwise.
     *
     * @param s the string to check for whitespace
     * @return {@code true} if the specified string contains whitespace, {@code false} otherwise
     */
    public static boolean containsWhitespace(final String s) {
        final int length = s.length();
        for (int i = 0; i < length; i++) {
            if (Character.isWhitespace(s.charAt(i))) {
                return true;
            }
        }
        return false;
    }

    /**
     * Returns the index within a given string of the first occurrence of
     * the specified search character.
     *
     * @param s          a string.
     * @param searchChar a search character.
     * @param beginIndex the index to start the search from (inclusive).
     * @param endIndex   the index to stop the search (exclusive).
     * @return the index of the first occurrence of the character in the string or {@code -1}.
     */
    public static int indexOf(
            final String s,
            final char searchChar,
            final int beginIndex,
            final int endIndex) {
        for (int i = beginIndex; i < endIndex; i++) {
            if (s.charAt(i) == searchChar) {
                return i;
            }
        }
        return -1;
    }

    /**
     * Returns {@code true} if the specified string is a valid float, possibly trimming the string
     * before checking. When {@code trim} is {@code false}, a string with leading or trailing
     * whitespace is not considered a valid float.
     *
     * @param s the string to check
     * @param trim whether or not to trim the string before checking
     * @return {@code true} if the specified string is a valid float, {@code false} otherwise
     */
    public static boolean isFloat(final String s, final boolean trim) {
        final String candidate = s.trim();
        // Float.parseFloat ignores surrounding whitespace on its own, so padded input
        // has to be rejected explicitly when the caller did not ask for trimming.
        if (!trim && candidate.length() != s.length()) {
            return false;
        }
        try {
            Float.parseFloat(candidate);
            return true;
        }
        catch (final NumberFormatException e) {
            return false;
        }
    }

    /**
     * Returns {@code true} if the specified list of strings contains the specified string,
     * ignoring case. The comparison does not depend on the default locale, and {@code null}
     * elements of the list never match.
     *
     * @param strings the strings to search
     * @param string the string to search for (must not be {@code null})
     * @return {@code true} if the specified list of strings contains the specified string, ignoring case
     */
    public static boolean containsCaseInsensitive(final List<String> strings, final String string) {
        for (final String s : strings) {
            if (string.equalsIgnoreCase(s)) {
                return true;
            }
        }
        return false;
    }

}
// URL utilities class that makes it easy to create new URLs based off of old URLs without having to assemble or parse them yourself : Utilities « Network Protocol « Java