001: /*
002: * Licensed to the Apache Software Foundation (ASF) under one or more
003: * contributor license agreements. The ASF licenses this file to You
004: * under the Apache License, Version 2.0 (the "License"); you may not
005: * use this file except in compliance with the License.
006: * You may obtain a copy of the License at
007: *
008: * http://www.apache.org/licenses/LICENSE-2.0
009: *
010: * Unless required by applicable law or agreed to in writing, software
011: * distributed under the License is distributed on an "AS IS" BASIS,
012: * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
013: * See the License for the specific language governing permissions and
014: * limitations under the License. For additional information regarding
015: * copyright in this work, please see the NOTICE file in the top level
016: * directory of this distribution.
017: */
018: /*
019: * Created on Nov 8, 2003
020: *
021: */
022: package org.apache.roller.util;
023:
024: import org.apache.commons.codec.binary.Hex;
025:
026: import java.io.UnsupportedEncodingException;
027: import java.util.ArrayList;
028: import java.util.regex.Matcher;
029: import java.util.regex.Pattern;
030:
/**
 * Regular-expression helpers, mainly for hiding e-mail addresses that appear
 * in text: addresses inside <code>mailto:</code> links are percent-encoded and
 * bare addresses are rewritten with <code>-AT-</code> / <code>-DOT-</code>.
 *
 * @author lance
 */
public class RegexUtil {
    /**
     * Matches a <code>mailto:</code> link. Group 1 is the address that follows
     * the <code>mailto:</code> prefix.
     */
    public static final Pattern mailtoPattern = Pattern
            .compile("mailto:([a-zA-Z0-9\\.]+@[a-zA-Z0-9\\.]+\\.[a-zA-Z0-9]+)");

    /**
     * Matches a bare address. Group 1 is the <code>@</code>, group 2 the part
     * of the domain before its last dot, group 3 that last dot and group 4 the
     * text after it. The local part is matched but has no group of its own.
     */
    public static final Pattern emailPattern = Pattern
            .compile("\\b[a-zA-Z0-9\\.]+(@)([a-zA-Z0-9\\.]+)(\\.)([a-zA-Z0-9]+)\\b");

    /**
     * Hides every e-mail address in the given text.
     * <p>
     * Addresses inside <code>mailto:</code> links are replaced by their
     * percent-encoded form (see {@link #encode(String)}), so that browsers can
     * still understand the mailto link. The result is then passed through
     * {@link #obfuscateEmail(String)} to rewrite the remaining plain-text
     * addresses.
     *
     * @param str the text to process; may be <code>null</code>
     * @return the processed text, or <code>null</code> if <code>str</code>
     *         was <code>null</code>
     */
    public static String encodeEmail(String str) {
        if (str == null) {
            return null;
        }

        Matcher mailtoMatch = mailtoPattern.matcher(str);
        StringBuffer result = new StringBuffer(str.length());
        int copiedUpTo = 0;
        while (mailtoMatch.find()) {
            // Rebuild from the match offsets instead of calling replaceFirst():
            // replaceFirst() would treat the address as a regex (its dots match
            // any character) and could rewrite a different, earlier piece of text.
            result.append(str.substring(copiedUpTo, mailtoMatch.start(1)));
            result.append(encode(mailtoMatch.group(1)));
            copiedUpTo = mailtoMatch.end(1);
        }
        result.append(str.substring(copiedUpTo));

        return obfuscateEmail(result.toString());
    }

    /**
     * Obfuscates plain-text e-mail addresses: makes them "human-readable" -
     * still too easy for machines to parse however.
     * <p>
     * In every match of {@link #emailPattern} the <code>@</code> becomes
     * <code>-AT-</code> and the last dot of the domain becomes
     * <code>-DOT-</code>; for example <code>a@b.c.d</code> becomes
     * <code>a-AT-b.c-DOT-d</code>. Text outside the matches is left untouched.
     *
     * @param str the text to process; may be <code>null</code>
     * @return the text with its addresses rewritten, or <code>null</code> if
     *         <code>str</code> was <code>null</code>
     */
    public static String obfuscateEmail(String str) {
        if (str == null) {
            return null;
        }

        Matcher emailMatch = emailPattern.matcher(str);
        StringBuffer result = new StringBuffer(str.length() + 16);
        int copiedUpTo = 0;
        while (emailMatch.find()) {
            // Copy everything up to the matched "@" (this includes the local
            // part), then emit the rewritten remainder of this very match, so
            // that no other "@" or look-alike text elsewhere is affected.
            result.append(str.substring(copiedUpTo, emailMatch.start(1)));
            result.append("-AT-");
            result.append(emailMatch.group(2));
            result.append("-DOT-");
            result.append(emailMatch.group(4));
            copiedUpTo = emailMatch.end();
        }
        result.append(str.substring(copiedUpTo));

        return result.toString();
    }

    /**
     * Returns the specified match "group" for every match of the pattern.
     * For each match found, the text of that group is added to the list as a
     * String, in the order the matches occur.
     *
     * @param pattern The Pattern to use.
     * @param match The String to match against.
     * @param group The group number to return in case of a match.
     * @return a new list holding one entry per match; empty when the pattern
     *         does not match at all
     */
    public static ArrayList getMatches(Pattern pattern, String match,
            int group) {
        ArrayList matches = new ArrayList();
        Matcher matcher = pattern.matcher(match);
        while (matcher.find()) {
            matches.add(matcher.group(group));
        }
        return matches;
    }

    /**
     * Percent-encodes every byte of the UTF-8 form of the given text: each
     * byte becomes <code>%</code> followed by two lowercase hexadecimal
     * digits, e.g. <code>a@b.c</code> becomes <code>%61%40%62%2e%63</code>.
     * <p>
     * Thanks to the folks at Blojsom (http://sf.net/projects/blojsom)
     * for showing me what I was doing wrong with the Hex class.
     *
     * @param email the text to encode; may be <code>null</code>
     * @return the encoded text, or <code>null</code> if <code>email</code>
     *         was <code>null</code>
     */
    public static String encode(String email) {
        if (email == null) {
            return null;
        }

        byte[] bytes;
        try {
            bytes = email.getBytes("UTF-8");
        } catch (UnsupportedEncodingException e) {
            // Kept from the original contract: if the charset lookup fails,
            // hand the text back unencoded rather than throwing.
            return email;
        }

        StringBuffer result = new StringBuffer(bytes.length * 3);
        for (int i = 0; i < bytes.length; i++) {
            int value = bytes[i] & 0xff; // view the signed byte as 0..255
            result.append('%');
            result.append(Character.forDigit(value >> 4, 16));
            result.append(Character.forDigit(value & 0x0f, 16));
        }
        return result.toString();
    }
}
|