001: package org.claros.commons.mail.parser;
002:
003: import java.util.ArrayList;
004: import java.util.Locale;
005: import java.util.regex.Matcher;
006: import java.util.regex.Pattern;
007:
008: import org.claros.commons.mail.models.Email;
009: import org.claros.commons.mail.models.EmailPart;
010: import org.claros.commons.utility.Utility;
011:
012: /**
013: * @author Umut Gokbayrak
014: *
015: */
016: public class HTMLMessageParser {
017: private static Locale loc = new Locale("en", "US");
018:
019: /**
020: * Default Constructor
021: */
022: public HTMLMessageParser() {
023: super ();
024: }
025:
026: public static String prepareInlineHTMLContent(Email msg, String str) {
027: if ((str == null) || (str.equals("")))
028: return "";
029: int i = -1;
030: // cid with double quotes
031: String tmp = null;
032: String contentId = null;
033: int j = -1;
034: int partId = -1;
035: while ((i = str.indexOf("\"cid:")) != -1) {
036: tmp = str.substring(i + 5);
037: j = tmp.indexOf("\"");
038: contentId = tmp.substring(0, j);
039: partId = getPartIdByContentId(msg, contentId);
040:
041: str = str.substring(0, i) + "\"dumpPart.service?partid="
042: + partId + "\"" + tmp.substring(j + 1);
043: }
044:
045: return str;
046: }
047:
048: public static String organizeLinks(String str) {
049: // clear targets first to eliminate duplicate targets in hrefs
050: String EXPR = null;
051: StringBuffer buffer = null;
052: Matcher matcher = null;
053: int count = 0;
054: String tag = null;
055: try {
056: EXPR = "target([ =\"\']*[\t\n\r:#0-9a-z\\./@~?&=;%_-]+[ \"']*)";
057: buffer = new StringBuffer();
058: matcher = Pattern.compile(EXPR, Pattern.CASE_INSENSITIVE)
059: .matcher(str);
060: count = 0;
061: while (matcher.find()) {
062: count++;
063: tag = matcher.group();
064: tag = findTagValue(tag);
065: matcher.appendReplacement(buffer, "");
066: }
067: matcher.appendTail(buffer);
068: str = buffer.toString();
069: } catch (Throwable e) {
070: e.printStackTrace();
071: }
072:
073: // now parse the mailto and href links.
074: try {
075: EXPR = " href([ =\"\']*[\t\n\r:#0-9a-z\\./@~?&=;%_-]+[ \"']*)";
076: buffer = new StringBuffer();
077: matcher = Pattern.compile(EXPR, Pattern.CASE_INSENSITIVE)
078: .matcher(str);
079: count = 0;
080: while (matcher.find()) {
081: count++;
082: tag = matcher.group();
083: tag = findTagValue(tag);
084: // eliminate the mailto hrefs
085: if (tag != null
086: && tag.toLowerCase(loc).startsWith("mailto:")) {
087: tag = tag.substring(7);
088: }
089: // replace the values of mail links with the compose form values
090: if (tag.indexOf("@") > 0) {
091: tag = "javascript:parent.fastEmail('" + tag + "')";
092: matcher.appendReplacement(buffer, " href=\"" + tag
093: + "\"");
094: } else {
095: matcher.appendReplacement(buffer, " href=\"" + tag
096: + "\" target=\"_blank\" ");
097: }
098: }
099: matcher.appendTail(buffer);
100: str = buffer.toString();
101: } catch (Throwable e) {
102: e.printStackTrace();
103: }
104:
105: return str;
106: }
107:
108: private static String findTagValue(String tag) {
109: int pos = tag.indexOf("=");
110: tag = Utility.extendedTrim(tag.substring(pos + 1), "\"");
111: tag = Utility.extendedTrim(tag, "'");
112: return tag;
113: }
114:
115: private static int getPartIdByContentId(Email msg, String cid) {
116: ArrayList parts = msg.getParts();
117: for (int i = 0; i < parts.size(); i++) {
118: EmailPart part = (EmailPart) parts.get(i);
119: String contentId = part.getContentId();
120: String fileName = part.getFileName();
121: if ((contentId != null && contentId.equals(cid))
122: || (fileName != null && fileName
123: .equalsIgnoreCase(cid))) {
124: return i;
125: }
126: }
127: // still havent found it. maybe cid is surrounded with < >
128: for (int i = 0; i < parts.size(); i++) {
129: EmailPart part = (EmailPart) parts.get(i);
130: String contentId = part.getContentId();
131: contentId = Utility.extendedTrim(contentId, "<");
132: contentId = Utility.extendedTrim(contentId, ">");
133: if (contentId != null && contentId.equals(cid)) {
134: return i;
135: }
136: }
137: return -1;
138: }
139: }
|