001: /*
002: * This program is free software; you can redistribute it and/or modify
003: * it under the terms of the GNU General Public License as published by
004: * the Free Software Foundation; either version 2 of the License, or
005: * (at your option) any later version.
006: *
007: * This program is distributed in the hope that it will be useful,
008: * but WITHOUT ANY WARRANTY; without even the implied warranty of
009: * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
010: * GNU Library General Public License for more details.
011: *
012: * You should have received a copy of the GNU General Public License
013: * along with this program; if not, write to the Free Software
014: * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
015: */
016: package dlog4j.formbean;
017:
018: import org.apache.commons.lang.StringUtils;
019: import org.htmlparser.Node;
020: import org.htmlparser.NodeFilter;
021: import org.htmlparser.Parser;
022: import org.htmlparser.tags.TableTag;
023: import org.htmlparser.util.NodeList;
024:
025: /**
026: * 支持内容预览的基类
027: * @author Liudong
028: */
029: public class ContentPreviewForm extends DlogActionForm {
030:
031: public final static int MAX_COUNT = 400;
032: public final static int MAX_COUNT2 = 1000;
033: public static int BRIEF_LENGTH = 25;
034:
035: String content = null;
036:
037: public String getBrief() {
038: //去除所有尖括号对
039: if (content == null)
040: return "无内容";
041: String ct = StringUtils.replace(content, " ", "");
042: StringBuffer brief = new StringBuffer(256);
043: int cur = 0;
044: do {
045: int idx = ct.indexOf('<', cur);
046: if (idx == -1) {
047: brief.append(ct.substring(cur));
048: break;
049: } else {
050: brief.append(ct.substring(cur, idx));
051: cur = ct.indexOf('>', idx);
052: if (cur == -1)
053: break;
054: cur++;
055: }
056: } while (true);
057: String text = StringUtils.left(brief.toString().trim(),
058: BRIEF_LENGTH);
059: if (text == null || text.length() == 0)
060: text = "[无文本信息]";
061: else
062: text = StringUtils.replace(text, " ", " ");
063:
064: return text.trim();
065: }
066:
067: /**
068: * 获取HTML的预览信息
069: * @return
070: */
071: public String getPreviewContent() {
072: String ct = StringUtils.left(content, MAX_COUNT);
073: try {
074: //截取前N个字符
075: if (ct != null && content != null) {
076: int idx2 = ct.lastIndexOf('>');
077: int idx1 = ct.lastIndexOf('<');
078: if ((idx2 == -1 && idx1 >= 0) || idx1 > idx2) {
079: String ct2 = content.substring(ct.length());
080: int idx3 = ct2.indexOf('>');
081: if (idx3 != -1 && idx3 < (MAX_COUNT2 - MAX_COUNT)) {
082: ct += content.substring(ct.length(), ct
083: .length()
084: + idx3 + 1);
085: }
086: }
087: }
088: if (ct != null && content != null) {
089: int idx2 = ct.toLowerCase().lastIndexOf("</object>");
090: int idx1 = ct.toLowerCase().lastIndexOf("<object");
091: if ((idx2 == -1 && idx1 >= 0) || idx1 > idx2) {
092: String ct2 = content.substring(ct.length())
093: .toLowerCase();
094: int idx3 = ct2.indexOf("</object>");
095: if (idx3 != -1)
096: ct += content.substring(ct.length(), ct
097: .length()
098: + idx3 + 9);
099: else
100: ct = ct.substring(0, idx1);
101: }
102: }
103: if (ct != null && content != null) {
104: //System.out.println(ct);
105: int idx1 = ct
106: .toLowerCase()
107: .lastIndexOf(
108: "<div align='right'><font color='#cccccc' size='1'>[edit");
109: int idx2 = ct.toLowerCase().lastIndexOf(
110: "]</font></div>");
111: //System.out.println("idx1="+idx1+",idx2="+idx2);
112: if ((idx1 >= 0 && idx2 == -1) || idx1 > idx2) {
113: String ct2 = content.substring(ct.length());
114: int idx3 = ct2.toLowerCase().indexOf(
115: "]</font></div>");
116: if (idx3 != -1)
117: ct += content.substring(ct.length(), ct
118: .length()
119: + idx3 + 14);
120: else
121: ct = ct.substring(0, idx1);
122: }
123: }
124: if (ct != null && content != null) {
125: Parser parser = Parser.createParser(new String(ct
126: .getBytes(), ISO8859_1));
127: Node[] tables = parser
128: .extractAllNodesThatAre(TableTag.class);
129: if (tables != null && tables.length > 0) {
130: TableTag tableTag = (TableTag) tables[0];
131: ct = ct.substring(0, tableTag.getStartPosition())
132: + new String(tableTag.toHtml().getBytes(
133: ISO8859_1));
134: }
135: }
136: pc_len = ct.length();
137: } catch (NullPointerException e) {
138: } catch (Exception e) {
139: e.printStackTrace();
140: }
141: return ct;
142: }
143:
144: int pc_len = -1;
145:
146: public String getOtherContent() {
147: if (content != null) {
148: if (pc_len == -1)
149: pc_len = getPreviewContent().length();
150: int pl = (pc_len == -1) ? 0 : pc_len;
151: int cl = getContent().length() - pl;
152: if (cl > 0)
153: return content.substring(pl);
154: }
155: return null;
156: }
157:
158: public String getContent() {
159: return content;
160: }
161:
162: public String getWmlContent() {
163: try {
164: return extractText(content);
165: } catch (Exception e) {
166: }
167: return null;
168: }
169:
170: public void setContent(String content) {
171: this .content = content;
172: }
173:
174: /**
175: * 抽取纯文本信息
176: *
177: * @param inputHtml
178: * @return
179: */
180: protected static String extractText(String inputHtml)
181: throws Exception {
182: StringBuffer text = new StringBuffer();
183: Parser parser = Parser.createParser(new String(inputHtml
184: .getBytes(), ISO8859_1));
185: //遍历所有的节点
186: NodeList nodes = parser
187: .extractAllNodesThatMatch(new NodeFilter() {
188: public boolean accept(Node node) {
189: return true;
190: }
191: });
192: for (int i = 0; i < nodes.size(); i++) {
193: Node node = nodes.elementAt(i);
194: text.append(new String(node.toPlainTextString().getBytes(
195: ISO8859_1)));
196: }
197: return text.toString();
198: }
199:
200: public static void main(String[] args) {
201: String ct = "点击<A href=\"http://www.javayou.com/dlog/showlog.asp?log_id=534\" target=_blank><FONT color=#ff0000 size=3><STRONG>这里</STRONG></FONT></A>查看 <DIV align=right><FONT color=#cccccc size=1>[Edit on 2004-03-11 14:18:31 By 管理员]</FONT></DIV>";
202: ContentPreviewForm cpf = new ContentPreviewForm();
203: cpf.setContent(ct);
204: System.out.println(cpf.getPreviewContent());
205: }
206:
207: private final static String ISO8859_1 = "8859_1";
208: }
|