001: package com.ivata.groupware.web.format;
002:
003: import com.ivata.mask.util.StringHandling;
004: import com.ivata.mask.web.format.CharacterEntityFormat;
005: import com.ivata.mask.web.format.HTMLFormat;
006: import com.ivata.mask.web.format.HTMLFormatter;
007:
008: import org.apache.log4j.Logger;
009:
010: import org.dom4j.DocumentException;
011:
012: import org.dom4j.io.SAXReader;
013: import org.dom4j.io.XMLWriter;
014:
015: import org.w3c.dom.Comment;
016: import org.w3c.dom.Document;
017: import org.w3c.dom.Element;
018: import org.w3c.dom.EntityReference;
019: import org.w3c.dom.Node;
020: import org.w3c.dom.NodeList;
021: import org.w3c.dom.Text;
022:
023: import org.w3c.tidy.Tidy;
024:
025: import java.io.ByteArrayInputStream;
026: import java.io.ByteArrayOutputStream;
027: import java.io.IOException;
028: import java.io.InputStream;
029: import java.io.UnsupportedEncodingException;
030:
031: import java.util.Iterator;
032:
033: /*
034: * Copyright (c) 2001 - 2005 ivata limited.
035: * All rights reserved.
036: * -----------------------------------------------------------------------------
037: * ivata groupware may be redistributed under the GNU General Public
038: * License as published by the Free Software Foundation;
039: * version 2 of the License.
040: *
041: * These programs are free software; you can redistribute them and/or
042: * modify them under the terms of the GNU General Public License
043: * as published by the Free Software Foundation; version 2 of the License.
044: *
045: * These programs are distributed in the hope that they will be useful,
046: * but WITHOUT ANY WARRANTY; without even the implied warranty of
047: * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
048: *
049: * See the GNU General Public License in the file LICENSE.txt for more
050: * details.
051: *
052: * If you would like a copy of the GNU General Public License write to
053: *
054: * Free Software Foundation, Inc.
055: * 59 Temple Place - Suite 330
056: * Boston, MA 02111-1307, USA.
057: *
058: *
059: * To arrange commercial support and licensing, contact ivata at
060: * http://www.ivata.com/contact.jsp
061: * -----------------------------------------------------------------------------
062: * $Log: SanitizerFormat.java,v $
063: * Revision 1.8 2005/10/12 18:36:41 colinmacleod
064: * Standardized format of Logger declaration - to make it easier to find instances
065: * which are not both static and final.
066: *
067: * Revision 1.7 2005/10/11 18:57:47 colinmacleod
068: * Fixed some checkstyle and javadoc issues.
069: *
070: * Revision 1.6 2005/10/02 14:08:57 colinmacleod
071: * Added/improved log4j logging.
072: *
073: * Revision 1.5 2005/09/14 15:38:34 colinmacleod
074: * Removed unused local and class variables.
075: *
076: * Revision 1.4 2005/04/30 13:07:31 colinmacleod
077: * Added EntityResolver so you don't need an
078: * internet connection.
079: *
080: * Revision 1.3 2005/04/10 20:32:00 colinmacleod
081: * Added new themes.
082: * Changed id type to String.
083: * Changed i tag to em and b tag to strong.
084: * Improved PicoContainerFactory with NanoContainer scripts.
085: *
086: * Revision 1.2 2005/04/09 17:19:42 colinmacleod
087: * Changed copyright text to GPL v2 explicitly.
088: *
089: * Revision 1.1.1.1 2005/03/10 17:49:53 colinmacleod
090: * Restructured ivata op around Hibernate/PicoContainer.
091: * Renamed ivata groupware.
092: *
093: * Revision 1.7 2004/11/03 15:54:32 colinmacleod
094: * Changed todo comments to TODO: all caps.
095: *
096: * Revision 1.6 2004/09/30 14:58:06 colinmacleod
097: * Bugfixes for documents with no surrounding tags.
098: * Added log4j.
099: *
100: * Revision 1.5 2004/08/01 11:54:07 colinmacleod
101: * Removed ivata groupware custom HTML parser in favor of JTidy.
102: *
103: * Revision 1.4 2004/07/13 19:48:08 colinmacleod
104: * Moved project to POJOs from EJBs.
105: * Applied PicoContainer to services layer (replacing session EJBs).
106: * Applied Hibernate to persistence layer (replacing entity EJBs).
107: *
108: * Revision 1.3 2004/03/21 21:16:27 colinmacleod
109: * Shortened name to ivata op.
110: *
111: * Revision 1.2 2004/02/01 22:07:30 colinmacleod
112: * Added full names to author tags
113: *
114: * Revision 1.1.1.1 2004/01/27 20:58:30 colinmacleod
115: * Moved ivata openportal to SourceForge..
116: *
117: * Revision 1.2 2003/10/15 14:15:36 colin
118: * fixing for XDoclet
119: *
120: * Revision 1.2 2003/05/06 13:42:25 peter
121: * added embedded IMG attachments functionality
122: *
123: * Revision 1.1 2003/02/24 19:33:33 colin
124: * moved to jsp
125: *
126: * Revision 1.2 2003/02/04 17:43:46 colin
127: * copyright notice
128: *
129: * Revision 1.1 2002/08/10 21:17:48 colin
130: * first version of HTML sanitizer/parser to clean up HTML code
131: */
132:
/**
 * <p>
 * This class uses the parser defined in {@link com.ivata.groupware.web.parser}
 * to tidy up the HTML and possibly convert it to text-only.
 * </p>
 *
 * @since 2002-08-10
 * @author Colin MacLeod
 * <a href='mailto:colin.macleod@ivata.com'>colin.macleod@ivata.com</a>
 * @version $Revision: 1.8 $
 * @see com.ivata.groupware.web.parser
 */
145: public class SanitizerFormat implements HTMLFormat {
146: /**
147: * <p>
148: * <strong>Log4J</strong> logger.
149: * </p>
150: */
151: private static final Logger logger = Logger
152: .getLogger(SanitizerFormat.class);
153:
154: /**
155: * <p>Used to convert character entities back again in text mode.</p>
156: */
157: private CharacterEntityFormat characterEntities = new CharacterEntityFormat();
158:
159: /**
160: * <p>
161: * Stores whether or not <code>format</code> should return just plain
162: * text. If <code>true</code>, only text is returned, otherwise formatted
163: * HTML is returned.
164: * </p>
165: */
166: private boolean formattedText = false;
167:
168: /**
169: * <p>Used to convert character entities back again in text mode.</p>
170: */
171: private HTMLFormatter formatter = new HTMLFormatter();
172:
173: /**
174: * <p>
175: * If <code>true</code> then only the contents of the body tag are returned.
176: * </p>
177: */
178: private boolean onlyBodyContents = false;
179:
180: /**
181: * <p>
182: * Stores name of the source or file to output for debugging.
183: * </p>
184: */
185: private String sourceName = "user input";
186:
187: /**
188: * <p>
189: * Remember whether or not we're at the start of a line in a text file.
190: * </p>
191: */
192: private boolean textAtStartOfLine = true;
193:
194: /**
195: * <p>Remember how many newlines we've made in a text file.</p>
196: */
197: private int textNewLineCount = 0;
198:
199: /**
200: * <p>
201: * Stores whether or not <code>format</code> should return just plain
202: * text with line feeds and converted horizonal rule. If <code>true</code>,
203: * fomratted text is returned, otherwise formatted
204: * HTML is returned.
205: * </p>
206: */
207: private boolean textOnly = false;
208:
209: /**
210: * <p>
211: * This tidy instance does all the hard work.
212: * </p>
213: */
214: private Tidy tidy = new Tidy();
215:
/**
 * <p>
 * Creates a sanitizer: prepares the entity formatter used in text mode and
 * configures the Tidy instance which parses and pretty-prints the markup.
 * </p>
 */
public SanitizerFormat() {
    // text mode needs character entities converted back again, so the
    // entity format runs in reverse and is plugged into the formatter
    characterEntities.setReverse(true);
    formatter.add(characterEntities);

    // what Tidy emits: cleaned-up XML with lower-case tags and attributes
    tidy.setXmlOut(true);
    tidy.setMakeClean(true);
    tidy.setUpperCaseTags(false);
    tidy.setUpperCaseAttrs(false);

    // how Tidy lays the output out
    tidy.setIndentContent(true);
    tidy.setBreakBeforeBR(true);

    // how much Tidy reports while working
    tidy.setQuiet(true);
    tidy.setOnlyErrors(true);
}
235:
236: /**
237: * <p>Convert an closing tag element to text.</p>
238: *
239: * @param element element which is closed.
240: * @param buffer <code>PrintWriter</code> to send the results to.
241: */
242: private void addCloseElementAsText(final Element element,
243: final StringBuffer buffer) {
244: if (logger.isDebugEnabled()) {
245: logger.debug("addCloseElementAsText(Element element = "
246: + element + ", StringBuffer buffer = " + buffer
247: + ") - start");
248: }
249:
250: // follow table cells with a tab
251: if (element.getTagName().equals("A")) {
252: // see what the link was
253: if (element.hasAttribute("href")) {
254: notTextNewLine();
255: buffer
256: .append(" (" + element.getAttribute("href")
257: + ")");
258: }
259: } else if (element.getTagName().equals("HR")
260: || element.getTagName().equals("H1")
261: || element.getTagName().equals("H2")
262: || element.getTagName().equals("H3")
263: || element.getTagName().equals("H4")
264: || element.getTagName().equals("H5")
265: || element.getTagName().equals("H6")) {
266: addTextNewLine(buffer);
267: buffer
268: .append("____________________________________________________________");
269: buffer.append("\n");
270: } else if (element.getTagName().equals("B")
271: || element.getTagName().equals("BIG")
272: || element.getTagName().equals("EM")
273: || element.getTagName().equals("I")
274: || element.getTagName().equals("STRONG")
275: || element.getTagName().equals("U")) {
276: notTextNewLine();
277: buffer.append("__");
278: } else if (element.getTagName().equals("TR")
279: || element.getTagName().equals("TD")
280: || element.getTagName().equals("TH")
281: || element.getTagName().equals("P")
282: || element.getTagName().equals("BR")
283: || element.getTagName().equals("CITE")
284: || element.getTagName().equals("LI")
285: || element.getTagName().equals("BLOCKQUOTE")) {
286: addTextNewLine(buffer);
287: }
288:
289: if (logger.isDebugEnabled()) {
290: logger
291: .debug("addCloseElementAsText(Element, StringBuffer) - end");
292: }
293: }
294:
295: /**
296: * <p>Convert an open tag element to text.</p>
297: *
298: * @param element element which is opened.
299: * @param buffer <code>PrintWriter</code> to send the results to.
300: */
301: private void addOpenElementAsText(final Element element,
302: final StringBuffer buffer) {
303: if (logger.isDebugEnabled()) {
304: logger.debug("addOpenElementAsText(Element element = "
305: + element + ", StringBuffer buffer = " + buffer
306: + ") - start");
307: }
308:
309: // precede some tags with a character in read-only mode
310: if (element.getTagName().equals("BLOCKQUOTE")
311: || element.getTagName().equals("CITE")
312: || element.getTagName().equals("H1")
313: || element.getTagName().equals("H2")
314: || element.getTagName().equals("H3")
315: || element.getTagName().equals("H4")
316: || element.getTagName().equals("H5")
317: || element.getTagName().equals("H6")
318: || element.getTagName().equals("OL")
319: || element.getTagName().equals("UL")
320: || element.getTagName().equals("TABLE")
321: || element.getTagName().equals("P")
322: || element.getTagName().equals("CITE")
323: || element.getTagName().equals("BLOCKQUOTE")) {
324: addTextNewLine(buffer);
325: } else if (element.getTagName().equals("B")
326: || element.getTagName().equals("BIG")
327: || element.getTagName().equals("EM")
328: || element.getTagName().equals("I")
329: || element.getTagName().equals("STRONG")
330: || element.getTagName().equals("U")) {
331: notTextNewLine();
332: buffer.append("__");
333: } else if (element.getTagName().equals("LI")) {
334: // TODO: work buffer somehow if it is ol or ul
335: addTextNewLine(buffer);
336: notTextNewLine();
337: buffer.append(" * ");
338: } else if (element.getTagName().equals("IMG")) {
339: // see if there is an alternate text for this image
340: if (element.hasAttribute("alt")) {
341: notTextNewLine();
342: buffer.append(formatter.format(element.getAttribute(
343: "alt").trim()));
344: } else if (element.hasAttribute("title")) {
345: notTextNewLine();
346: buffer.append(formatter.format(element.getAttribute(
347: "title").trim()));
348: }
349: }
350:
351: if (logger.isDebugEnabled()) {
352: logger
353: .debug("addOpenElementAsText(Element, StringBuffer) - end");
354: }
355: }
356:
357: /**
358: * <p>Write a text new line.</p>
359: *
360: * @param bufferParam the buffer to add the text new line to.
361: */
362: private void addTextNewLine(final StringBuffer bufferParam) {
363: if (logger.isDebugEnabled()) {
364: logger.debug("addTextNewLine(StringBuffer buffer = "
365: + bufferParam + ") - start");
366: }
367:
368: if (textNewLineCount < 2) {
369: textAtStartOfLine = true;
370: bufferParam.append("\n");
371: ++textNewLineCount;
372: }
373:
374: if (logger.isDebugEnabled()) {
375: logger.debug("addTextNewLine(StringBuffer) - end");
376: }
377: }
378:
379: /**
380: * <p>Add a string representation of the given element to the buffer.</p>
381: *
382: * @param node node to add, and to add all of the children for.
383: * @param buffer <code>PrintWriter</code> to send the results to.
384: * @throws IOException If there is a problem adding to the write buffer.
385: */
386: private void addToBuffer(final Node node, final StringBuffer buffer)
387: throws IOException {
388: if (logger.isDebugEnabled()) {
389: logger
390: .debug("addToBuffer(Node node = " + node
391: + ", StringBuffer buffer = " + buffer
392: + ") - start");
393: }
394:
395: Element element = null;
396:
397: if (formattedText && Element.class.isInstance(node)) {
398: element = (Element) node;
399: addOpenElementAsText(element, buffer);
400: } else if (formattedText && Comment.class.isInstance(node)) {
401: // ignore comments in text mode
402: if (logger.isDebugEnabled()) {
403: logger
404: .debug(("addToBuffer - Node is a comment, so ignoring."));
405: }
406: } else if (formattedText
407: && EntityReference.class.isInstance(node)) {
408: EntityReference entity = (EntityReference) node;
409: buffer.append("&");
410: buffer.append(entity.getNodeName());
411: buffer.append(";");
412: } else if (Text.class.isInstance(node)) {
413: Text text = (Text) node;
414: String data = text.getData();
415: StringBuffer dataReformatted = new StringBuffer();
416:
417: if (data != null) {
418: // strip buffer any funny characters and double spaces
419: int length = data.length();
420: boolean lastWasSpace = false;
421:
422: for (int index = 0; index < length; ++index) {
423: // newlines, carriage returns and tabs are all spaces now
424: if ((data.charAt(index) == '\n')
425: || (data.charAt(index) == '\r')
426: || (data.charAt(index) == ' ')
427: || (data.charAt(index) == '\t')) {
428: // ignore double spaces
429: if (!lastWasSpace) {
430: lastWasSpace = true;
431:
432: if (!textAtStartOfLine) {
433: dataReformatted.append(' ');
434: }
435: }
436: } else {
437: lastWasSpace = false;
438: dataReformatted.append(data.charAt(index));
439: }
440: }
441:
442: if (!(data = dataReformatted.toString()).equals("")) {
443: buffer.append(formatter.format(data));
444: notTextNewLine();
445: }
446: }
447: } else {
448: String value = node.getNodeValue();
449:
450: if (!StringHandling.isNullOrEmpty(value)) {
451: notTextNewLine();
452: buffer.append(value);
453: }
454: }
455:
456: // if that doesn't work, try the children
457: if (node.hasChildNodes() && ((element == null) ||
458: // these are the tags to ignore the contents of in text mode
459: (!element.getTagName().equals("APPLET")
460: && !element.getTagName().equals("EMBED") && !element
461: .getTagName().equals("SCRIPT")))) {
462: NodeList children = node.getChildNodes();
463:
464: for (int index = 0; index < children.getLength(); ++index) {
465: Node nextChild = children.item(index);
466: addToBuffer(nextChild, buffer);
467: }
468: }
469:
470: // in text only mode, certain elements are followed by a special
471: // character
472: if (element != null) {
473: addCloseElementAsText(element, buffer);
474: }
475:
476: if (logger.isDebugEnabled()) {
477: logger.debug("addToBuffer(Node, StringBuffer) - end");
478: }
479: }
480:
481: /**
482: * <p>
483: * Internal method which converts the <strong>HTML</strong> into plain text.
484: * </p>
485: *
486: * @param document Root <strong>HTML</strong> element to be converted.
487: * @return Plain text matching the <strong>HTML</strong>.
488: */
489: private String convertToText(final Document document) {
490: if (logger.isDebugEnabled()) {
491: logger.debug("convertToText(Document document = "
492: + document + ") - start");
493: }
494:
495: StringBuffer buffer = new StringBuffer();
496:
497: try {
498: addToBuffer(document, buffer);
499: } catch (IOException e) {
500: logger.error("convertToText(Document)", e);
501:
502: e.printStackTrace();
503:
504: String returnString = "ERROR: " + e.getMessage();
505:
506: if (logger.isDebugEnabled()) {
507: logger
508: .debug("convertToText(Document) - end - return value = "
509: + returnString);
510: }
511:
512: return returnString;
513: }
514:
515: String returnString = buffer.toString();
516:
517: if (logger.isDebugEnabled()) {
518: logger
519: .debug("convertToText(Document) - end - return value = "
520: + returnString);
521: }
522:
523: return returnString;
524: }
525:
526: /**
527: * <p>
528: * Format the string given in <code>hTMLText</code> and clean up the
529: * syntax of the HTML.
530: * </p>
531: *
532: * @param hTMLTextParam
533: * the text to truncate.
534: * @return Cleaned HTML text.
535: */
536: public String format(final String hTMLTextParam) {
537: if (logger.isDebugEnabled()) {
538: logger.debug("format(String hTMLTextParam = "
539: + hTMLTextParam + ") - start");
540: }
541:
542: if (hTMLTextParam == null) {
543: if (logger.isDebugEnabled()) {
544: logger.debug("Null input received - returning null.");
545: }
546:
547: return null;
548: }
549:
550: if (hTMLTextParam.trim().length() == 0) {
551: if (logger.isDebugEnabled()) {
552: logger
553: .debug("Empty input received - returning input unchanged.");
554: }
555:
556: return hTMLTextParam;
557: }
558:
559: // basic sanity check - if there is no HTML tag, assume we only have
560: // body content.
561: String lowerCaseText = hTMLTextParam.toLowerCase();
562: boolean hasHTMLTag = lowerCaseText.indexOf("<HTML") != -1;
563: String hTMLText;
564:
565: if (!hasHTMLTag) {
566: if (logger.isDebugEnabled()) {
567: logger.debug("format - No HTML tag found - "
568: + "surrounding everything with HTML and BODY.");
569: }
570:
571: StringBuffer newHTMLText = new StringBuffer();
572: newHTMLText
573: .append("<HTML><head><title></title></head><body>");
574: newHTMLText.append(hTMLTextParam);
575: newHTMLText.append("</body></HTML>");
576: hTMLText = newHTMLText.toString();
577: } else {
578: hTMLText = hTMLTextParam;
579: }
580:
581: // TOTAL HACK to convert JSP tags to entities
582: if (hTMLText.indexOf("<%") != -1) {
583: hTMLText = hTMLText.replaceAll("<%", "<%");
584: }
585:
586: if (hTMLText.indexOf("%>") != -1) {
587: hTMLText = hTMLText.replaceAll("%>", "%>");
588: }
589:
590: InputStream inStream = new ByteArrayInputStream(hTMLText
591: .getBytes());
592: Document document = tidy.parseDOM(inStream, null);
593:
594: if (textOnly) {
595: if (logger.isDebugEnabled()) {
596: logger.debug("Converting document to text.");
597: }
598:
599: return convertToText(document);
600: } else {
601: ByteArrayOutputStream outStream = new ByteArrayOutputStream();
602: tidy.pprint(document, outStream);
603:
604: if (onlyBodyContents
605: && (outStream.toString().trim().length() > 0)) {
606: SAXReader saxReader = new SAXReader();
607: String text = outStream.toString();
608:
609: // EVEN BIGGER HACK to remove previous over-zealous dash
610: // replacement
611: if (text.indexOf("−") != -1) {
612: text = text.replaceAll("−", "-");
613: }
614:
615: inStream = new ByteArrayInputStream(text.getBytes());
616:
617: org.dom4j.Document dom4jDocument;
618:
619: try {
620: dom4jDocument = saxReader.read(inStream);
621: } catch (DocumentException e) {
622: logger
623: .error(
624: "Error ("
625: + e.getClass().getName()
626: + ") reading the document back in after Tidy:\n"
627: + outStream.toString(), e);
628: throw new RuntimeException(e);
629: }
630:
631: org.dom4j.Element rootElement = dom4jDocument
632: .getRootElement();
633: org.dom4j.Element bodyElement = rootElement
634: .element("body");
635:
636: if (bodyElement == null) {
637: if (logger.isDebugEnabled()) {
638: logger
639: .debug("format(String) - end - return value = "
640: + null);
641: }
642:
643: return null;
644: }
645:
646: outStream = new ByteArrayOutputStream();
647:
648: XMLWriter writer;
649:
650: try {
651: writer = new XMLWriter(outStream,
652: new org.dom4j.io.OutputFormat("", true));
653: } catch (UnsupportedEncodingException e) {
654: logger
655: .error(
656: "Error ("
657: + e.getClass().getName()
658: + ") creating the document to write back out.",
659: e);
660: throw new RuntimeException(e);
661: }
662:
663: Iterator bodyNodeIterator = bodyElement.nodeIterator();
664:
665: while (bodyNodeIterator.hasNext()) {
666: try {
667: writer.write((org.dom4j.Node) bodyNodeIterator
668: .next());
669: } catch (IOException e) {
670: logger.error("Error (" + e.getClass().getName()
671: + ") writing the body back out:\n"
672: + bodyElement.asXML(), e);
673: throw new RuntimeException(e);
674: }
675: }
676: }
677:
678: String returnString = outStream.toString();
679:
680: if (logger.isDebugEnabled()) {
681: logger.debug("format(String) - end - return value = "
682: + returnString);
683: }
684:
685: return returnString;
686: }
687: }
688:
689: /**
690: * <p>
691: * Get the name of the source or file, used for debugging.
692: * </p>
693: *
694: * @return the current value of the source name, output by the parser for
695: * debugging.
696: */
697: public final String getSourceName() {
698: if (logger.isDebugEnabled()) {
699: logger.debug("getSourceName() - start");
700: }
701:
702: if (logger.isDebugEnabled()) {
703: logger.debug("getSourceName() - end - return value = "
704: + sourceName);
705: }
706:
707: return sourceName;
708: }
709:
710: /**
711: * <p>
712: * Stores whether or not <code>format</code> should return just plain
713: * text with line feeds and converted horizonal rule. If <code>true</code>,
714: * fomratted text is returned, otherwise formatted
715: * HTML is returned.
716: * </p>
717: *
718: * @return Returns formattedText.
719: */
720: public boolean isFormattedText() {
721: if (logger.isDebugEnabled()) {
722: logger.debug("isFormattedText() - start");
723: }
724:
725: if (logger.isDebugEnabled()) {
726: logger.debug("isFormattedText() - end - return value = "
727: + formattedText);
728: }
729:
730: return formattedText;
731: }
732:
733: /**
734: * <p>
735: * Get whether or not the parser will only return plain text.
736: * </p>
737: *
738: * @return <code>true</code> if the parser will only return plain text,
739: * otherwise <code>false</code>.
740: */
741: public boolean isTextOnly() {
742: if (logger.isDebugEnabled()) {
743: logger.debug("isTextOnly() - start");
744: }
745:
746: if (logger.isDebugEnabled()) {
747: logger.debug("isTextOnly() - end - return value = "
748: + textOnly);
749: }
750:
751: return textOnly;
752: }
753:
754: /**
755: * <p>Write something other than a new line.</p>
756: */
757: private void notTextNewLine() {
758: if (logger.isDebugEnabled()) {
759: logger.debug("notTextNewLine() - start");
760: }
761:
762: textNewLineCount = 0;
763: textAtStartOfLine = false;
764:
765: if (logger.isDebugEnabled()) {
766: logger.debug("notTextNewLine() - end");
767: }
768: }
769:
770: /**
771: * <p>
772: * Stores whether or not <code>format</code> should return just plain
773: * text with line feeds and converted horizonal rule. If <code>true</code>,
774: * fomratted text is returned, otherwise formatted
775: * HTML is returned.
776: * </p>
777: *
778: * @param formattedTextParam The new value of formattedText to set.
779: */
780: public final void setFormattedText(final boolean formattedTextParam) {
781: if (logger.isDebugEnabled()) {
782: logger.debug("setFormattedText(boolean formattedText = "
783: + formattedTextParam + ") - start");
784: }
785:
786: this .formattedText = formattedTextParam;
787:
788: if (logger.isDebugEnabled()) {
789: logger.debug("setFormattedText(boolean) - end");
790: }
791: }
792:
793: /**
794: * <p>
795: * If <code>true</code> then only the contents of the body tag are returned.
796: * </p>
797: *
798: * @param onlyChildren set to <code>true</code> to specify that the parser
799: * should only include the contents of the body tag.
800: */
801: public final void setOnlyBodyContents(final boolean onlyChildren) {
802: if (logger.isDebugEnabled()) {
803: logger.debug("setOnlyBodyContents(boolean onlyChildren = "
804: + onlyChildren + ") - start");
805: }
806:
807: this .onlyBodyContents = onlyChildren;
808:
809: if (logger.isDebugEnabled()) {
810: logger.debug("setOnlyBodyContents(boolean) - end");
811: }
812: }
813:
814: /**
815: * <p>
816: * Set the name of the source or file, used for debugging.
817: * </p>
818: *
819: * @param sourceNameParam
820: * the current value of the source name, output by the parser for
821: * debugging.
822: */
823: public final void setSourceName(final String sourceNameParam) {
824: if (logger.isDebugEnabled()) {
825: logger.debug("setSourceName(String sourceName = "
826: + sourceNameParam + ") - start");
827: }
828:
829: this .sourceName = sourceNameParam;
830:
831: if (logger.isDebugEnabled()) {
832: logger.debug("setSourceName(String) - end");
833: }
834: }
835:
836: /**
837: * <p>
838: * Set whether or not the parser should only return plain text.
839: * </p>
840: *
841: * @param textOnlyParam
842: * set to <code>true</code> if the parser should only return
843: * plain text, otherwise <code>false</code>.
844: */
845: public final void setTextOnly(final boolean textOnlyParam) {
846: if (logger.isDebugEnabled()) {
847: logger.debug("setTextOnly(boolean textOnly = "
848: + textOnlyParam + ") - start");
849: }
850:
851: this .textOnly = textOnlyParam;
852:
853: if (logger.isDebugEnabled()) {
854: logger.debug("setTextOnly(boolean) - end");
855: }
856: }
857: }
|