001: /*
002:
003: This software is OSI Certified Open Source Software.
004: OSI Certified is a certification mark of the Open Source Initiative.
005:
006: The license (Mozilla version 1.0) can be read at the MMBase site.
007: See http://www.MMBase.org/license
008:
009: */
010: package org.mmbase.util.transformers;
011:
012: import java.io.*;
013: import java.util.*;
014: import java.util.regex.Matcher;
015: import java.util.regex.Pattern;
016:
017: import org.mmbase.util.logging.*;
018:
019: /**
020: * Replace 1 or more spaces by 1 space, and 1 or more newlines by 1
021: * newline. Any other combination of newlines and spaces is replaced
022: * by one newline.
023: *
024: * Except if they are in between "<pre>" and "</pre>". (Note: perhaps this last behaviour should be made
025: * configurable).
026: *
027: * @author Michiel Meeuwissen
028: * @author Ernst Bunders
029: * @since MMBase-1.7
030: * @version $Id: SpaceReducer.java,v 1.20 2008/03/11 12:43:05 ernst Exp $
031: */
032:
033: public class SpaceReducer extends BufferedReaderTransformer implements
034: CharTransformer {
035:
036: private static Logger log = Logging
037: .getLoggerInstance(SpaceReducer.class);
038:
039: @Override
040: protected boolean transform(
041: PrintWriter bw,
042: String line,
043: org.mmbase.util.transformers.BufferedReaderTransformer.Status status) {
044:
045: SpaceReducerStatus srStatus = (SpaceReducerStatus) status;
046: List<Tag> tagsToPass = srStatus.getTagsToPass();
047: boolean result = false;
048:
049: if (!line.trim().equals("")
050: || srStatus.getCurrentlyOpen() != null) {
051: bw.write(line);
052: result = true;
053: }
054: if (srStatus.getCurrentlyOpen() != null) {
055: //look for a closing tag.
056: srStatus.getCurrentlyOpen().setLine(line);
057: if (srStatus.getCurrentlyOpen().hasClosed()) {
058: srStatus.setCurrentlyOpen(null);
059: }
060: } else {
061: //look for an opening tag
062: for (Tag tag : tagsToPass) {
063: tag.setLine(line);
064: if (tag.hasOpened()) {
065: srStatus.setCurrentlyOpen(tag);
066: break;
067: }
068: }
069: }
070: return result;
071: }
072:
073: /**
074: * This was the original, now unused implementation (not efficient enough)
075: */
076: protected Writer transform2(Reader r, Writer w) {
077:
078: int space = 1; // 'open' spaces (on this line)
079: int nl = 1; // 'open' newlines
080: // we start at 1, rather then 0, because in that way, all leading space is deleted too
081:
082: StringBuilder indent = new StringBuilder(); // 'open' indentation of white-space
083: int l = 0; // number of non-white-space (letter) on the current line
084:
085: int lines = 0; // for debug: the total number of lines read.
086: try {
087: log.debug("Starting spacereducing");
088: int c = r.read();
089: while (c != -1) {
090: if (c == '\n' || c == '\r') {
091: if (nl == 0)
092: w.write('\n');
093: nl++;
094: l = 0;
095: space = 0;
096: indent.setLength(0);
097: } else if (Character.isWhitespace((char) c)) {
098: if (space == 0 && l > 0)
099: w.write(' ');
100: if (l == 0)
101: indent.append((char) c);
102: space++;
103: } else {
104: if (l == 0 && space > 0) {
105: w.write(indent.toString());
106: indent.setLength(0);
107: }
108: space = 0;
109: lines += nl;
110: nl = 0;
111: l++;
112: w.write(c);
113: }
114: c = r.read();
115: }
116: log.debug("Finished: read " + lines + " lines");
117: } catch (java.io.IOException e) {
118: log.error(e.toString());
119: }
120: return w;
121: }
122:
123: @Override
124: public String toString() {
125: return "SPACEREDUCER";
126: }
127:
128: /**
129: * this is a helper class that can check if a tag was opened or closed in a line of text
130: * It first removes all bodyless versions of the tag from the line, and then counts all opening and
131: * closing occurrences of the tag.
132: * This will not work if an opening or closing tag is partly written on the next line, so it's not perfect.
133: * <ul>
134: * <li>have no body
135: * <li>can be opened and closed multiple times in one line.
136: * </ul>
137: * @author ebunders
138: *
139: */
140: private static class Tag {
141: private boolean hasOpened = false;
142: private boolean hasClosed = false;
143: private Pattern openingPattern;
144: private Pattern closingPattern;
145: private Pattern noBodyPattern;
146: private String name;
147:
148: public Tag(String name) {
149: openingPattern = Pattern.compile("<[\\s]*" + name
150: + "(\\s+[a-zA-Z]+\\=\"[\\S]+\")*\\s*>",
151: Pattern.CASE_INSENSITIVE);
152: closingPattern = Pattern.compile("<[\\s]*/\\s*" + name
153: + "\\s*>", Pattern.CASE_INSENSITIVE);
154: noBodyPattern = Pattern.compile("<[\\s]*" + name
155: + "\\s+([a-zA-Z]+\\=\"[\\S]+\")*\\s*/\\s*>",
156: Pattern.CASE_INSENSITIVE);
157: this .name = name;
158: }
159:
160: public void setLine(String line) {
161: //remove the bodyless versions of the tag from this line (if they exist, which they should not)
162: line = removeTagsWithoutBody(line);
163:
164: //count the opening and closing versions of the tag
165: int opening = countOccurences(openingPattern, line);
166: int closing = countOccurences(closingPattern, line);
167: hasOpened = opening > closing;
168: hasClosed = closing > opening;
169: }
170:
171: private int countOccurences(Pattern pattern, String line) {
172: Matcher m = pattern.matcher(line);
173: int counter = 0;
174: while (m.find() && counter < 5) {
175: counter++;
176: line = line.substring(m.end(), line.length());
177: m = pattern.matcher(line);
178: }
179: return counter;
180: }
181:
182: /**
183: * remove all the occurrences of bodyless versions of the tag
184: * they should not be there, but for safety
185: *
186: * @param line
187: * @return
188: */
189: private String removeTagsWithoutBody(String line) {
190: Matcher m = noBodyPattern.matcher(line);
191: while (m.find()) {
192: line = line.substring(0, m.start())
193: + line.substring(m.end(), line.length());
194: m = noBodyPattern.matcher(line);
195: }
196: return line;
197: }
198:
199: public boolean hasOpened() {
200: return hasOpened;
201: }
202:
203: public boolean hasClosed() {
204: return hasClosed;
205: }
206:
207: public String toString() {
208: return name;
209: }
210: }
211:
212: @Override
213: public Status createNewStatus() {
214: return (Status) new SpaceReducerStatus();
215: }
216:
217: public static class SpaceReducerStatus extends Status {
218: private List<Tag> tagsToPass = new ArrayList<Tag>();
219: private Tag currentlyOpen = null;
220:
221: public SpaceReducerStatus() {
222: tagsToPass.add(new Tag("pre"));
223: tagsToPass.add(new Tag("textarea"));
224: }
225:
226: public List<Tag> getTagsToPass() {
227: return tagsToPass;
228: }
229:
230: public Tag getCurrentlyOpen() {
231: return currentlyOpen;
232: }
233:
234: public void setCurrentlyOpen(Tag currentlyOpen) {
235: this .currentlyOpen = currentlyOpen;
236: }
237: }
238:
239: /**
240: * method to test the tag class
241: * TODO: this should be a unit test
242: * @param args
243: */
244: public static void main(String[] args) {
245: test("bladie hallo<pre> en nog wat");
246: test("bladie hallo<pre> en nog wat<pre>daarna");
247: test("bladie hallo<pre> en nog wat< / pre>< pre> <p>jaja</p> <a href=\"nogwat\">jaja</a>");
248: test("jaja</pre>");
249: test("jaja</pre> <pre> hoera</pre><p>test</p>");
250: test("jaja<pre>bla <pre /></pre>filter out bodyless tags");
251: System.out.println("FINISED");
252: }
253:
254: public static void test(String line) {
255: System.out.println("testing line: " + line);
256: Tag tag = new Tag("pre");
257: tag.setLine(line);
258: System.out.println("opening: " + tag.hasOpened()
259: + " :: closed: " + tag.hasClosed());
260: System.out.println("****************\n");
261: }
262: }
|