001: // Jericho HTML Parser - Java based library for analysing and manipulating HTML
002: // Version 2.5
003: // Copyright (C) 2007 Martin Jericho
004: // http://jerichohtml.sourceforge.net/
005: //
006: // This library is free software; you can redistribute it and/or
007: // modify it under the terms of either one of the following licences:
008: //
009: // 1. The Eclipse Public License (EPL) version 1.0,
010: // included in this distribution in the file licence-epl-1.0.html
011: // or available at http://www.eclipse.org/legal/epl-v10.html
012: //
013: // 2. The GNU Lesser General Public License (LGPL) version 2.1 or later,
014: // included in this distribution in the file licence-lgpl-2.1.txt
015: // or available at http://www.gnu.org/licenses/lgpl.txt
016: //
017: // This library is distributed on an "AS IS" basis,
018: // WITHOUT WARRANTY OF ANY KIND, either express or implied.
019: // See the individual licence texts for more details.
020:
021: package au.id.jericho.lib.html;
022:
023: import java.util.*;
024: import java.io.*;
025:
026: /**
027: * Contains miscellaneous utility methods not directly associated with the HTML Parser library.
028: */
029: public final class Util {
030: private static final int BUFFER_SIZE = 2048;
031: private static final String CSVNewLine = System
032: .getProperty("line.separator");
033:
034: private Util() {
035: }
036:
037: /**
038: * Returns the text loaded from the specified <code>Reader</code> as a string.
039: * <p>
040: * If a <code>null</code> argument is supplied to this method, an empty string is returned.
041: * <p>
042: * To load text from an <code>InputStream</code>, use <code>getString(new InputStreamReader(inputStream,encoding))</code>.
043: *
044: * @param reader the <code>java.io.Reader</code> from which to load the text.
045: * @return the text loaded from the specified <code>java.io.Reader</code> as a string.
046: * @throws java.io.IOException if an I/O error occurs.
047: */
048: public static String getString(final Reader reader)
049: throws IOException {
050: if (reader == null)
051: return "";
052: try {
053: int charsRead;
054: final char[] copyBuffer = new char[BUFFER_SIZE];
055: final StringBuffer sb = new StringBuffer();
056: while ((charsRead = reader.read(copyBuffer, 0, BUFFER_SIZE)) != -1)
057: sb.append(copyBuffer, 0, charsRead);
058: return sb.toString();
059: } finally {
060: reader.close();
061: }
062: }
063:
064: /**
065: * Outputs the specified array of strings to the specified <code>Writer</code> in the format of a line for a CSV file.
066: * <p>
067: * "CSV" stands for <i>Comma Separated Values</i>.
068: * There is no formal specification for a CSV file, so there is significant variation in
069: * the way different applications handle issues like the encoding of different data types and special characters.
070: * <p>
071: * Generally, a CSV file contains a list of records separated by line breaks, with each record consisting of a list of
072: * field values separated by commas.
073: * Each record in the file should contain the same number of field values, with the values at each position representing the same
074: * type of data in all the records. In this way the file can also be divided into columns, often with the first line of the
075: * file containing the column labels.
076: * <p>
077: * Columns can have different data types such as text, numeric, date / time and boolean.
078: * A text value is often delimited with single (<code>'</code>) or double-quotes (<code>"</code>),
079: * especially if the value contains a comma, line feed, or other special character that is significant to the syntax.
080: * Encoding techniques for including quote characters themselves in text values vary widely.
081: * Values of other types are generally unquoted to distinguish them from text values.
082: * <p>
083: * This method produces output that is readable by MS-Excel, conforming to the following rules:
084: * <p>
085: * <ul>
086: * <li>All values are considered to be of type text, except for the constants {@link Config#ColumnValueTrue}
087: * and {@link Config#ColumnValueFalse}, representing the boolean values <code>true</code> and <code>false</code> respectively.
088: * <li>All text values are enclosed in double-quotes.
089: * <li>Double-quote characters contained in text values are encoded using two consecutive double-quotes (<code>""</code>).
090: * <li><code>null</code> values are represented as empty fields.
091: * <li>The end of each record is represented by a carriage-return / line-feed (CR/LF) pair.
092: * <li>Line breaks inside text values are represented by a single line feed (LF) character.
093: * </ul>
094: *
095: * @param writer the destination <code>java.io.Writer</code> for the output.
096: * @throws java.io.IOException if an I/O error occurs.
097: * @see FormFields#getColumnLabels()
098: * @see FormFields#getColumnValues(Map)
099: */
100: public static void outputCSVLine(final Writer writer,
101: final String[] values) throws IOException {
102: for (int i = 0; i < values.length;) {
103: final String value = values[i];
104: if (value != null) {
105: if (value == Config.ColumnValueTrue
106: || value == Config.ColumnValueFalse) {
107: writer.write(value); // assumes neither ColumnTrue or ColumnFalse contain double quotes.
108: } else {
109: writer.write('"');
110: outputValueEscapeQuotes(writer, value);
111: writer.write('"');
112: }
113: }
114: if (++i != values.length)
115: writer.write(',');
116: }
117: writer.write(CSVNewLine);
118: }
119:
120: private static void outputValueEscapeQuotes(final Writer writer,
121: final String text) throws IOException {
122: for (int i = 0; i < text.length(); i++) {
123: final char ch = text.charAt(i);
124: writer.write(ch);
125: if (ch == '"')
126: writer.write(ch);
127: }
128: }
129:
130: // use this method until we can replace with java 1.5 StringBuffer.append(CharSequence s)
131: static StringBuffer appendTo(final StringBuffer sb,
132: final CharSequence s) {
133: return appendTo(sb, s, 0, s.length());
134: }
135:
136: // use this method until we can replace with java 1.5 StringBuffer.append(CharSequence s, int begin, int end)
137: static StringBuffer appendTo(final StringBuffer sb,
138: final CharSequence s, int start, final int end) {
139: while (start < end) {
140: sb.append(s.charAt(start));
141: start++;
142: }
143: return sb;
144: }
145:
146: // use this method until we can replace with java 1.5 Writer.append(CharSequence s)
147: static Writer appendTo(final Writer writer, final CharSequence s)
148: throws IOException {
149: return appendTo(writer, s, 0, s.length());
150: }
151:
152: // use this method until we can replace with java 1.5 Writer.append(CharSequence s, int begin, int end)
153: static Writer appendTo(final Writer writer, final CharSequence s,
154: int start, final int end) throws IOException {
155: while (start < end) {
156: writer.write(s.charAt(start));
157: start++;
158: }
159: return writer;
160: }
161: }
|