001: /* ====================================================================
002: Licensed to the Apache Software Foundation (ASF) under one or more
003: contributor license agreements. See the NOTICE file distributed with
004: this work for additional information regarding copyright ownership.
005: The ASF licenses this file to You under the Apache License, Version 2.0
006: (the "License"); you may not use this file except in compliance with
007: the License. You may obtain a copy of the License at
008:
009: http://www.apache.org/licenses/LICENSE-2.0
010:
011: Unless required by applicable law or agreed to in writing, software
012: distributed under the License is distributed on an "AS IS" BASIS,
013: WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014: See the License for the specific language governing permissions and
015: limitations under the License.
016: ==================================================================== */
017:
018: package org.apache.poi.util;
019:
020: import java.io.UnsupportedEncodingException;
021: import java.text.FieldPosition;
022: import java.text.NumberFormat;
023:
/**
 * Collection of static string handling utilities: conversion between Java
 * Strings and raw byte arrays (16-bit unicode in either byte order, or 8-bit
 * ISO-8859-1), a minimal printf-like formatter, and checks for whether a
 * String fits into 8-bit storage.
 *
 *@author Andrew C. Oliver
 *@author Sergei Kozello (sergeikozello at mail.ru)
 *@author Toshiaki Kamoshida (kamoshida.toshiaki at future dot co dot jp)
 *@since May 10, 2002
 *@version 1.0
 */
public class StringUtil {
    /** Charset name used for all 8-bit ("compressed unicode") conversions. */
    private static final String ENCODING = "ISO-8859-1";
    /** Charset name for 16-bit unicode, least significant byte first. */
    private static final String UTF16_LE = "UTF-16LE";
    /** Charset name for 16-bit unicode, most significant byte first. */
    private static final String UTF16_BE = "UTF-16BE";

    /**
     * Not instantiable: this class only hosts static helpers.
     */
    private StringUtil() {
    }

    /**
     * Given a byte array of 16-bit unicode characters in little endian
     * format (most significant byte last), return a Java String
     * representation of it. For example, the bytes { 0x16, 0x00 } decode to
     * the single character U+0016.
     *
     * @param string the byte array to be converted
     * @param offset the initial offset into the byte array. It is assumed
     *        that string[ offset ] and string[ offset + 1 ] contain the
     *        first 16-bit unicode character
     * @param len the length of the final string, in characters (so
     *        2 * len bytes are read)
     * @return the converted string
     * @exception ArrayIndexOutOfBoundsException if offset is out of bounds
     *            for the byte array (i.e., is negative or is greater than
     *            or equal to string.length)
     * @exception IllegalArgumentException if len is negative or too large
     *            (i.e., there is not enough data in string to create a
     *            String of that length)
     */
    public static String getFromUnicodeLE(final byte[] string,
            final int offset, final int len)
            throws ArrayIndexOutOfBoundsException,
            IllegalArgumentException {
        checkUnicodeBounds(string, offset, len);
        return decode(string, offset, len * 2, UTF16_LE);
    }

    /**
     * Given a byte array of 16-bit unicode characters in little endian
     * format (most significant byte last), return a Java String
     * representation of the whole array. For example, the bytes
     * { 0x16, 0x00 } decode to the single character U+0016.
     *
     * @param string the byte array to be converted; when its length is odd
     *        the final byte is not read
     * @return the converted string, or the empty string for an empty array
     */
    public static String getFromUnicodeLE(final byte[] string) {
        if (string.length == 0) {
            // The three-argument form rejects offset 0 on an empty array.
            return "";
        }
        return getFromUnicodeLE(string, 0, string.length / 2);
    }

    /**
     * Given a byte array of 16-bit unicode characters in big endian
     * format (most significant byte first), return a Java String
     * representation of it. For example, the bytes { 0x00, 0x16 } decode to
     * the single character U+0016.
     *
     * @param string the byte array to be converted
     * @param offset the initial offset into the byte array. It is assumed
     *        that string[ offset ] and string[ offset + 1 ] contain the
     *        first 16-bit unicode character
     * @param len the length of the final string, in characters (so
     *        2 * len bytes are read)
     * @return the converted string
     * @exception ArrayIndexOutOfBoundsException if offset is out of bounds
     *            for the byte array (i.e., is negative or is greater than
     *            or equal to string.length)
     * @exception IllegalArgumentException if len is negative or too large
     *            (i.e., there is not enough data in string to create a
     *            String of that length)
     */
    public static String getFromUnicodeBE(final byte[] string,
            final int offset, final int len)
            throws ArrayIndexOutOfBoundsException,
            IllegalArgumentException {
        checkUnicodeBounds(string, offset, len);
        return decode(string, offset, len * 2, UTF16_BE);
    }

    /**
     * Given a byte array of 16-bit unicode characters in big endian
     * format (most significant byte first), return a Java String
     * representation of the whole array. For example, the bytes
     * { 0x00, 0x16 } decode to the single character U+0016.
     *
     * @param string the byte array to be converted; when its length is odd
     *        the final byte is not read
     * @return the converted string, or the empty string for an empty array
     */
    public static String getFromUnicodeBE(final byte[] string) {
        if (string.length == 0) {
            // The three-argument form rejects offset 0 on an empty array.
            return "";
        }
        return getFromUnicodeBE(string, 0, string.length / 2);
    }

    /**
     * Read 8 bit data (in ISO-8859-1 codepage) into a (unicode) Java
     * String and return it.
     * (In Excel terms, read compressed 8 bit unicode as a string)
     *
     * @param string byte array to read
     * @param offset index of the first byte to read
     * @param len number of bytes to read; silently reduced to the number of
     *        bytes actually available after offset
     * @return the String built from the bytes read
     */
    public static String getFromCompressedUnicode(final byte[] string,
            final int offset, final int len) {
        // Clamp rather than fail when the caller asks for more than exists.
        final int available = string.length - offset;
        return decode(string, offset, Math.min(len, available), ENCODING);
    }

    /**
     * Takes a unicode (java) string, and writes it as 8 bit data (in
     * ISO-8859-1 codepage) into the supplied byte array.
     * (In Excel terms, write compressed 8 bit unicode)
     *
     * @param input the String containing the data to be written
     * @param output the byte array to which the data is to be written
     * @param offset an offset into the byte array at which the data starts
     *        when written
     */
    public static void putCompressedUnicode(final String input,
            final byte[] output, final int offset) {
        final byte[] bytes = encode(input, ENCODING);
        System.arraycopy(bytes, 0, output, offset, bytes.length);
    }

    /**
     * Takes a unicode string, and writes it as little endian (most
     * significant byte last) bytes into the supplied byte array.
     * (In Excel terms, write uncompressed unicode)
     *
     * @param input the String containing the unicode data to be written
     * @param output the byte array to hold the uncompressed unicode; it
     *        needs room for twice the length of the String after offset
     * @param offset the offset to start writing into the byte array
     */
    public static void putUnicodeLE(final String input,
            final byte[] output, final int offset) {
        final byte[] bytes = encode(input, UTF16_LE);
        System.arraycopy(bytes, 0, output, offset, bytes.length);
    }

    /**
     * Takes a unicode string, and writes it as big endian (most
     * significant byte first) bytes into the supplied byte array.
     * (In Excel terms, write uncompressed unicode)
     *
     * @param input the String containing the unicode data to be written
     * @param output the byte array to hold the uncompressed unicode; it
     *        needs room for twice the length of the String after offset
     * @param offset the offset to start writing into the byte array
     */
    public static void putUnicodeBE(final String input,
            final byte[] output, final int offset) {
        final byte[] bytes = encode(input, UTF16_BE);
        System.arraycopy(bytes, 0, output, offset, bytes.length);
    }

    /**
     * Apply printf() like formatting to a string.
     * Primarily used for logging.
     * <p>
     * Every '%' in the message is a placeholder for the next entry of
     * params. When that entry is a Number and the '%' is not the last
     * character of the message, the number is rendered with the default
     * locale's NumberFormat and the '%' may be followed by an optional
     * single-digit spec: "%n" (minimum integer digits), "%.m" (maximum
     * fraction digits) or "%n.m" (both). Any other entry is rendered as by
     * String.valueOf, so a null entry is written as "null". A '%' with no
     * remaining parameter is replaced by "?missing data?". The sequence
     * "\%" produces a literal '%'.
     *
     * @param message the string with embedded formatting info
     *        eg. "This is a test %2.2"
     * @param params array of values to format into the string; null is
     *        treated as an empty array
     * @return The formatted string
     */
    public static String format(String message, Object[] params) {
        final Object[] args = (params == null) ? new Object[0] : params;
        final int length = message.length();
        final StringBuffer formatted = new StringBuffer();
        int nextArg = 0;
        for (int i = 0; i < length; i++) {
            final char current = message.charAt(i);
            if (current == '%') {
                if (nextArg >= args.length) {
                    formatted.append("?missing data?");
                    continue;
                }
                final Object arg = args[nextArg++];
                if ((arg instanceof Number) && (i + 1 < length)) {
                    // Skip exactly the spec characters that were consumed.
                    i += matchOptionalFormatting((Number) arg, message,
                            i + 1, formatted);
                } else {
                    // append(Object) goes through String.valueOf: null-safe.
                    formatted.append(arg);
                }
            } else if ((current == '\\') && (i + 1 < length)
                    && (message.charAt(i + 1) == '%')) {
                // Escaped percent sign: emit it and skip the '%'.
                formatted.append('%');
                i++;
            } else {
                formatted.append(current);
            }
        }
        return formatted.toString();
    }

    /**
     * Formats number into outputTo, honouring an optional "n", ".m" or
     * "n.m" spec found in message at index start.
     *
     * @param number the value to format
     * @param message the complete format message
     * @param start index of the first character after the '%'
     * @param outputTo buffer receiving the formatted number
     * @return how many characters of message belonged to the spec and must
     *         be skipped by the caller; 0 when no spec is present
     */
    private static int matchOptionalFormatting(Number number,
            String message, int start, StringBuffer outputTo) {
        final NumberFormat numberFormat = NumberFormat.getInstance();
        final int end = message.length();
        int consumed = 0;
        if ((start < end) && Character.isDigit(message.charAt(start))) {
            numberFormat.setMinimumIntegerDigits(
                    Character.digit(message.charAt(start), 10));
            consumed = 1;
            if ((start + 2 < end)
                    && (message.charAt(start + 1) == '.')
                    && Character.isDigit(message.charAt(start + 2))) {
                numberFormat.setMaximumFractionDigits(
                        Character.digit(message.charAt(start + 2), 10));
                consumed = 3;
            }
        } else if ((start + 1 < end)
                && (message.charAt(start) == '.')
                && Character.isDigit(message.charAt(start + 1))) {
            numberFormat.setMaximumFractionDigits(
                    Character.digit(message.charAt(start + 1), 10));
            consumed = 2;
        }
        // A '.' without a following digit, or any other character, is not a
        // spec: it stays in the message as literal text (consumed == 0).
        numberFormat.format(number, outputTo, new FieldPosition(0));
        return consumed;
    }

    /**
     * @return the encoding we want to use, currently hardcoded to ISO-8859-1
     */
    public static String getPreferredEncoding() {
        return ENCODING;
    }

    /**
     * Checks whether the parameter contains a character that does not fit
     * into a single byte.
     *
     * @param value string to check, may be null
     * @return true if the string has at least one character above 0xFF;
     *         false otherwise, including for null
     */
    public static boolean hasMultibyte(String value) {
        if (value == null) {
            return false;
        }
        for (int i = 0; i < value.length(); i++) {
            if (value.charAt(i) > 0xFF) {
                return true;
            }
        }
        return false;
    }

    /**
     * Checks to see if a given String needs to be represented as Unicode,
     * i.e. cannot be stored losslessly as 8-bit ISO-8859-1 data.
     *
     * @param value the string to check, may be null
     * @return true if string needs Unicode to be represented; false
     *         otherwise, including for null
     */
    public static boolean isUnicodeString(final String value) {
        // ISO-8859-1 covers exactly the characters 0x00-0xFF, so a string
        // survives an encode/decode round trip iff it has no char above 0xFF.
        return hasMultibyte(value);
    }

    /**
     * Validates offset and character count for the 16-bit decoders.
     */
    private static void checkUnicodeBounds(byte[] string, int offset,
            int len) {
        if ((offset < 0) || (offset >= string.length)) {
            throw new ArrayIndexOutOfBoundsException("Illegal offset");
        }
        if ((len < 0) || (((string.length - offset) / 2) < len)) {
            throw new IllegalArgumentException("Illegal length");
        }
    }

    /**
     * Decodes byteCount bytes of data, starting at offset, with the named
     * charset.
     */
    private static String decode(byte[] data, int offset, int byteCount,
            String charsetName) {
        try {
            return new String(data, offset, byteCount, charsetName);
        } catch (UnsupportedEncodingException e) {
            throw unsupportedCharset(charsetName, e);
        }
    }

    /**
     * Encodes text with the named charset.
     */
    private static byte[] encode(String text, String charsetName) {
        try {
            return text.getBytes(charsetName);
        } catch (UnsupportedEncodingException e) {
            throw unsupportedCharset(charsetName, e);
        }
    }

    /**
     * Builds the error raised when a charset this class depends on is
     * missing, keeping the original exception as its cause.
     */
    private static InternalError unsupportedCharset(String charsetName,
            UnsupportedEncodingException cause) {
        InternalError error = new InternalError(
                "Required charset is not supported: " + charsetName);
        error.initCause(cause);
        return error;
    }
}
|