001: /* ====================================================================
002: Licensed to the Apache Software Foundation (ASF) under one or more
003: contributor license agreements. See the NOTICE file distributed with
004: this work for additional information regarding copyright ownership.
005: The ASF licenses this file to You under the Apache License, Version 2.0
006: (the "License"); you may not use this file except in compliance with
007: the License. You may obtain a copy of the License at
008:
009: http://www.apache.org/licenses/LICENSE-2.0
010:
011: Unless required by applicable law or agreed to in writing, software
012: distributed under the License is distributed on an "AS IS" BASIS,
013: WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014: See the License for the specific language governing permissions and
015: limitations under the License.
016: ==================================================================== */
017:
018: package org.apache.poi.hpsf;
019:
020: import java.io.IOException;
021: import java.io.OutputStream;
022: import java.io.UnsupportedEncodingException;
023: import java.util.Date;
024: import java.util.LinkedList;
025: import java.util.List;
026:
027: import org.apache.poi.util.LittleEndian;
028: import org.apache.poi.util.LittleEndianConsts;
029:
030: /**
031: * <p>Supports reading and writing of variant data.</p>
032: *
033: * <p><strong>FIXME (3):</strong> Reading and writing should be made more
034: * uniform than it is now. The following items should be resolved:
035: *
036: * <ul>
037: *
038: * <li><p>Reading requires a length parameter that is 4 byte greater than the
039: * actual data, because the variant type field is included. </p></li>
040: *
041: * <li><p>Reading reads from a byte array while writing writes to an byte array
042: * output stream.</p></li>
043: *
044: * </ul>
045: *
046: * @author Rainer Klute <a
047: * href="mailto:klute@rainer-klute.de"><klute@rainer-klute.de></a>
048: * @since 2003-08-08
049: * @version $Id: VariantSupport.java 489730 2006-12-22 19:18:16Z bayard $
050: */
051: public class VariantSupport extends Variant {
052:
053: private static boolean logUnsupportedTypes = false;
054:
055: /**
056: * <p>Specifies whether warnings about unsupported variant types are to be
057: * written to <code>System.err</code> or not.</p>
058: *
059: * @param logUnsupportedTypes If <code>true</code> warnings will be written,
060: * if <code>false</code> they won't.
061: */
062: public static void setLogUnsupportedTypes(
063: final boolean logUnsupportedTypes) {
064: VariantSupport.logUnsupportedTypes = logUnsupportedTypes;
065: }
066:
067: /**
068: * <p>Checks whether logging of unsupported variant types warning is turned
069: * on or off.</p>
070: *
071: * @return <code>true</code> if logging is turned on, else
072: * <code>false</code>.
073: */
074: public static boolean isLogUnsupportedTypes() {
075: return logUnsupportedTypes;
076: }
077:
078: /**
079: * <p>Keeps a list of the variant types an "unsupported" message has already
080: * been issued for.</p>
081: */
082: protected static List unsupportedMessage;
083:
084: /**
085: * <p>Writes a warning to <code>System.err</code> that a variant type is
086: * unsupported by HPSF. Such a warning is written only once for each variant
087: * type. Log messages can be turned on or off by </p>
088: *
089: * @param ex The exception to log
090: */
091: protected static void writeUnsupportedTypeMessage(
092: final UnsupportedVariantTypeException ex) {
093: if (isLogUnsupportedTypes()) {
094: if (unsupportedMessage == null)
095: unsupportedMessage = new LinkedList();
096: Long vt = new Long(ex.getVariantType());
097: if (!unsupportedMessage.contains(vt)) {
098: System.err.println(ex.getMessage());
099: unsupportedMessage.add(vt);
100: }
101: }
102: }
103:
104: /**
105: * <p>Reads a variant type from a byte array.</p>
106: *
107: * @param src The byte array
108: * @param offset The offset in the byte array where the variant
109: * starts
110: * @param length The length of the variant including the variant
111: * type field
112: * @param type The variant type to read
113: * @param codepage The codepage to use to write non-wide strings
114: * @return A Java object that corresponds best to the variant
115: * field. For example, a VT_I4 is returned as a {@link Long}, a
116: * VT_LPSTR as a {@link String}.
117: * @exception ReadingNotSupportedException if a property is to be written
118: * who's variant type HPSF does not yet support
119: * @exception UnsupportedEncodingException if the specified codepage is not
120: * supported.
121: *
122: * @see Variant
123: */
124: public static Object read(final byte[] src, final int offset,
125: final int length, final long type, final int codepage)
126: throws ReadingNotSupportedException,
127: UnsupportedEncodingException {
128: Object value;
129: int o1 = offset;
130: int l1 = length - LittleEndian.INT_SIZE;
131: long lType = type;
132:
133: /* Instead of trying to read 8-bit characters from a Unicode string,
134: * read 16-bit characters. */
135: if (codepage == Constants.CP_UNICODE
136: && type == Variant.VT_LPSTR)
137: lType = Variant.VT_LPWSTR;
138:
139: switch ((int) lType) {
140: case Variant.VT_EMPTY: {
141: value = null;
142: break;
143: }
144: case Variant.VT_I2: {
145: /*
146: * Read a short. In Java it is represented as an
147: * Integer object.
148: */
149: value = new Integer(LittleEndian.getShort(src, o1));
150: break;
151: }
152: case Variant.VT_I4: {
153: /*
154: * Read a word. In Java it is represented as an
155: * Integer object.
156: */
157: value = new Integer(LittleEndian.getInt(src, o1));
158: break;
159: }
160: case Variant.VT_I8: {
161: /*
162: * Read a double word. In Java it is represented as a
163: * Long object.
164: */
165: value = new Long(LittleEndian.getLong(src, o1));
166: break;
167: }
168: case Variant.VT_R8: {
169: /*
170: * Read an eight-byte double value. In Java it is represented as
171: * a Double object.
172: */
173: value = new Double(LittleEndian.getDouble(src, o1));
174: break;
175: }
176: case Variant.VT_FILETIME: {
177: /*
178: * Read a FILETIME object. In Java it is represented
179: * as a Date object.
180: */
181: final long low = LittleEndian.getUInt(src, o1);
182: o1 += LittleEndian.INT_SIZE;
183: final long high = LittleEndian.getUInt(src, o1);
184: value = Util.filetimeToDate((int) high, (int) low);
185: break;
186: }
187: case Variant.VT_LPSTR: {
188: /*
189: * Read a byte string. In Java it is represented as a
190: * String object. The 0x00 bytes at the end must be
191: * stripped.
192: */
193: final int first = o1 + LittleEndian.INT_SIZE;
194: long last = first + LittleEndian.getUInt(src, o1) - 1;
195: o1 += LittleEndian.INT_SIZE;
196: while (src[(int) last] == 0 && first <= last)
197: last--;
198: final int l = (int) (last - first + 1);
199: value = codepage != -1 ? new String(src, first, l,
200: codepageToEncoding(codepage)) : new String(src,
201: first, l);
202: break;
203: }
204: case Variant.VT_LPWSTR: {
205: /*
206: * Read a Unicode string. In Java it is represented as
207: * a String object. The 0x00 bytes at the end must be
208: * stripped.
209: */
210: final int first = o1 + LittleEndian.INT_SIZE;
211: long last = first + LittleEndian.getUInt(src, o1) - 1;
212: long l = last - first;
213: o1 += LittleEndian.INT_SIZE;
214: StringBuffer b = new StringBuffer((int) (last - first));
215: for (int i = 0; i <= l; i++) {
216: final int i1 = o1 + (i * 2);
217: final int i2 = i1 + 1;
218: final int high = src[i2] << 8;
219: final int low = src[i1] & 0x00ff;
220: final char c = (char) (high | low);
221: b.append(c);
222: }
223: /* Strip 0x00 characters from the end of the string: */
224: while (b.length() > 0 && b.charAt(b.length() - 1) == 0x00)
225: b.setLength(b.length() - 1);
226: value = b.toString();
227: break;
228: }
229: case Variant.VT_CF: {
230: final byte[] v = new byte[l1];
231: for (int i = 0; i < l1; i++)
232: v[i] = src[(o1 + i)];
233: value = v;
234: break;
235: }
236: case Variant.VT_BOOL: {
237: /*
238: * The first four bytes in src, from src[offset] to
239: * src[offset + 3] contain the DWord for VT_BOOL, so
240: * skip it, we don't need it.
241: */
242: // final int first = offset + LittleEndian.INT_SIZE;
243: long bool = LittleEndian.getUInt(src, o1);
244: if (bool != 0)
245: value = Boolean.TRUE;
246: else
247: value = Boolean.FALSE;
248: break;
249: }
250: default: {
251: final byte[] v = new byte[l1];
252: for (int i = 0; i < l1; i++)
253: v[i] = src[(o1 + i)];
254: throw new ReadingNotSupportedException(type, v);
255: }
256: }
257: return value;
258: }
259:
260: /**
261: * <p>Turns a codepage number into the equivalent character encoding's
262: * name.</p>
263: *
264: * @param codepage The codepage number
265: *
266: * @return The character encoding's name. If the codepage number is 65001,
267: * the encoding name is "UTF-8". All other positive numbers are mapped to
268: * "cp" followed by the number, e.g. if the codepage number is 1252 the
269: * returned character encoding name will be "cp1252".
270: *
271: * @exception UnsupportedEncodingException if the specified codepage is
272: * less than zero.
273: */
274: public static String codepageToEncoding(final int codepage)
275: throws UnsupportedEncodingException {
276: if (codepage <= 0)
277: throw new UnsupportedEncodingException(
278: "Codepage number may not be " + codepage);
279: switch (codepage) {
280: case Constants.CP_UTF16:
281: return "UTF-16";
282: case Constants.CP_UTF16_BE:
283: return "UTF-16BE";
284: case Constants.CP_UTF8:
285: return "UTF-8";
286: case Constants.CP_037:
287: return "cp037";
288: case Constants.CP_GBK:
289: return "GBK";
290: case Constants.CP_MS949:
291: return "ms949";
292: case Constants.CP_WINDOWS_1250:
293: return "windows-1250";
294: case Constants.CP_WINDOWS_1251:
295: return "windows-1251";
296: case Constants.CP_WINDOWS_1252:
297: return "windows-1252";
298: case Constants.CP_WINDOWS_1253:
299: return "windows-1253";
300: case Constants.CP_WINDOWS_1254:
301: return "windows-1254";
302: case Constants.CP_WINDOWS_1255:
303: return "windows-1255";
304: case Constants.CP_WINDOWS_1256:
305: return "windows-1256";
306: case Constants.CP_WINDOWS_1257:
307: return "windows-1257";
308: case Constants.CP_WINDOWS_1258:
309: return "windows-1258";
310: case Constants.CP_JOHAB:
311: return "johab";
312: case Constants.CP_MAC_ROMAN:
313: return "MacRoman";
314: case Constants.CP_MAC_JAPAN:
315: return "SJIS";
316: case Constants.CP_MAC_CHINESE_TRADITIONAL:
317: return "Big5";
318: case Constants.CP_MAC_KOREAN:
319: return "EUC-KR";
320: case Constants.CP_MAC_ARABIC:
321: return "MacArabic";
322: case Constants.CP_MAC_HEBREW:
323: return "MacHebrew";
324: case Constants.CP_MAC_GREEK:
325: return "MacGreek";
326: case Constants.CP_MAC_CYRILLIC:
327: return "MacCyrillic";
328: case Constants.CP_MAC_CHINESE_SIMPLE:
329: return "EUC_CN";
330: case Constants.CP_MAC_ROMANIA:
331: return "MacRomania";
332: case Constants.CP_MAC_UKRAINE:
333: return "MacUkraine";
334: case Constants.CP_MAC_THAI:
335: return "MacThai";
336: case Constants.CP_MAC_CENTRAL_EUROPE:
337: return "MacCentralEurope";
338: case Constants.CP_MAC_ICELAND:
339: return "MacIceland";
340: case Constants.CP_MAC_TURKISH:
341: return "MacTurkish";
342: case Constants.CP_MAC_CROATIAN:
343: return "MacCroatian";
344: case Constants.CP_US_ACSII:
345: case Constants.CP_US_ASCII2:
346: return "US-ASCII";
347: case Constants.CP_KOI8_R:
348: return "KOI8-R";
349: case Constants.CP_ISO_8859_1:
350: return "ISO-8859-1";
351: case Constants.CP_ISO_8859_2:
352: return "ISO-8859-2";
353: case Constants.CP_ISO_8859_3:
354: return "ISO-8859-3";
355: case Constants.CP_ISO_8859_4:
356: return "ISO-8859-4";
357: case Constants.CP_ISO_8859_5:
358: return "ISO-8859-5";
359: case Constants.CP_ISO_8859_6:
360: return "ISO-8859-6";
361: case Constants.CP_ISO_8859_7:
362: return "ISO-8859-7";
363: case Constants.CP_ISO_8859_8:
364: return "ISO-8859-8";
365: case Constants.CP_ISO_8859_9:
366: return "ISO-8859-9";
367: case Constants.CP_ISO_2022_JP1:
368: case Constants.CP_ISO_2022_JP2:
369: case Constants.CP_ISO_2022_JP3:
370: return "ISO-2022-JP";
371: case Constants.CP_ISO_2022_KR:
372: return "ISO-2022-KR";
373: case Constants.CP_EUC_JP:
374: return "EUC-JP";
375: case Constants.CP_EUC_KR:
376: return "EUC-KR";
377: case Constants.CP_GB2312:
378: return "GB2312";
379: case Constants.CP_GB18030:
380: return "GB18030";
381: case Constants.CP_SJIS:
382: return "SJIS";
383: default:
384: return "cp" + codepage;
385: }
386: }
387:
388: /**
389: * <p>Writes a variant value to an output stream. This method ensures that
390: * always a multiple of 4 bytes is written.</p>
391: *
392: * <p>If the codepage is UTF-16, which is encouraged, strings
393: * <strong>must</strong> always be written as {@link Variant#VT_LPWSTR}
394: * strings, not as {@link Variant#VT_LPSTR} strings. This method ensure this
395: * by converting strings appropriately, if needed.</p>
396: *
397: * @param out The stream to write the value to.
398: * @param type The variant's type.
399: * @param value The variant's value.
400: * @param codepage The codepage to use to write non-wide strings
401: * @return The number of entities that have been written. In many cases an
402: * "entity" is a byte but this is not always the case.
403: * @exception IOException if an I/O exceptions occurs
404: * @exception WritingNotSupportedException if a property is to be written
405: * who's variant type HPSF does not yet support
406: */
407: public static int write(final OutputStream out, final long type,
408: final Object value, final int codepage) throws IOException,
409: WritingNotSupportedException {
410: int length = 0;
411: switch ((int) type) {
412: case Variant.VT_BOOL: {
413: int trueOrFalse;
414: if (((Boolean) value).booleanValue())
415: trueOrFalse = 1;
416: else
417: trueOrFalse = 0;
418: length = TypeWriter.writeUIntToStream(out, trueOrFalse);
419: break;
420: }
421: case Variant.VT_LPSTR: {
422: final byte[] bytes = (codepage == -1 ? ((String) value)
423: .getBytes() : ((String) value)
424: .getBytes(codepageToEncoding(codepage)));
425: length = TypeWriter
426: .writeUIntToStream(out, bytes.length + 1);
427: final byte[] b = new byte[bytes.length + 1];
428: System.arraycopy(bytes, 0, b, 0, bytes.length);
429: b[b.length - 1] = 0x00;
430: out.write(b);
431: length += b.length;
432: break;
433: }
434: case Variant.VT_LPWSTR: {
435: final int nrOfChars = ((String) value).length() + 1;
436: length += TypeWriter.writeUIntToStream(out, nrOfChars);
437: char[] s = Util.pad4((String) value);
438: for (int i = 0; i < s.length; i++) {
439: final int high = ((s[i] & 0x0000ff00) >> 8);
440: final int low = (s[i] & 0x000000ff);
441: final byte highb = (byte) high;
442: final byte lowb = (byte) low;
443: out.write(lowb);
444: out.write(highb);
445: length += 2;
446: }
447: out.write(0x00);
448: out.write(0x00);
449: length += 2;
450: break;
451: }
452: case Variant.VT_CF: {
453: final byte[] b = (byte[]) value;
454: out.write(b);
455: length = b.length;
456: break;
457: }
458: case Variant.VT_EMPTY: {
459: TypeWriter.writeUIntToStream(out, Variant.VT_EMPTY);
460: length = LittleEndianConsts.INT_SIZE;
461: break;
462: }
463: case Variant.VT_I2: {
464: TypeWriter.writeToStream(out, ((Integer) value)
465: .shortValue());
466: length = LittleEndianConsts.SHORT_SIZE;
467: break;
468: }
469: case Variant.VT_I4: {
470: if (!(value instanceof Integer)) {
471: throw new ClassCastException(
472: "Could not cast an object to "
473: + Integer.class.toString() + ": "
474: + value.getClass().toString() + ", "
475: + value.toString());
476: }
477: length += TypeWriter.writeToStream(out, ((Integer) value)
478: .intValue());
479: break;
480: }
481: case Variant.VT_I8: {
482: TypeWriter.writeToStream(out, ((Long) value).longValue());
483: length = LittleEndianConsts.LONG_SIZE;
484: break;
485: }
486: case Variant.VT_R8: {
487: length += TypeWriter.writeToStream(out, ((Double) value)
488: .doubleValue());
489: break;
490: }
491: case Variant.VT_FILETIME: {
492: long filetime = Util.dateToFileTime((Date) value);
493: int high = (int) ((filetime >> 32) & 0x00000000FFFFFFFFL);
494: int low = (int) (filetime & 0x00000000FFFFFFFFL);
495: length += TypeWriter.writeUIntToStream(out,
496: 0x0000000FFFFFFFFL & low);
497: length += TypeWriter.writeUIntToStream(out,
498: 0x0000000FFFFFFFFL & high);
499: break;
500: }
501: default: {
502: /* The variant type is not supported yet. However, if the value
503: * is a byte array we can write it nevertheless. */
504: if (value instanceof byte[]) {
505: final byte[] b = (byte[]) value;
506: out.write(b);
507: length = b.length;
508: writeUnsupportedTypeMessage(new WritingNotSupportedException(
509: type, value));
510: } else
511: throw new WritingNotSupportedException(type, value);
512: break;
513: }
514: }
515:
516: return length;
517: }
518:
519: }
|