001: /* ====================================================================
002: Licensed to the Apache Software Foundation (ASF) under one or more
003: contributor license agreements. See the NOTICE file distributed with
004: this work for additional information regarding copyright ownership.
005: The ASF licenses this file to You under the Apache License, Version 2.0
006: (the "License"); you may not use this file except in compliance with
007: the License. You may obtain a copy of the License at
008:
009: http://www.apache.org/licenses/LICENSE-2.0
010:
011: Unless required by applicable law or agreed to in writing, software
012: distributed under the License is distributed on an "AS IS" BASIS,
013: WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014: See the License for the specific language governing permissions and
015: limitations under the License.
016: ==================================================================== */
017:
018: package org.apache.poi.hssf.record;
019:
020: import org.apache.poi.util.IntMapper;
021: import org.apache.poi.util.LittleEndianConsts;
022:
023: import java.util.Iterator;
024:
025: /**
026: * Title: Static String Table Record
027: * <P>
028: * Description: This holds all the strings for LabelSSTRecords.
029: * <P>
030: * REFERENCE: PG 389 Microsoft Excel 97 Developer's Kit (ISBN:
031: * 1-57231-498-2)
032: * <P>
033: * @author Andrew C. Oliver (acoliver at apache dot org)
034: * @author Marc Johnson (mjohnson at apache dot org)
035: * @author Glen Stampoultzis (glens at apache.org)
036: *
037: * @see org.apache.poi.hssf.record.LabelSSTRecord
038: * @see org.apache.poi.hssf.record.ContinueRecord
039: */
040:
041: public class SSTRecord extends Record {
042:
043: private static UnicodeString EMPTY_STRING = new UnicodeString("");
044:
045: /** how big can an SST record be? As big as any record can be: 8228 bytes */
046: static final int MAX_RECORD_SIZE = 8228;
047:
048: /** standard record overhead: two shorts (record id plus data space size)*/
049: static final int STD_RECORD_OVERHEAD = 2 * LittleEndianConsts.SHORT_SIZE;
050:
051: /** SST overhead: the standard record overhead, plus the number of strings and the number of unique strings -- two ints */
052: static final int SST_RECORD_OVERHEAD = (STD_RECORD_OVERHEAD + (2 * LittleEndianConsts.INT_SIZE));
053:
054: /** how much data can we stuff into an SST record? That would be _max minus the standard SST record overhead */
055: static final int MAX_DATA_SPACE = MAX_RECORD_SIZE
056: - SST_RECORD_OVERHEAD;
057:
058: /** overhead for each string includes the string's character count (a short) and the flag describing its characteristics (a byte) */
059: static final int STRING_MINIMAL_OVERHEAD = LittleEndianConsts.SHORT_SIZE
060: + LittleEndianConsts.BYTE_SIZE;
061:
062: public static final short sid = 0xfc;
063:
064: /** union of strings in the SST and EXTSST */
065: private int field_1_num_strings;
066:
067: /** according to docs ONLY SST */
068: private int field_2_num_unique_strings;
069: private IntMapper field_3_strings;
070:
071: private SSTDeserializer deserializer;
072:
073: /** Offsets from the beginning of the SST record (even across continuations) */
074: int[] bucketAbsoluteOffsets;
075: /** Offsets relative the start of the current SST or continue record */
076: int[] bucketRelativeOffsets;
077:
078: /**
079: * default constructor
080: */
081: public SSTRecord() {
082: field_1_num_strings = 0;
083: field_2_num_unique_strings = 0;
084: field_3_strings = new IntMapper();
085: deserializer = new SSTDeserializer(field_3_strings);
086: }
087:
088: /**
089: * Constructs an SST record and sets its fields appropriately.
090: *
091: * @param in the RecordInputstream to read the record from
092: */
093:
094: public SSTRecord(RecordInputStream in) {
095: super (in);
096: }
097:
098: /**
099: * Add a string.
100: *
101: * @param string string to be added
102: *
103: * @return the index of that string in the table
104: */
105:
106: public int addString(final UnicodeString string) {
107: field_1_num_strings++;
108: UnicodeString ucs = (string == null) ? EMPTY_STRING : string;
109: int rval;
110: int index = field_3_strings.getIndex(ucs);
111:
112: if (index != -1) {
113: rval = index;
114: } else {
115: // This is a new string -- we didn't see it among the
116: // strings we've already collected
117: rval = field_3_strings.size();
118: field_2_num_unique_strings++;
119: SSTDeserializer.addToStringTable(field_3_strings, ucs);
120: }
121: return rval;
122: }
123:
124: /**
125: * @return number of strings
126: */
127:
128: public int getNumStrings() {
129: return field_1_num_strings;
130: }
131:
132: /**
133: * @return number of unique strings
134: */
135:
136: public int getNumUniqueStrings() {
137: return field_2_num_unique_strings;
138: }
139:
140: /**
141: * USE THIS METHOD AT YOUR OWN PERIL: THE <code>addString</code>
142: * METHODS MANIPULATE THE NUMBER OF STRINGS AS A SIDE EFFECT; YOUR
143: * ATTEMPTS AT MANIPULATING THE STRING COUNT IS LIKELY TO BE VERY
144: * WRONG AND WILL RESULT IN BAD BEHAVIOR WHEN THIS RECORD IS
145: * WRITTEN OUT AND ANOTHER PROCESS ATTEMPTS TO READ THE RECORD
146: *
147: * @param count number of strings
148: *
149: */
150:
151: public void setNumStrings(final int count) {
152: field_1_num_strings = count;
153: }
154:
155: /**
156: * USE THIS METHOD AT YOUR OWN PERIL: THE <code>addString</code>
157: * METHODS MANIPULATE THE NUMBER OF UNIQUE STRINGS AS A SIDE
158: * EFFECT; YOUR ATTEMPTS AT MANIPULATING THE UNIQUE STRING COUNT
159: * IS LIKELY TO BE VERY WRONG AND WILL RESULT IN BAD BEHAVIOR WHEN
160: * THIS RECORD IS WRITTEN OUT AND ANOTHER PROCESS ATTEMPTS TO READ
161: * THE RECORD
162: *
163: * @param count number of strings
164: */
165:
166: public void setNumUniqueStrings(final int count) {
167: field_2_num_unique_strings = count;
168: }
169:
170: /**
171: * Get a particular string by its index
172: *
173: * @param id index into the array of strings
174: *
175: * @return the desired string
176: */
177:
178: public UnicodeString getString(final int id) {
179: return (UnicodeString) field_3_strings.get(id);
180: }
181:
182: public boolean isString16bit(final int id) {
183: UnicodeString unicodeString = ((UnicodeString) field_3_strings
184: .get(id));
185: return ((unicodeString.getOptionFlags() & 0x01) == 1);
186: }
187:
188: /**
189: * Return a debugging string representation
190: *
191: * @return string representation
192: */
193:
194: public String toString() {
195: StringBuffer buffer = new StringBuffer();
196:
197: buffer.append("[SST]\n");
198: buffer.append(" .numstrings = ").append(
199: Integer.toHexString(getNumStrings())).append("\n");
200: buffer.append(" .uniquestrings = ").append(
201: Integer.toHexString(getNumUniqueStrings()))
202: .append("\n");
203: for (int k = 0; k < field_3_strings.size(); k++) {
204: UnicodeString s = (UnicodeString) field_3_strings.get(k);
205: buffer.append(" .string_" + k + " = ").append(
206: s.getDebugInfo()).append("\n");
207: }
208: buffer.append("[/SST]\n");
209: return buffer.toString();
210: }
211:
212: /**
213: * @return sid
214: */
215: public short getSid() {
216: return sid;
217: }
218:
219: /**
220: * @return hashcode
221: */
222: public int hashCode() {
223: return field_2_num_unique_strings;
224: }
225:
226: public boolean equals(Object o) {
227: if ((o == null) || (o.getClass() != this .getClass())) {
228: return false;
229: }
230: SSTRecord other = (SSTRecord) o;
231:
232: return ((field_1_num_strings == other.field_1_num_strings)
233: && (field_2_num_unique_strings == other.field_2_num_unique_strings) && field_3_strings
234: .equals(other.field_3_strings));
235: }
236:
237: /**
238: * validate SID
239: *
240: * @param id the alleged SID
241: *
242: * @exception RecordFormatException if validation fails
243: */
244:
245: protected void validateSid(final short id)
246: throws RecordFormatException {
247: if (id != sid) {
248: throw new RecordFormatException("NOT An SST RECORD");
249: }
250: }
251:
252: /**
253: * Fill the fields from the data
254: * <P>
255: * The data consists of sets of string data. This string data is
256: * arranged as follows:
257: * <P>
258: * <CODE><pre>
259: * short string_length; // length of string data
260: * byte string_flag; // flag specifying special string
261: * // handling
262: * short run_count; // optional count of formatting runs
263: * int extend_length; // optional extension length
264: * char[] string_data; // string data, can be byte[] or
265: * // short[] (length of array is
266: * // string_length)
267: * int[] formatting_runs; // optional formatting runs (length of
268: * // array is run_count)
269: * byte[] extension; // optional extension (length of array
270: * // is extend_length)
271: * </pre></CODE>
272: * <P>
273: * The string_flag is bit mapped as follows:
274: * <P>
275: * <TABLE>
276: * <TR>
277: * <TH>Bit number</TH>
278: * <TH>Meaning if 0</TH>
279: * <TH>Meaning if 1</TH>
280: * <TR>
281: * <TR>
282: * <TD>0</TD>
283: * <TD>string_data is byte[]</TD>
284: * <TD>string_data is short[]</TH>
285: * <TR>
286: * <TR>
287: * <TD>1</TD>
288: * <TD>Should always be 0</TD>
289: * <TD>string_flag is defective</TH>
290: * <TR>
291: * <TR>
292: * <TD>2</TD>
293: * <TD>extension is not included</TD>
294: * <TD>extension is included</TH>
295: * <TR>
296: * <TR>
297: * <TD>3</TD>
298: * <TD>formatting run data is not included</TD>
299: * <TD>formatting run data is included</TH>
300: * <TR>
301: * <TR>
302: * <TD>4</TD>
303: * <TD>Should always be 0</TD>
304: * <TD>string_flag is defective</TH>
305: * <TR>
306: * <TR>
307: * <TD>5</TD>
308: * <TD>Should always be 0</TD>
309: * <TD>string_flag is defective</TH>
310: * <TR>
311: * <TR>
312: * <TD>6</TD>
313: * <TD>Should always be 0</TD>
314: * <TD>string_flag is defective</TH>
315: * <TR>
316: * <TR>
317: * <TD>7</TD>
318: * <TD>Should always be 0</TD>
319: * <TD>string_flag is defective</TH>
320: * <TR>
321: * </TABLE>
322: * <P>
323: * We can handle eating the overhead associated with bits 2 or 3
324: * (or both) being set, but we have no idea what to do with the
325: * associated data. The UnicodeString class can handle the byte[]
326: * vs short[] nature of the actual string data
327: *
328: * @param in the RecordInputstream to read the record from
329: */
330:
331: protected void fillFields(RecordInputStream in) {
332: // this method is ALWAYS called after construction -- using
333: // the nontrivial constructor, of course -- so this is where
334: // we initialize our fields
335: field_1_num_strings = in.readInt();
336: field_2_num_unique_strings = in.readInt();
337: field_3_strings = new IntMapper();
338: deserializer = new SSTDeserializer(field_3_strings);
339: deserializer.manufactureStrings(field_2_num_unique_strings, in);
340: }
341:
342: /**
343: * @return an iterator of the strings we hold. All instances are
344: * UnicodeStrings
345: */
346:
347: Iterator getStrings() {
348: return field_3_strings.iterator();
349: }
350:
351: /**
352: * @return count of the strings we hold.
353: */
354:
355: int countStrings() {
356: return field_3_strings.size();
357: }
358:
359: /**
360: * called by the class that is responsible for writing this sucker.
361: * Subclasses should implement this so that their data is passed back in a
362: * byte array.
363: *
364: * @return size
365: */
366:
367: public int serialize(int offset, byte[] data) {
368: SSTSerializer serializer = new SSTSerializer(field_3_strings,
369: getNumStrings(), getNumUniqueStrings());
370: int bytes = serializer.serialize(offset, data);
371: bucketAbsoluteOffsets = serializer.getBucketAbsoluteOffsets();
372: bucketRelativeOffsets = serializer.getBucketRelativeOffsets();
373: return bytes;
374: }
375:
376: public int getRecordSize() {
377: SSTRecordSizeCalculator calculator = new SSTRecordSizeCalculator(
378: field_3_strings);
379: int recordSize = calculator.getRecordSize();
380: return recordSize;
381: }
382:
383: SSTDeserializer getDeserializer() {
384: return deserializer;
385: }
386:
387: /**
388: * Creates an extended string record based on the current contents of
389: * the current SST record. The offset within the stream to the SST record
390: * is required because the extended string record points directly to the
391: * strings in the SST record.
392: * <p>
393: * NOTE: THIS FUNCTION MUST ONLY BE CALLED AFTER THE SST RECORD HAS BEEN
394: * SERIALIZED.
395: *
396: * @param sstOffset The offset in the stream to the start of the
397: * SST record.
398: * @return The new SST record.
399: */
400: public ExtSSTRecord createExtSSTRecord(int sstOffset) {
401: if (bucketAbsoluteOffsets == null
402: || bucketAbsoluteOffsets == null)
403: throw new IllegalStateException(
404: "SST record has not yet been serialized.");
405:
406: ExtSSTRecord extSST = new ExtSSTRecord();
407: extSST.setNumStringsPerBucket((short) 8);
408: int[] absoluteOffsets = (int[]) bucketAbsoluteOffsets.clone();
409: int[] relativeOffsets = (int[]) bucketRelativeOffsets.clone();
410: for (int i = 0; i < absoluteOffsets.length; i++)
411: absoluteOffsets[i] += sstOffset;
412: extSST.setBucketOffsets(absoluteOffsets, relativeOffsets);
413: return extSST;
414: }
415:
416: /**
417: * Calculates the size in bytes of the EXTSST record as it would be if the
418: * record was serialized.
419: *
420: * @return The size of the ExtSST record in bytes.
421: */
422: public int calcExtSSTRecordSize() {
423: return ExtSSTRecord.getRecordSizeForStrings(field_3_strings
424: .size());
425: }
426: }
|