Source Code Cross Referenced for HDGFLZW.java in » Collaboration » poi-3.0.2-beta2 » org » apache » poi » hdgf » Java Source Code / Java DocumentationJava Source Code and Java Documentation

Java Source Code / Java Documentation

1.	6.0 JDK Core
2.	6.0 JDK Modules
3.	6.0 JDK Modules com.sun
4.	6.0 JDK Modules com.sun.java
5.	6.0 JDK Modules sun
6.	6.0 JDK Platform
7.	Ajax
8.	Apache Harmony Java SE
9.	Aspect oriented
10.	Authentication Authorization
11.	Blogger System
12.	Build
13.	Byte Code
14.	Cache
15.	Chart
16.	Chat
17.	Code Analyzer
18.	Collaboration
19.	Content Management System
20.	Database Client
21.	Database DBMS
22.	Database JDBC Connection Pool
23.	Database ORM
24.	Development
25.	EJB Server geronimo
26.	EJB Server GlassFish
27.	EJB Server JBoss 4.2.1
28.	EJB Server resin 3.1.5
29.	ERP CRM Financial
30.	ESB
31.	Forum
32.	GIS
33.	Graphic Library
34.	Groupware
35.	HTML Parser
36.	IDE
37.	IDE Eclipse
38.	IDE Netbeans
39.	Installer
40.	Internationalization Localization
41.	Inversion of Control
42.	Issue Tracking
43.	J2EE
44.	JBoss
45.	JMS
46.	JMX
47.	Library
48.	Mail Clients
49.	Net
50.	Parser
51.	PDF
52.	Portal
53.	Profiler
54.	Project Management
55.	Report
56.	RSS RDF
57.	Rule Engine
58.	Science
59.	Scripting
60.	Search Engine
61.	Security
62.	Sevlet Container
63.	Source Control
64.	Swing Library
65.	Template Engine
66.	Test Coverage
67.	Testing
68.	UML
69.	Web Crawler
70.	Web Framework
71.	Web Mail
72.	Web Server
73.	Web Services
74.	Web Services apache cxf 2.0.1
75.	Web Services AXIS2
76.	Wiki Engine
77.	Workflow Engines
78.	XML
79.	XML UI

Java

Java Tutorial

Illustrator Tutorials

GIMP Tutorials

C# / C Sharp

C# / CSharp Tutorial

C# / CSharp Open Source

SQL Server / T-SQL Tutorial

Oracle PL / SQL

Oracle PL/SQL Tutorial

Flash / Flex / ActionScript

VBA / Excel / Access / Word

XML

XML Tutorial

Microsoft Office PowerPoint 2007 Tutorial

Microsoft Office Excel 2007 Tutorial

Microsoft Office Word 2007 Tutorial

Java Source Code / Java Documentation » Collaboration » poi 3.0.2 beta2 » org.apache.poi.hdgf

Source Cross Referenced Class Diagram Java Document (Java Doc)

001:        /* ====================================================================
002:           Licensed to the Apache Software Foundation (ASF) under one or more
003:           contributor license agreements.  See the NOTICE file distributed with
004:           this work for additional information regarding copyright ownership.
005:           The ASF licenses this file to You under the Apache License, Version 2.0
006:           (the "License"); you may not use this file except in compliance with
007:           the License.  You may obtain a copy of the License at
008:
009:               http://www.apache.org/licenses/LICENSE-2.0
010:
011:           Unless required by applicable law or agreed to in writing, software
012:           distributed under the License is distributed on an "AS IS" BASIS,
013:           WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014:           See the License for the specific language governing permissions and
015:           limitations under the License.
016:        ==================================================================== */
017:        package org.apache.poi.hdgf;
018:
019:        import java.io.ByteArrayOutputStream;
020:        import java.io.IOException;
021:        import java.io.InputStream;
022:        import java.io.OutputStream;
023:
024:        /**
025:         * A decoder for the crazy LZW implementation used
026:         *  in Visio.
027:         * According to VSDump, "it's a slightly perverted version of LZW 
028:         *  compression, with inverted meaning of flag byte and 0xFEE as an 
029:         *  'initial shift'". It uses 12 bit codes
030:         * (http://www.gnome.ru/projects/vsdump_en.html)
031:         *
032:         * Two good resources on LZW are:
033:         *  http://en.wikipedia.org/wiki/LZW
034:         *  http://marknelson.us/1989/10/01/lzw-data-compression/
035:         */
036:        public class HDGFLZW {
037:
038:            /**
039:             * Given an integer, turn it into a java byte, handling 
040:             *  the wrapping.
041:             * This is a convenience method
042:             */
043:            public static byte fromInt(int b) {
044:                if (b < 128)
045:                    return (byte) b;
046:                return (byte) (b - 256);
047:            }
048:
049:            /**
050:             * Given a java byte, turn it into an integer between 0 
051:             *  and 255 (i.e. handle the unwrapping).
052:             * This is a convenience method
053:             */
054:            public static int fromByte(byte b) {
055:                if (b >= 0)
056:                    return (int) b;
057:                return (int) (b + 256);
058:            }
059:
060:            /**
061:             * Compress the given input stream, returning the array of bytes
062:             *  of the compressed input
063:             */
064:            public byte[] compress(InputStream src) throws IOException {
065:                ByteArrayOutputStream res = new ByteArrayOutputStream();
066:                compress(src, res);
067:                return res.toByteArray();
068:            }
069:
070:            /**
071:             * Decompresses the given input stream, returning the array of bytes
072:             *  of the decompressed input.
073:             */
074:            public byte[] decode(InputStream src) throws IOException {
075:                ByteArrayOutputStream res = new ByteArrayOutputStream();
076:                decode(src, res);
077:                return res.toByteArray();
078:            }
079:
080:            /**
081:             * Perform a streaming decompression of the input.
082:             * Works by:
083:             * 1) Reading a flag byte, the 8 bits of which tell you if the
084:             *     following 8 codes are compressed our un-compressed
085:             * 2) Consider the 8 bits in turn
086:             * 3) If the bit is set, the next code is un-compressed, so
087:             *     add it to the dictionary and output it
088:             * 4) If the bit isn't set, then read in the length and start
089:             *     position in the dictionary, and output the bytes there
090:             * 5) Loop until we've done all 8 bits, then read in the next
091:             *     flag byte
092:             */
093:            public void decode(InputStream src, OutputStream res)
094:                    throws IOException {
095:                // We use 12 bit codes:
096:                // * 0-255 are real bytes
097:                // * 256-4095 are the substring codes
098:                // Java handily initialises our buffer / dictionary
099:                //  to all zeros
100:                byte[] buffer = new byte[4096];
101:
102:                // How far through the output we've got
103:                // (This is normally used &4095, so it nicely wraps)
104:                int pos = 0;
105:                // The flag byte is treated as its 8 individual
106:                //  bits, which tell us if the following 8 codes
107:                //  are compressed or un-compressed
108:                int flag;
109:                // The mask, between 1 and 255, which is used when
110:                //  processing each bit of the flag byte in turn
111:                int mask;
112:
113:                // This is a byte as looked up in the dictionary
114:                // It needs to be signed, as it'll get passed on to
115:                //  the output stream
116:                byte dataB;
117:                // This is an unsigned byte read from the stream
118:                // It needs to be unsigned, so that bit stuff works
119:                int dataI;
120:                // The compressed code sequence is held over 2 bytes
121:                int dataIPt1, dataIPt2;
122:                // How long a code sequence is, and where in the
123:                //  dictionary to start at
124:                int len, pntr;
125:
126:                while ((flag = src.read()) != -1) {
127:                    // Compare each bit in our flag byte in turn:
128:                    for (mask = 1; mask < 256; mask <<= 1) {
129:                        // Is this a new code (un-compressed), or
130:                        //  the use of existing codes (compressed)?
131:                        if ((flag & mask) > 0) {
132:                            // Retrieve the un-compressed code
133:                            if ((dataI = src.read()) != -1) {
134:                                // Save the byte into the dictionary
135:                                buffer[(pos & 4095)] = fromInt(dataI);
136:                                pos++;
137:                                // And output the byte
138:                                res.write(new byte[] { fromInt(dataI) });
139:                            }
140:                        } else {
141:                            // We have a compressed sequence
142:                            // Grab the next 16 bits of data
143:                            dataIPt1 = src.read();
144:                            dataIPt2 = src.read();
145:                            if (dataIPt1 == -1 || dataIPt2 == -1)
146:                                break;
147:
148:                            // Build up how long the code sequence is, and
149:                            //  what position of the code to start at
150:                            // (The position is the first 12 bits, the
151:                            //  length is the last 4 bits)
152:                            len = (dataIPt2 & 15) + 3;
153:                            pntr = (dataIPt2 & 240) * 16 + dataIPt1;
154:
155:                            // If the pointer happens to be passed the end
156:                            //  of our buffer, then wrap around
157:                            if (pntr > 4078) {
158:                                pntr = pntr - 4078;
159:                            } else {
160:                                pntr = pntr + 18;
161:                            }
162:
163:                            // Loop over the codes, outputting what they correspond to
164:                            for (int i = 0; i < len; i++) {
165:                                buffer[(pos + i) & 4095] = buffer[(pntr + i) & 4095];
166:                                dataB = buffer[(pntr + i) & 4095];
167:                                res.write(new byte[] { dataB });
168:                            }
169:
170:                            // Record how far along the stream we have moved
171:                            pos = pos + len;
172:                        }
173:                    }
174:                }
175:            }
176:
177:            /**
178:             * Performs the Visio compatible streaming LZW compression.
179:             * TODO - Finish
180:             */
181:            public void compress(InputStream src, OutputStream res)
182:                    throws IOException {
183:                Compressor c = new Compressor();
184:                c.compress(src, res);
185:            }
186:
187:            /**
188:             * Helper class to handle the Visio compatible
189:             *  streaming LZW compression.
190:             * Need our own class to handle keeping track of the
191:             *  code buffer, pending bytes to write out etc.
192:             */
193:            private class Compressor {
194:                // We use 12 bit codes:
195:                // * 0-255 are real bytes
196:                // * 256-4095 are the substring codes
197:                // Java handily initialises our buffer / dictionary
198:                //  to all zeros
199:                byte[] dict = new byte[4096];
200:
201:                // The next block of data to be written out, minus
202:                //  its mask byte
203:                byte[] buffer = new byte[16];
204:                // And how long it is
205:                // (Un-compressed codes are 1 byte each, compressed codes
206:                //   are two)
207:                int bufferLen = 0;
208:
209:                // The raw length of a code is limited to 4 bits
210:                byte[] rawCode = new byte[16];
211:                // And how much we're using
212:                int rawCodeLen = 0;
213:
214:                // How far through the input and output streams we are
215:                int posInp = 0;
216:                int posOut = 0;
217:
218:                // What the next mask byte to output will be
219:                int nextMask = 0;
220:                // And how many bits we've already set
221:                int maskBitsSet = 0;
222:
223:                /**
224:                 * Returns the last place that the bytes from rawCode are found
225:                 *  at in the buffer, or -1 if they can't be found
226:                 */
227:                private int findRawCodeInBuffer() {
228:                    // Work our way back from the end
229:                    // (Visio always seems to use the last possible code)
230:                    for (int i = (buffer.length - rawCodeLen); i >= 0; i--) {
231:                        boolean matches = true;
232:                        for (int j = 0; matches && j < rawCodeLen; j++) {
233:                            if (buffer[i] == rawCode[j]) {
234:                                // Fits
235:                            } else {
236:                                // Doesn't fit, can't be a match
237:                                matches = false;
238:                            }
239:                        }
240:
241:                        // Was this position a match?
242:                        if (matches) {
243:                            return i;
244:                        }
245:                    }
246:
247:                    // Not found
248:                    return -1;
249:                }
250:
251:                /**
252:                 * Output the compressed representation for the bytes
253:                 *  found in rawCode
254:                 */
255:                private void outputCompressed(OutputStream res)
256:                        throws IOException {
257:                    // It's not worth compressing only 1 or two bytes,
258:                    //  due to the overheads
259:                    // So if asked, just output uncompressed
260:                    if (rawCodeLen < 3) {
261:                        for (int i = 0; i < rawCodeLen; i++) {
262:                            outputUncompressed(rawCode[i], res);
263:                        }
264:                        return;
265:                    }
266:
267:                    // Increment the mask bit count, we've done another code
268:                    maskBitsSet++;
269:                    // Add the length+code to the buffer
270:                    // TODO
271:                    posOut += 2;
272:
273:                    // If we're now at 8 codes, output
274:                    if (maskBitsSet == 8) {
275:                        output8Codes(res);
276:                    }
277:                }
278:
279:                /**
280:                 * Output the un-compressed byte
281:                 */
282:                private void outputUncompressed(byte b, OutputStream res)
283:                        throws IOException {
284:                    // Set the mask bit for us 
285:                    nextMask += (1 << maskBitsSet);
286:
287:                    // And add us to the buffer + dictionary
288:                    buffer[bufferLen] = fromInt(b);
289:                    bufferLen++;
290:                    dict[(posOut & 4095)] = fromInt(b);
291:                    posOut++;
292:
293:                    // If we're now at 8 codes, output
294:                    if (maskBitsSet == 8) {
295:                        output8Codes(res);
296:                    }
297:                }
298:
299:                /**
300:                 * We've got 8 code worth to write out, so
301:                 *  output along with the header
302:                 */
303:                private void output8Codes(OutputStream res) throws IOException {
304:                    // Output the mask and the data
305:                    res.write(new byte[] { fromInt(nextMask) });
306:                    res.write(buffer, 0, bufferLen);
307:
308:                    // Reset things
309:                    nextMask = 0;
310:                    maskBitsSet = 0;
311:                    bufferLen = 0;
312:                }
313:
314:                /**
315:                 * Does the compression
316:                 */
317:                private void compress(InputStream src, OutputStream res)
318:                        throws IOException {
319:                    // Have we hit the end of the file yet?
320:                    boolean going = true;
321:
322:                    // This is a byte as looked up in the dictionary
323:                    // It needs to be signed, as it'll get passed on to
324:                    //  the output stream
325:                    byte dataB;
326:                    // This is an unsigned byte read from the stream
327:                    // It needs to be unsigned, so that bit stuff works
328:                    int dataI;
329:
330:                    while (going) {
331:                        dataI = src.read();
332:                        posInp++;
333:                        if (dataI == -1) {
334:                            going = false;
335:                        }
336:                        dataB = fromInt(dataI);
337:
338:                        // If we've run out of data, output anything that's
339:                        //  pending then finish
340:                        if (!going && rawCodeLen > 0) {
341:                            outputCompressed(res);
342:                            break;
343:                        }
344:
345:                        // Try adding this new byte onto rawCode, and
346:                        //  see if all of that is still found in the
347:                        //  buffer dictionary or not
348:                        rawCode[rawCodeLen] = dataB;
349:                        rawCodeLen++;
350:                        int rawAt = findRawCodeInBuffer();
351:
352:                        // If we found it and are now at 16 bytes,
353:                        //  we need to output our pending code block
354:                        if (rawCodeLen == 16 && rawAt > -1) {
355:                            outputCompressed(res);
356:                            rawCodeLen = 0;
357:                            continue;
358:                        }
359:
360:                        // If we did find all of rawCode with our new
361:                        //  byte added on, we can wait to see what happens
362:                        //  with the next byte
363:                        if (rawAt > -1) {
364:                            continue;
365:                        }
366:
367:                        // If there was something in rawCode before, then we
368:                        //  need to output that
369:                        rawCodeLen--;
370:                        if (rawCodeLen > 0) {
371:                            // Output the old rawCode
372:                            outputCompressed(res);
373:
374:                            // Can this byte start a new rawCode, or does
375:                            //  it need outputting itself?
376:                            rawCode[0] = dataB;
377:                            rawCodeLen = 1;
378:                            if (findRawCodeInBuffer() > -1) {
379:                                // Fits in, wait for next byte
380:                                continue;
381:                            } else {
382:                                // Doesn't fit, output
383:                                outputUncompressed(dataB, res);
384:                                rawCodeLen = 0;
385:                            }
386:                        } else {
387:                            // Nothing in rawCode before, so this byte
388:                            //  isn't in the buffer dictionary
389:                            // Output it un-compressed
390:                            outputUncompressed(dataB, res);
391:                        }
392:                    }
393:                }
394:            }
395:
396:        }

www.java2java.com | Contact Us

All other trademarks are property of their respective owners.