001: package uk.org.ponder.stringutil;
002:
003: /** This abstract class forms part of the fast and better instrumented
004: * uk.org.ponder.streamutil.DirectInputStreamReader architecture. It is
005: * intended as a base class for specific byte to character decoders
006: * (such as ByteToCharUTF8), and abstracts away the non-stream and
007: * non-encoding specific tasks of working out whether anything needs
008: * doing or not, and if so how much and where it is.
009: */
010:
011: public abstract class ByteToCharConverter {
012:
013: public ByteToCharConverter() {
014: }
015:
016: /** Return code from <code>convert()</code> indicating conversion stopped because
017: * there was no space left in the output buffer. */
018:
019: public static final int STOP_OUTPUT_EXHAUSTED = 0;
020: /** Return code from <code>convert()</code> indicating conversion stopped because
021: * there was no space left in the input buffer. */
022:
023: public static final int STOP_INPUT_EXHAUSTED = 1;
024:
025: /** Return code from <code>convert()</code> indicating conversion stopped because
026: * there was no input left in the input buffer, but also that no partial input sequence
027: * was left in it. */
028: public static final int STOP_INPUT_EXHAUSTED_EXACTLY = 2;
029:
030: /** Convert as many bytes from <code>inbuffer</code> to characters in <code>outbuffer</code>
031: * as possible. The return codes from this method are listed above, indicating
032: * which out of the input and the output was actually exhausted.
033: */
034:
035: public abstract int convert();
036:
037: /** Returns the name of the byte to character (UTF-16) encoding performed by this
038: * converter */
039: public abstract String getCharacterEncoding();
040:
041: /** Returns the maximum possible number of characters that could be decoded from
042: * an input byte sequence of the specified length. Currently disused.
043: * @param inputsize The number of input bytes for which the maximum decoded characters
044: * are required.
045: */
046: public abstract int getMaxOutput(int inputsize);
047:
048: /*
049: * Offset of next character to be output
050: */
051: protected int outbufferpos;
052: protected int outbufferlimit;
053:
054: protected char[] outbuffer;
055:
056: protected int totalbytesin;
057: /*
058: * Offset of next byte to be converted
059: */
060: protected int inbufferpos;
061: protected int inbufferlimit;
062:
063: protected byte[] inbuffer;
064: /*
065: * Length of bad input that caused a MalformedInputException.
066: */
067: protected int input_sequence_length;
068: protected int output_sequence_length;
069: /*
070: * Number of lines that have gone by
071: */
072: protected int linenumber;
073:
074: private EncodingErrorHandler errorhandler;
075:
076: // The following four methods require public access since they are used
077: // from above by DirectInputStreamReader,
078: // resulting from a possible factorisation error in all of this logic.
079: // See Felixified I/O routines involving crank() etc.
080:
081: public int getOutputBufferPos() {
082: return outbufferpos;
083: }
084:
085: public int getInputBufferLimit() {
086: return inbufferlimit;
087: }
088:
089: public byte[] getInputBuffer() {
090: return inbuffer;
091: }
092:
093: public void increaseInputBufferLimit(int bytesread) {
094: inbufferlimit += bytesread;
095: }
096:
097: /** Sets the output buffer to which decoded character data should be written.
098: * @param outbuffer A character buffer to which character data can be written.
099: * @param outbufferpos The position within the buffer to which the character data
100: * can be written.
101: * @param outbufferlimit The index of the logical end of the buffer. If data
102: * is written exactly up to this point, the buffer will be considered full and
103: * decoding will stop until another buffer is supplied.
104: */
105:
106: public void setOutputBuffer(char[] outbuffer, int outbufferpos,
107: int outbufferlimit) {
108: this .outbuffer = outbuffer;
109: this .outbufferpos = outbufferpos;
110: this .outbufferlimit = outbufferlimit;
111: }
112:
113: /** Sets the error handler that will be used to report errors encountered in the
114: * byte encoding of the data.
115: * @param errorhandler An interface through which decoding errors may be reported.
116: */
117: public void setEncodingErrorHandler(
118: EncodingErrorHandler errorhandler) {
119: this .errorhandler = errorhandler;
120: }
121:
122: /** Reorganise the input buffer by rotating the current input point to the beginning,
123: * ready to receive more input after <code>inbufferlimit</code> */
124:
125: public void swizzInputBuffer() {
126: System.arraycopy(inbuffer, inbufferpos, inbuffer, 0,
127: inbufferlimit - inbufferpos);
128: // totalbytesin += inbufferpos;
129: inbufferlimit = inbufferlimit - inbufferpos;
130: inbufferpos = 0;
131: }
132:
133: /** Trigger an encoding error to be delivered to any registered EncodingErrorHandler.
134: * There is one sort of error that can only be detected from the
135: * outside of this class, namely an incomplete input sequence but no
136: * further input available. For this reason this method has been
137: * given public access to allow an error report to be triggered
138: * externally
139: * @param errortype A String reprenting the type of the error that has occurred.
140: * This string will be passed on via the EncodingErrorHandler interface.*/
141: public void handleEncodingError(String errortype) {
142: if (errorhandler != null) {
143: int max_sequence_available = inbufferlimit - inbufferpos;
144: // do not surprise our clients by returning pointers to nonexistent bytes
145: // should the error be invoked by DirectInputStreamReader as a result of
146: // incomplete final sequence.
147: if (max_sequence_available > input_sequence_length)
148: max_sequence_available = input_sequence_length;
149: errorhandler.reportEncodingError(errortype, linenumber,
150: totalbytesin, inbuffer, inbufferpos,
151: max_sequence_available);
152: }
153: }
154:
155: /** Ensure that the current input buffer is big enough to accommodate the specified
156: * number of input bytes, by reallocating it if necessary. This method does not
157: * preserve the buffer contents.
158: * @param buffersize The required input buffer size.
159: */
160: public void ensureInputBuffer(int buffersize) {
161: if (inbuffer == null || inbuffer.length < buffersize) {
162: inbuffer = new byte[buffersize];
163: }
164: }
165:
166: /** Destroy all the state stored in this converter, so it holds no resources
167: * and is ready to begin conversion again.
168: */
169:
170: public void blastState() {
171: inbufferpos = 0;
172: inbufferlimit = 0;
173: input_sequence_length = 0;
174: outbufferpos = 0;
175: outbufferlimit = 0;
176: output_sequence_length = 0;
177:
178: outbuffer = null;
179: inbuffer = null;
180:
181: linenumber = 1;
182: totalbytesin = 0;
183: errorhandler = null;
184: }
185:
186: /** Returns the number of bytes needed to complete the current input sequence.
187: * @return the number of bytes needed toc complete the current input sequence.
188: * positive if we need more bytes to complete the current sequence, zero if we have exactly
189: * used up all input, negative if there is more input remaining.
190: */
191: public int missing_bytes() {
192: return inbufferpos + input_sequence_length - inbufferlimit;
193: }
194:
195: }
|