001: /*
002:
003: Derby - Class org.apache.derbyTesting.functionTests.util.streams.ByteAlphabet
004:
005: Licensed to the Apache Software Foundation (ASF) under one or more
006: contributor license agreements. See the NOTICE file distributed with
007: this work for additional information regarding copyright ownership.
008: The ASF licenses this file to you under the Apache License, Version 2.0
009: (the "License"); you may not use this file except in compliance with
010: the License. You may obtain a copy of the License at
011:
012: http://www.apache.org/licenses/LICENSE-2.0
013:
014: Unless required by applicable law or agreed to in writing, software
015: distributed under the License is distributed on an "AS IS" BASIS,
016: WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
017: See the License for the specific language governing permissions and
018: limitations under the License.
019:
020: */
021:
022: package org.apache.derbyTesting.functionTests.util.streams;
023:
024: import java.io.UnsupportedEncodingException;
025:
026: /**
027: * A looping alphabet, returning bytes in a specified encoding.
028: *
029: * The alphabet loops over a list of bytes representing characters. The
030: * alphabet-object is used by looping stream, which in turn is used for testing
031: * methods requiring streaming inputs.
032: *
033: * The following alphabets have been defined:
034: * <ul><li><em>Modern latin, lowercase</em> ; letters a - z (26)
035: * <li><em>Norwegian/Danish, lowercase</em> ; letters a - z, plus three
036: * additional letters (29)
037: * <li><em>Tamil</em> ; 46 Tamil letters from UNICODE U0B80
038: * <li><em>CJK subset</em> ; 12 letter from UNICODE CJK U4E00
039: * </ul>
040: */
041: public class ByteAlphabet {
042:
043: /** The name of the alphabet. */
044: private final String name;
045: /** The encoding used to represent characters as bytes. */
046: private final String encoding;
047: /** The bytes representing the characters in the alphabet. */
048: private final byte[] bytes;
049: /** The number of characters in the alphabet. */
050: private final int charCount;
051: /** The number of byes in the alphabet. */
052: private final int byteCount;
053: /** Offset into the byte array. */
054: private int boff = 0;
055:
056: /**
057: * Create an alphabet returning bytes representing the lowercase letters
058: * a-z in the "US-ASCII" encoding.
059: */
060: public static ByteAlphabet modernLatinLowercase() {
061: return new ByteAlphabet("Modern latin lowercase, US-ASCII",
062: CharAlphabet.MODERNLATINLOWER, "US-ASCII");
063: }
064:
065: /**
066: * Create an alphabet returning bytes representing the 29 lowercase
067: * letters in the Norwegian/Danish alphabet in the "ISO-8859-1" encoding.
068: */
069: public static ByteAlphabet norwegianLowercase() {
070: return new ByteAlphabet(
071: "Norwegian/Danish lowercase, ISO-8859-1",
072: CharAlphabet.NO_DK_LOWER, "ISO-8859-1");
073: }
074:
075: /**
076: * Create an alphabet returning bytes representing a subset of the Tamil
077: * alphabet in the UTF-8 encoding.
078: */
079: public static ByteAlphabet tamilUTF8() {
080: return new ByteAlphabet("Tamil, UTF-8", CharAlphabet.TAMIL,
081: "UTF8");
082: }
083:
084: /**
085: * Create an alphabet returning bytes representing a subset of the Tamil
086: * alphabet in the UTF-16BE encoding.
087: */
088: public static ByteAlphabet tamilUTF16BE() {
089: return new ByteAlphabet("Tamil, UTF-16BE", CharAlphabet.TAMIL,
090: "UTF-16BE");
091: }
092:
093: /**
094: * Create an alphabet returning bytes representing a subset of the CJK
095: * alphabet in the UTF-8 encoding.
096: */
097: public static ByteAlphabet cjkSubsetUTF8() {
098: return new ByteAlphabet("CJK subset, UTF-8",
099: CharAlphabet.CJKSUBSET, "UTF8");
100: }
101:
102: /**
103: * Create an alphabet returning bytes representing a subset of the CJK
104: * alphabet in the UTF-16BE encoding.
105: */
106: public static ByteAlphabet cjkSubsetUTF16BE() {
107: return new ByteAlphabet("CJK subset, UTF-16BE",
108: CharAlphabet.CJKSUBSET, "UTF-16BE");
109: }
110:
111: /**
112: * Create an alphabet with the given name, the given characters and using
113: * the specified encoding to represent the characters as bytes.
114: *
115: * @param name the name of the alphabet
116: * @param chars the characters in the alphabet
117: * @param encoding the encoding to use to represent characters as bytes
118: */
119: private ByteAlphabet(String name, char[] chars, String encoding) {
120: this .name = name;
121: this .encoding = encoding;
122: this .charCount = chars.length;
123: String tmpStr = new String(chars);
124: byte[] tmpBytes;
125: int tmpByteCount;
126: try {
127: tmpBytes = tmpStr.getBytes(encoding);
128: tmpByteCount = tmpBytes.length;
129: } catch (UnsupportedEncodingException uee) {
130: // We are nasty and ignore this...
131: tmpBytes = new byte[] { 0 };
132: tmpByteCount = 1;
133: }
134: this .bytes = tmpBytes;
135: this .byteCount = tmpByteCount;
136: }
137:
138: /**
139: * Return the name of the alphabet.
140: */
141: public String getName() {
142: return this .name;
143: }
144:
145: /**
146: * Return the encoding used to represent characters as bytes.
147: */
148: public String getEncoding() {
149: return this .encoding;
150: }
151:
152: /**
153: * Return the number of characters in the alphabet.
154: */
155: public int charCount() {
156: return charCount;
157: }
158:
159: /**
160: * Return the number of bytes in the alphabet.
161: *
162: * The number of bytes in the alphabet is noramlly different from the
163: * number of characters in the alphabet, but it depends on the
164: * characters in the alphabet and encoding used to represent them as
165: * bytes.
166: */
167: public int byteCount() {
168: return byteCount;
169: }
170:
171: /**
172: * Return the next byte in the alphabet.
173: */
174: public byte nextByte() {
175: if (boff >= byteCount) {
176: boff = 0;
177: }
178: return bytes[boff++];
179: }
180:
181: /**
182: * Reset the alphabet, the next byte returned is the first byte in the
183: * alphabet, which might not be a complete character.
184: */
185: public void reset() {
186: boff = 0;
187: }
188:
189: /**
190: * Compute the next byte to read after reading the specified number
191: * of bytes.
192: *
193: * Besides from returning the index, the internal state of
194: * the alphabet is updated.
195: *
196: * @param bytesRead the number of bytes read
197: * @return the index of the next byte
198: */
199: public int nextByteToRead(int bytesRead) {
200: boff = (boff + (bytesRead % byteCount)) % byteCount;
201: return boff;
202: }
203: } // End class ByteAlphabet
|