001: /*
002: * Licensed to the Apache Software Foundation (ASF) under one or more
003: * contributor license agreements. See the NOTICE file distributed with
004: * this work for additional information regarding copyright ownership.
005: * The ASF licenses this file to You under the Apache License, Version 2.0
006: * (the "License"); you may not use this file except in compliance with
007: * the License. You may obtain a copy of the License at
008: *
009: * http://www.apache.org/licenses/LICENSE-2.0
010: *
011: * Unless required by applicable law or agreed to in writing, software
012: * distributed under the License is distributed on an "AS IS" BASIS,
013: * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014: * See the License for the specific language governing permissions and
015: * limitations under the License.
016: */
017: package org.apache.cocoon.components.serializers.encoding;
018:
019: /**
020: *
021: *
022: * @author <a href="mailto:pier@apache.org">Pier Fumagalli</a>, February 2003
023: * @version CVS $Id: CompiledCharset.java 433543 2006-08-22 06:22:54Z crossley $
024: */
025: public abstract class CompiledCharset extends AbstractCharset {
026:
027: /** The encoding table of this <code>Charset</code>. */
028: protected byte encoding[];
029:
030: /**
031: * Create a new instance of this <code>CompiledCharset</code>.
032: * <p>
033: * After construction, the <code>compile()</code> method will have to
034: * be called for proper operation of this <code>Charset</code>.
035: *
036: * @param name This <code>Charset</code> name.
037: * @param aliases This <code>Charset</code> alias names.
038: * @throws NullPointerException If one of the arguments is <b>null</b>.
039: */
040: protected CompiledCharset(String name, String aliases[]) {
041: super (name, aliases);
042: this .encoding = new byte[8192];
043: for (int x = 0; x < this .encoding.length; x++)
044: this .encoding[x] = 0;
045: }
046:
047: /**
048: * Create a new instance of this <code>CompiledCharset</code>.
049: * <p>
050: * The encodings table passed to this constructor <b>needs</b> to be 8192
051: * bytes long, or (in other words), must contain exactly 65536 bits.
052: * </p>
053: * <p>
054: * As in the Java Programming Language a <code>char</code> can assume
055: * values between 0 (zero) and 65535 (inclusive), each bit in the specified
056: * array refers to a specific <code>char</code> value.
057: * </p>
058: * <p>
059: * When this specific bit is set to 1 (one or true) we assume that the
060: * charset <b>can</b> encode the given character, while when the bit is
061: * set to 0 (zero or false), the character cannot be represented using
062: * this <code>Charset</code>.
063: * </p>
064: * <p>
065: * For example, the <b>US-ASCII</b> <code>Charset</code> can represent
066: * only Java characters between 0 (zero) and 255 (inclusive), therefore
067: * the specified byte array will contain only 256 true bits.
068: * </p>
069: * <p>
070: * To check if a character can be encoded by this <code>Charset</code>,
071: * given "<code>c</code>" as the character to verify, one
072: * can write this simple formula:
073: * </p>
074: * <p>
075: * <nobr><code>((encoding[c >> 3] & (1 << (c & 0x07))) > 0)
076: * </p>
077: * <p>
078: * If the result of this operation is 0 (zero) the bit was set to zero,
079: * and therefore "<code>c</code>" cannot be represented in
080: * this <code>Charset</code>, while if the result is greater than 0 (zero)
081: * the character "<code>c</code>" can actually be represented
082: * by this <code>Charset</code>
083: * </p>
084: *
085: * @param name This <code>Charset</code> name.
086: * @param aliases This <code>Charset</code> alias names.
087: * @param encoding This <code>Charset</code> encoding table as specified
088: * above.
089: * @throws NullPointerException If one of the arguments is <b>null</b>.
090: * @throws IllegalArgumentException If the length of the encoding table
091: * is <b>not</b> 8192 precisely.
092: */
093: protected CompiledCharset(String name, String aliases[],
094: byte encoding[]) throws NullPointerException,
095: IllegalArgumentException {
096: super (name, aliases);
097: if (encoding == null)
098: throw new NullPointerException("Invalid table");
099: if (encoding.length != 8192) {
100: throw new IllegalArgumentException(
101: "Invalid encoding table size: "
102: + "current length is " + encoding.length
103: + ", required 8192.");
104: }
105: this .encoding = encoding;
106: }
107:
108: /**
109: * Check if the specified character is representable by this specifiec
110: * <code>Charset</code> instance.
111: * </p>
112: */
113: public boolean allows(char c) {
114: /* This is tied to haw the compiler does stuff. */
115: return ((this .encoding[c >> 3] & (1 << (c & 0x07))) > 0);
116: }
117:
118: /**
119: * Compile the encoding table of this <code>CompiledCharset</code>.
120: * <p>
121: * This method will invoke the <code>compile(...)</code> method for any
122: * possible value of a Java character (65536 times, from 0, zero, to
123: * 65535 inclusive), building the encoding table of the characters this
124: * <code>Charset</code> can successfully represent.
125: */
126: protected final void compile() {
127: for (int x = 0; x <= Character.MAX_VALUE; x++) {
128: if (this .compile((char) x)) {
129: int pos = x >> 3;
130: encoding[pos] = (byte) (encoding[pos] | (1 << (x & 0x07)));
131: }
132: }
133: }
134:
135: /**
136: * Return true or false wether this encoding can encode the specified
137: * character or not.
138: * <p>
139: * This method is equivalent to the <code>allows(...)</code> method, but
140: * it will be called upon construction of the encoding table.
141: * </p>
142: */
143: protected abstract boolean compile(char c);
144: }
|