001: /*
002: * Licensed to the Apache Software Foundation (ASF) under one or more
003: * contributor license agreements. See the NOTICE file distributed with
004: * this work for additional information regarding copyright ownership.
005: * The ASF licenses this file to You under the Apache License, Version 2.0
006: * (the "License"); you may not use this file except in compliance with
007: * the License. You may obtain a copy of the License at
008: *
009: * http://www.apache.org/licenses/LICENSE-2.0
010: *
011: * Unless required by applicable law or agreed to in writing, software
012: * distributed under the License is distributed on an "AS IS" BASIS,
013: * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014: * See the License for the specific language governing permissions and
015: * limitations under the License.
016: */
017: package org.apache.cocoon.components.serializers.encoding;
018:
019: /**
020: *
021: *
022: * @author <a href="mailto:pier@apache.org">Pier Fumagalli</a>, February 2003
023: * @version CVS $Id: XMLEncoder.java 433543 2006-08-22 06:22:54Z crossley $
024: */
025: public class XMLEncoder extends CompiledEncoder {
026:
027: private static final char ENCODE_HEX[] = "0123456789ABCDEF"
028: .toCharArray();
029: private static final char ENCODE_QUOT[] = """.toCharArray();
030: private static final char ENCODE_AMP[] = "&".toCharArray();
031: private static final char ENCODE_APOS[] = "'".toCharArray();
032: private static final char ENCODE_LT[] = "<".toCharArray();
033: private static final char ENCODE_GT[] = ">".toCharArray();
034:
035: /**
036: * Create a new instance of this <code>XMLEncoder</code>.
037: */
038: public XMLEncoder() {
039: super ("X-W3C-XML");
040: }
041:
042: /**
043: * Create a new instance of this <code>XMLEncoder</code>.
044: *
045: * @param name A name for this <code>Encoding</code>.
046: * @throws NullPointerException If one of the arguments is <b>null</b>.
047: */
048: protected XMLEncoder(String name) {
049: super (name);
050: }
051:
052: /**
053: * Return true or false wether this encoding can encode the specified
054: * character or not.
055: * <p>
056: * This method will return true for the following character range:
057: * <br />
058: * <code>
059: * <nobr>#x9 | #xA | #xD | [#x20-#xD7FF] | [#xE000-#xFFFD]</nobr>
060: * </code>
061: * </p>
062: *
063: * @see <a href="http://www.w3.org/TR/REC-xml#charsets">W3C XML 1.0</a>
064: */
065: protected boolean compile(char c) {
066: if ((c == 0x09) || // [\t]
067: (c == 0x0a) || // [\n]
068: (c == 0x0d)) { // [\r]
069: return (true);
070: }
071:
072: if ((c == 0x22) || // ["]
073: (c == 0x26) || // [&]
074: (c == 0x27) || // [']
075: (c == 0x3c) || // [<]
076: (c == 0x3e) || // [>]
077: (c < 0x20) || // See <http://www.w3.org/TR/REC-xml#charsets>
078: ((c > 0xd7ff) && (c < 0xe000)) || (c > 0xfffd)) {
079: return (false);
080: }
081:
082: return (true);
083: }
084:
085: /**
086: * Return an array of characters representing the encoding for the
087: * specified character.
088: */
089: public char[] encode(char c) {
090: switch (c) {
091: case 0x22:
092: return (ENCODE_QUOT); // (") ["]
093: case 0x26:
094: return (ENCODE_AMP); // (&) [&]
095: case 0x27:
096: return (ENCODE_APOS); // (') [']
097: case 0x3c:
098: return (ENCODE_LT); // (<) [<]
099: case 0x3e:
100: return (ENCODE_GT); // (>) [>]
101: default: {
102: if (c > 0xfff) {
103: char ret[] = { '&', '#', 'x',
104: ENCODE_HEX[c >> 0xc & 0xf],
105: ENCODE_HEX[c >> 0x8 & 0xf],
106: ENCODE_HEX[c >> 0x4 & 0xf],
107: ENCODE_HEX[c & 0xf], ';' };
108: return (ret);
109: }
110: if (c > 0xff) {
111: char ret[] = { '&', '#', 'x',
112: ENCODE_HEX[c >> 0x8 & 0xf],
113: ENCODE_HEX[c >> 0x4 & 0xf],
114: ENCODE_HEX[c & 0xf], ';' };
115: return (ret);
116: }
117: char ret[] = { '&', '#', 'x', ENCODE_HEX[c >> 0x4 & 0xf],
118: ENCODE_HEX[c & 0xf], ';' };
119: return (ret);
120: }
121: }
122: }
123: }
|