001: /*
002: * $Header: /cvsroot/webman-cms/source/webman/com/teamkonzept/lib/TKHtmlConverter.java,v 1.5 2000/05/22 15:01:20 careck Exp $
003: *
004: */
005: package com.teamkonzept.lib;
006:
007: import java.util.*;
008:
009: /**
010: Konvertierungsklasse fuer HTML-Text-Codierung
011: */
012: public class TKHtmlConverter extends TKConverter {
013: public final static String CONV_ID = "HTML";
014: public final static String CONV_NAME = "HTML-ISO-8859_1";
015:
016: public String getName() {
017: return CONV_NAME;
018: }
019:
020: public final static String[] HTML_CODES = {
021: /* 0*/null, null, null, null, null, null, null, null, null, null,
022: /* 10*/null, null, null, null, null, null, null, null, null, null,
023: /* 20*/null, null, null, null, null, null, null, null, null, null,
024: /* 30*/null, null, null, null, "quot", null, null, null, "amp",
025: null,
026: /* 40*/null, null, null, null, null, null, null, null,
027: null, null,
028: /* 50*/null, null, null, null, null, null, null, null,
029: null, null,
030: /* 60*/"lt", null, "gt", null, null, null, null, null,
031: null, null,
032: /* 70*/null, null, null, null, null, null, null, null,
033: null, null,
034: /* 80*/null, null, null, null, null, null, null, null,
035: null, null,
036: /* 90*/null, null, null, null, null, null, null, null,
037: null, null,
038: /*100*/null, null, null, null, null, null, null, null,
039: null, null,
040: /*110*/null, null, null, null, null, null, null, null,
041: null, null,
042: /*120*/null, null, null, null, null, null, null, null,
043: null, null,
044: /*130*/null, null, /*"quot"*/null, null, null, null,
045: null, null, null, null,
046: /*140*/null, null, null, null, null, /*"#39"*/null, /*"#39*/
047: null, null, null, null,
048: /*150*/null, null, null, null, null, null, null, null,
049: null, null,
050: /*160*/"nbsp", null, null, null, null, null, null, null,
051: null, null,
052: /*170*/null, null, null, "shy", null, null, null, null,
053: null, null,
054: /*180*/null, null, null, null, null, null, null, null,
055: null, null,
056: /*190*/null, null, "Agrave", "Aacute", "Acirc", "Atilde",
057: "Auml", "Aring", "AElig", "Ccedil",
058: /*200*/"Egrave", "Eacute", "Ecirc", "Euml", "Igrave",
059: "Iacute", "Icirc", "Iuml", "ETH", "Ntilde",
060: /*210*/"Ograve", "Oacute", "Ocirc", "Otilde", "Ouml",
061: null, "Oslash", "Ugrave", "Uacute", "Ucirc",
062: /*220*/"Uuml", "Yacute", "THORN", "szlig", "agrave",
063: "aacute", "acirc", "atilde", "auml", "aring",
064: /*230*/"aelig", "ccedil", "egrave", "eacute", "ecirc",
065: "euml", "igrave", "iacute", "icirc", "iuml",
066: /*240*/"eth", "ntilde", "ograve", "oacute", "ocirc",
067: "otilde", "ouml", null, "oslash", "ugrave",
068: /*250*/"uacute", "ucirc", "uuml", "yacute", "thorn",
069: "yuml" };
070:
071: public final static byte AMPERCENT = (byte) '&';
072: public final static byte HASH = (byte) '#';
073: public final static byte SEMICOLON = (byte) ';';
074: public static Hashtable NAME_HASH = null;
075:
076: public int getMaxBytesPerChar() {
077: return 8;
078: }
079:
080: public int minCharSize(int byteCount) {
081: return byteCount;
082: }
083:
084: public synchronized Hashtable getNameHash() {
085: if (NAME_HASH != null)
086: return NAME_HASH;
087: NAME_HASH = new Hashtable(38);
088: String html;
089: for (int i = 0; i <= 255; i++) {
090: if ((html = HTML_CODES[i]) != null)
091: NAME_HASH.put(html, new Integer(i));
092: }
093: return NAME_HASH;
094: }
095:
096: public int charsToBytes(char src[], byte dst[], int srcBegin,
097: int length, int dstBegin) {
098: int lastPos = srcBegin + length;
099: int firstPos = dstBegin;
100: for (int i = srcBegin; i < lastPos; i++) {
101: char c = src[i];
102: byte b = (byte) c;
103: int code = (int) c;
104: String subst = HTML_CODES[code];
105: if (subst == null) {
106: //if( c >= ' ' && c <='\u0080' ) {
107: if (c <= '\u0080') {
108: dst[dstBegin++] = b;
109: } else if (c < '\u0100') {
110: dst[dstBegin++] = AMPERCENT;
111: dst[dstBegin++] = HASH;
112: dst[dstBegin++] = (byte) Character.forDigit(
113: (code / 100), 10);
114: dst[dstBegin++] = (byte) Character.forDigit(
115: (code % 100 / 10), 10);
116: dst[dstBegin++] = (byte) Character.forDigit(
117: (code % 10), 10);
118: dst[dstBegin++] = SEMICOLON;
119: }
120: } else {
121: dst[dstBegin++] = AMPERCENT;
122: int len = subst.length();
123: subst.getBytes(0, len, dst, dstBegin);
124: dstBegin += len;
125: dst[dstBegin++] = SEMICOLON;
126: }
127: }
128:
129: return dstBegin - firstPos;
130: }
131:
132: public int bytesToChars(byte src[], char dst[], int srcBegin,
133: int length, int dstBegin) {
134: int lastPos = srcBegin + length;
135: int firstPos = dstBegin;
136: int i = srcBegin;
137: Hashtable nameHash = getNameHash();
138:
139: while (i < lastPos) {
140: byte b = src[i++];
141: if (b == AMPERCENT) {
142: int startPos = i;
143: while (src[i] != SEMICOLON)
144: i++;
145: if (src[i] == HASH) {
146: String decStr = new String(src, 0, startPos + 1, i
147: - startPos - 1);
148: dst[dstBegin++] = (char) Integer.parseInt(decStr);
149: } else {
150: String code = new String(src, 0, startPos, i
151: - startPos);
152: dst[dstBegin++] = (char) ((Integer) nameHash
153: .get(code)).intValue();
154: }
155: i++;
156: } else {
157: dst[dstBegin++] = (char) b;
158: }
159: }
160: return dstBegin - firstPos;
161: }
162:
163: }
|