001: /*
002: * Copyright 2006 Sun Microsystems, Inc. All Rights Reserved.
003: * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
004: *
005: * This code is free software; you can redistribute it and/or modify it
006: * under the terms of the GNU General Public License version 2 only, as
007: * published by the Free Software Foundation. Sun designates this
008: * particular file as subject to the "Classpath" exception as provided
009: * by Sun in the LICENSE file that accompanied this code.
010: *
011: * This code is distributed in the hope that it will be useful, but WITHOUT
012: * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
013: * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
014: * version 2 for more details (a copy is included in the LICENSE file that
015: * accompanied this code).
016: *
017: * You should have received a copy of the GNU General Public License version
018: * 2 along with this work; if not, write to the Free Software Foundation,
019: * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
020: *
021: * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
022: * CA 95054 USA or visit www.sun.com if you need additional information or
023: * have any questions.
024: */
025:
026: package com.sun.xml.internal.bind.api.impl;
027:
import java.util.ArrayList;
import java.util.HashSet;
import java.util.List;
import java.util.Locale;
031:
032: /**
033: * Methods that convert strings into various formats.
034: *
035: * <p>
036: * What JAX-RPC name binding tells us is that even such basic method
037: * like "isLetter" can be different depending on the situation.
038: *
039: * For this reason, a whole lot of methods are made non-static,
040: * even though they look like they should be static.
041: */
class NameUtil {

    /**
     * Tells whether the given character is treated as a word-separating
     * punctuation character. Such characters are skipped by
     * {@link #toWordList(String)} and never end up in a word.
     */
    protected boolean isPunct(char c) {
        return c == '-' || c == '.' || c == ':' || c == '_'
            || c == '\u00b7' || c == '\u0387' || c == '\u06dd'
            || c == '\u06de';
    }

    /** Tells whether the given character is a digit. */
    protected static boolean isDigit(char c) {
        return c >= '0' && c <= '9' || Character.isDigit(c);
    }

    /** Tells whether the given character is an upper-case letter. */
    protected static boolean isUpper(char c) {
        return c >= 'A' && c <= 'Z' || Character.isUpperCase(c);
    }

    /** Tells whether the given character is a lower-case letter. */
    protected static boolean isLower(char c) {
        return c >= 'a' && c <= 'z' || Character.isLowerCase(c);
    }

    /** Tells whether the given character is a letter. */
    protected boolean isLetter(char c) {
        return c >= 'A' && c <= 'Z' || c >= 'a' && c <= 'z'
            || Character.isLetter(c);
    }

    /**
     * Capitalizes a word whose first character is lower case.
     *
     * <p>
     * If the first character is a lower-case letter, it is upper-cased and
     * all the remaining characters are lower-cased. Otherwise (including
     * for the empty string) the input is returned unchanged.
     *
     * <p>
     * Case conversion is done with {@link Locale#ENGLISH} so that the
     * result does not depend on the default locale of the JVM.
     *
     * @param s
     *      the word to capitalize.
     * @return
     *      the capitalized word, or {@code s} itself if nothing was changed.
     */
    public String capitalize(String s) {
        // guard against the empty string; charAt(0) would throw otherwise
        if (s.length() == 0 || !isLower(s.charAt(0))) {
            return s;
        }
        StringBuilder sb = new StringBuilder(s.length());
        sb.append(Character.toUpperCase(s.charAt(0)));
        sb.append(s.substring(1).toLowerCase(Locale.ENGLISH));
        return sb.toString();
    }

    /**
     * Finds the index at which the word that starts at {@code start} ends.
     *
     * <p>
     * Precondition: {@code s.charAt(start)} is not punctuation.
     *
     * @return
     *      the index of the first character that belongs to the next
     *      token, or -1 if the word extends to the end of the string.
     */
    private int nextBreak(String s, int start) {
        int n = s.length();
        int t1 = classify(s.charAt(start));

        for (int i = start + 1; i < n; i++) {
            // shift the classification of the previous character into t0
            int t0 = t1;

            char c1 = s.charAt(i);
            t1 = classify(c1);

            switch (actionTable[t0 * CATEGORY_COUNT + t1]) {
            case ACTION_CHECK_PUNCT:
                // two "other" characters in a row: break only at punctuation
                if (isPunct(c1)) {
                    return i;
                }
                break;
            case ACTION_CHECK_C2:
                // two upper-case letters in a row: break only when the
                // character after them is lower case ("XMLName" -> XML|Name)
                if (i < n - 1 && isLower(s.charAt(i + 1))) {
                    return i;
                }
                break;
            case ACTION_BREAK:
                return i;
            default:
                // ACTION_NOBREAK: keep scanning
                break;
            }
        }
        return -1;
    }

    // the 5-category classification that we use in this code
    // to find word breaks
    protected static final int UPPER_LETTER = 0;
    protected static final int LOWER_LETTER = 1;
    protected static final int OTHER_LETTER = 2;
    protected static final int DIGIT = 3;
    protected static final int OTHER = 4;

    /** Number of categories that {@link #classify(char)} distinguishes. */
    private static final int CATEGORY_COUNT = 5;

    /**
     * Look up table for actions.
     * type0*5+type1 would yield the action to be taken.
     */
    private static final byte[] actionTable =
        new byte[CATEGORY_COUNT * CATEGORY_COUNT];

    // action constants. see nextBreak for the meaning
    private static final byte ACTION_CHECK_PUNCT = 0;
    private static final byte ACTION_CHECK_C2 = 1;
    private static final byte ACTION_BREAK = 2;
    private static final byte ACTION_NOBREAK = 3;

    /**
     * Decide the action to be taken given
     * the classification of the preceding character 't0' and
     * the classification of the next character 't1'.
     */
    private static byte decideAction(int t0, int t1) {
        if (t0 == OTHER && t1 == OTHER) {
            return ACTION_CHECK_PUNCT;
        }
        // exactly one of the two is a digit
        if (xor(t0 == DIGIT, t1 == DIGIT)) {
            return ACTION_BREAK;
        }
        if (t0 == LOWER_LETTER && t1 != LOWER_LETTER) {
            return ACTION_BREAK;
        }
        // exactly one of the two is a letter of any kind
        if (xor(t0 <= OTHER_LETTER, t1 <= OTHER_LETTER)) {
            return ACTION_BREAK;
        }
        // exactly one of the two is a letter without upper/lower case
        if (xor(t0 == OTHER_LETTER, t1 == OTHER_LETTER)) {
            return ACTION_BREAK;
        }

        if (t0 == UPPER_LETTER && t1 == UPPER_LETTER) {
            return ACTION_CHECK_C2;
        }

        return ACTION_NOBREAK;
    }

    /** Exclusive or: true when exactly one of the operands is true. */
    private static boolean xor(boolean x, boolean y) {
        return x != y;
    }

    static {
        // initialize the action table
        for (int t0 = 0; t0 < CATEGORY_COUNT; t0++) {
            for (int t1 = 0; t1 < CATEGORY_COUNT; t1++) {
                actionTable[t0 * CATEGORY_COUNT + t1] = decideAction(t0, t1);
            }
        }
    }

    /**
     * Classify a character into 5 categories that determine the word break.
     *
     * @return
     *      one of {@link #UPPER_LETTER}, {@link #LOWER_LETTER},
     *      {@link #OTHER_LETTER}, {@link #DIGIT} and {@link #OTHER}.
     */
    protected int classify(char c0) {
        switch (Character.getType(c0)) {
        case Character.UPPERCASE_LETTER:
            return UPPER_LETTER;
        case Character.LOWERCASE_LETTER:
            return LOWER_LETTER;
        case Character.TITLECASE_LETTER:
        case Character.MODIFIER_LETTER:
        case Character.OTHER_LETTER:
            return OTHER_LETTER;
        case Character.DECIMAL_DIGIT_NUMBER:
            return DIGIT;
        default:
            return OTHER;
        }
    }

    /**
     * Tokenizes a string into words and capitalizes the first
     * character of each word.
     *
     * <p>
     * This method uses a change in character type as a splitter
     * of two words. For example, "abc100ghi" will be split into
     * {"Abc", "100","Ghi"}.
     *
     * <p>
     * Punctuation characters (see {@link #isPunct(char)}) in front of a
     * word are skipped, and characters that cannot be a part of a Java
     * identifier are escaped.
     *
     * @return
     *      a possibly empty list of words. No guarantee is made that
     *      the words form a valid Java identifier.
     */
    public List<String> toWordList(String s) {
        List<String> words = new ArrayList<String>();
        int n = s.length();
        int i = 0;
        while (i < n) {
            // Skip punctuation
            while (i < n && isPunct(s.charAt(i))) {
                i++;
            }
            if (i >= n) {
                break;
            }

            // Find next break and collect word
            int b = nextBreak(s, i);
            String w = (b == -1) ? s.substring(i) : s.substring(i, b);
            words.add(escape(capitalize(w)));
            if (b == -1) {
                break;
            }
            i = b;
        }

        // we can't guarantee a valid Java identifier anyway,
        // so there's not much point in rejecting an empty word list.
        return words;
    }

    /**
     * Concatenates the words into one name.
     *
     * @param ss
     *      the words to concatenate, typically the result of
     *      {@link #toWordList(String)}.
     * @param startUpper
     *      if false, the first word is converted to lower case;
     *      if true, it is used as-is.
     * @return
     *      the concatenated name; empty if {@code ss} is empty.
     */
    protected String toMixedCaseName(List<String> ss, boolean startUpper) {
        StringBuilder sb = new StringBuilder();
        if (!ss.isEmpty()) {
            String first = ss.get(0);
            sb.append(startUpper ? first : first.toLowerCase(Locale.ENGLISH));
            for (int i = 1; i < ss.size(); i++) {
                sb.append(ss.get(i));
            }
        }
        return sb.toString();
    }

    /**
     * Concatenates the words into one name, optionally capitalizing
     * all the words but the first one.
     *
     * <p>
     * The given array is not modified.
     *
     * @param ss
     *      the words to concatenate.
     * @param startUpper
     *      if false, the first word is converted to lower case;
     *      if true, it is used as-is.
     * @param cdrUpper
     *      if true, every word except the first one goes through
     *      {@link #capitalize(String)} before it is appended.
     * @return
     *      the concatenated name; empty if {@code ss} is empty.
     */
    protected String toMixedCaseVariableName(String[] ss,
            boolean startUpper, boolean cdrUpper) {
        StringBuilder sb = new StringBuilder();
        if (ss.length > 0) {
            sb.append(startUpper ? ss[0] : ss[0].toLowerCase(Locale.ENGLISH));
            for (int i = 1; i < ss.length; i++) {
                // capitalize on the fly rather than writing back into
                // the caller's array
                sb.append(cdrUpper ? capitalize(ss[i]) : ss[i]);
            }
        }
        return sb.toString();
    }

    /**
     * Formats a string into "THIS_KIND_OF_FORMAT_ABC_DEF".
     *
     * @return
     *      Always return a string but there's no guarantee that
     *      the generated code is a valid Java identifier.
     */
    public String toConstantName(String s) {
        return toConstantName(toWordList(s));
    }

    /**
     * Formats a list of words into "THIS_KIND_OF_FORMAT_ABC_DEF".
     *
     * <p>
     * Each word is converted to upper case (using {@link Locale#ENGLISH})
     * and the words are joined with '_'.
     *
     * @return
     *      Always return a string but there's no guarantee that
     *      the generated code is a valid Java identifier.
     */
    public String toConstantName(List<String> ss) {
        StringBuilder sb = new StringBuilder();
        for (int i = 0; i < ss.size(); i++) {
            if (i > 0) {
                sb.append('_');
            }
            sb.append(ss.get(i).toUpperCase(Locale.ENGLISH));
        }
        return sb.toString();
    }

    /**
     * Escapes characters in the given string so that the result
     * consists only of characters usable in a Java identifier.
     *
     * <p>
     * A character that is not a Java identifier part is replaced by
     * an underscore followed by its code as four lower-case hexadecimal
     * digits. Other characters are copied as they are.
     *
     * The escaped characters will be appended to the given
     * StringBuilder.
     *
     * @param sb
     *      StringBuilder that receives escaped string.
     * @param s
     *      String to be escaped. <code>s.substring(start)</code>
     *      will be escaped and copied to the string builder.
     * @param start
     *      index of the first character of {@code s} to be processed.
     */
    public static void escape(StringBuilder sb, String s, int start) {
        int n = s.length();
        for (int i = start; i < n; i++) {
            char c = s.charAt(i);
            if (Character.isJavaIdentifierPart(c)) {
                sb.append(c);
            } else {
                sb.append('_');
                // left-pad the hexadecimal code with zeros to 4 digits
                if (c <= '\u000f') {
                    sb.append("000");
                } else if (c <= '\u00ff') {
                    sb.append("00");
                } else if (c <= '\u0fff') {
                    sb.append('0');
                }
                sb.append(Integer.toString(c, 16));
            }
        }
    }

    /**
     * Escapes characters that are unusable as Java identifiers
     * by replacing unsafe characters with safe characters.
     *
     * @return
     *      {@code s} itself if it needs no escaping.
     */
    private static String escape(String s) {
        int n = s.length();
        for (int i = 0; i < n; i++) {
            if (!Character.isJavaIdentifierPart(s.charAt(i))) {
                // copy the clean prefix, then escape from the first
                // offending character on
                StringBuilder sb = new StringBuilder(s.substring(0, i));
                escape(sb, s, i);
                return sb.toString();
            }
        }
        return s;
    }

    /**
     * Checks if a given string is usable as a Java identifier.
     *
     * @return
     *      false if the string is empty, is one of the reserved words
     *      known to this class, or contains a character that is not
     *      allowed at its position in a Java identifier.
     */
    public static boolean isJavaIdentifier(String s) {
        if (s.length() == 0) {
            return false;
        }
        if (reservedKeywords.contains(s)) {
            return false;
        }
        if (!Character.isJavaIdentifierStart(s.charAt(0))) {
            return false;
        }
        for (int i = 1; i < s.length(); i++) {
            if (!Character.isJavaIdentifierPart(s.charAt(i))) {
                return false;
            }
        }
        return true;
    }

    /**
     * Checks if the given string is a valid Java package name.
     *
     * <p>
     * Every '.'-separated segment must satisfy
     * {@link #isJavaIdentifier(String)}; consequently leading, trailing
     * and doubled dots are rejected. The empty string is accepted.
     */
    public static boolean isJavaPackageName(String s) {
        if (s.length() == 0) {
            return true;
        }
        int start = 0;
        while (true) {
            int dot = s.indexOf('.', start);
            int end = (dot == -1) ? s.length() : dot;
            if (!isJavaIdentifier(s.substring(start, end))) {
                return false;
            }
            if (dot == -1) {
                return true;
            }
            // continue after the '.'; a trailing dot leaves an empty
            // segment that is rejected by the next iteration
            start = dot + 1;
        }
    }

    /** All reserved keywords of Java. */
    private static final HashSet<String> reservedKeywords = new HashSet<String>();

    static {
        // see http://java.sun.com/docs/books/tutorial/java/nutsandbolts/_keywords.html
        String[] words = new String[] { "abstract", "boolean", "break",
            "byte", "case", "catch", "char", "class", "const",
            "continue", "default", "do", "double", "else",
            "extends", "final", "finally", "float", "for", "goto",
            "if", "implements", "import", "instanceof", "int",
            "interface", "long", "native", "new", "package",
            "private", "protected", "public", "return", "short",
            "static", "strictfp", "super", "switch",
            "synchronized", "this", "throw", "throws", "transient",
            "try", "void", "volatile", "while",

            // technically these are not reserved words but they cannot be used as identifiers.
            "true", "false", "null",

            // added in Java 1.4
            "assert",

            // and 5.0 keywords
            "enum" };
        for (String word : words) {
            reservedKeywords.add(word);
        }
    }
}
|