001: /*
002: * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS HEADER.
003: *
004: * Copyright 1997-2007 Sun Microsystems, Inc. All rights reserved.
005: *
006: * The contents of this file are subject to the terms of either the GNU
007: * General Public License Version 2 only ("GPL") or the Common Development
008: * and Distribution License("CDDL") (collectively, the "License"). You
009: * may not use this file except in compliance with the License. You can obtain
010: * a copy of the License at https://glassfish.dev.java.net/public/CDDL+GPL.html
011: * or glassfish/bootstrap/legal/LICENSE.txt. See the License for the specific
012: * language governing permissions and limitations under the License.
013: *
014: * When distributing the software, include this License Header Notice in each
015: * file and include the License file at glassfish/bootstrap/legal/LICENSE.txt.
016: * Sun designates this particular file as subject to the "Classpath" exception
017: * as provided by Sun in the GPL Version 2 section of the License file that
018: * accompanied this code. If applicable, add the following below the License
019: * Header, with the fields enclosed by brackets [] replaced by your own
020: * identifying information: "Portions Copyrighted [year]
021: * [name of copyright owner]"
022: *
023: * Contributor(s):
024: *
025: * If you wish your version of this file to be governed by only the CDDL or
026: * only the GPL Version 2, indicate your decision by adding "[Contributor]
027: * elects to include this software in this distribution under the [CDDL or GPL
028: * Version 2] license." If you don't indicate a single choice of license, a
029: * recipient has the option to distribute your version of this file under
030: * either the CDDL, the GPL Version 2 or to extend the choice of license to
031: * its licensees as provided above. However, if you add GPL Version 2 code
032: * and therefore, elected the GPL Version 2 license, then the option applies
033: * only if the new code is made subject to such option by the copyright
034: * holder.
035: */
036:
037: package com.sun.xml.bind.api.impl;
038:
import java.util.ArrayList;
import java.util.HashSet;
import java.util.List;
import java.util.Locale;
042:
/**
 * Methods that convert strings into various formats.
 *
 * <p>
 * What JAX-RPC name binding tells us is that even a basic method
 * such as "isLetter" can behave differently depending on the situation.
 * For this reason, a number of methods are non-static (and therefore
 * overridable) even though they look like they should be static.
 *
 * <p>
 * All case conversions performed by this class use {@link Locale#ENGLISH}
 * so that the generated names do not depend on the default locale of the
 * JVM (for example, the Turkish dotted/dotless "i" mapping).
 */
class NameUtil {

    /** Number of character categories returned by {@link #classify(char)}. */
    private static final int CATEGORY_COUNT = 5;

    // The 5-category classification that we use in this code
    // to find word breaks.
    protected static final int UPPER_LETTER = 0;
    protected static final int LOWER_LETTER = 1;
    protected static final int OTHER_LETTER = 2;
    protected static final int DIGIT = 3;
    protected static final int OTHER = 4;

    // Action constants. See nextBreak for how each one is interpreted.
    private static final byte ACTION_CHECK_PUNCT = 0;
    private static final byte ACTION_CHECK_C2 = 1;
    private static final byte ACTION_BREAK = 2;
    private static final byte ACTION_NOBREAK = 3;

    /**
     * Look up table for actions.
     * {@code type0 * 5 + type1} yields the action to be taken, where
     * {@code type0} is the category of the preceding character and
     * {@code type1} the category of the current one.
     */
    private static final byte[] actionTable =
            new byte[CATEGORY_COUNT * CATEGORY_COUNT];

    static {
        // Pre-compute the action for every (previous, current) category pair.
        for (int t0 = 0; t0 < CATEGORY_COUNT; t0++) {
            for (int t1 = 0; t1 < CATEGORY_COUNT; t1++) {
                actionTable[t0 * CATEGORY_COUNT + t1] = decideAction(t0, t1);
            }
        }
    }

    /** All reserved keywords of Java. */
    private static final HashSet<String> reservedKeywords = new HashSet<String>();

    static {
        // see http://java.sun.com/docs/books/tutorial/java/nutsandbolts/_keywords.html
        String[] words = new String[] { "abstract", "boolean", "break",
                "byte", "case", "catch", "char", "class", "const",
                "continue", "default", "do", "double", "else",
                "extends", "final", "finally", "float", "for", "goto",
                "if", "implements", "import", "instanceof", "int",
                "interface", "long", "native", "new", "package",
                "private", "protected", "public", "return", "short",
                "static", "strictfp", "super", "switch",
                "synchronized", "this", "throw", "throws", "transient",
                "try", "void", "volatile", "while",

                // technically these are not reserved words but they
                // cannot be used as identifiers.
                "true", "false", "null",

                // keyword since 1.4
                "assert",

                // keyword since 5.0
                "enum" };
        for (String word : words) {
            reservedKeywords.add(word);
        }
    }

    /**
     * Returns true if the character is one of the punctuation characters
     * that {@link #toWordList(String)} skips between words.
     */
    protected boolean isPunct(char c) {
        return c == '-' || c == '.' || c == ':' || c == '_'
                || c == '\u00b7' || c == '\u0387' || c == '\u06dd'
                || c == '\u06de';
    }

    /** Returns true for ASCII digits and anything {@link Character#isDigit(char)} accepts. */
    protected static boolean isDigit(char c) {
        return c >= '0' && c <= '9' || Character.isDigit(c);
    }

    /** Returns true for ASCII upper-case letters and anything {@link Character#isUpperCase(char)} accepts. */
    protected static boolean isUpper(char c) {
        return c >= 'A' && c <= 'Z' || Character.isUpperCase(c);
    }

    /** Returns true for ASCII lower-case letters and anything {@link Character#isLowerCase(char)} accepts. */
    protected static boolean isLower(char c) {
        return c >= 'a' && c <= 'z' || Character.isLowerCase(c);
    }

    /** Returns true for ASCII letters and anything {@link Character#isLetter(char)} accepts. */
    protected boolean isLetter(char c) {
        return c >= 'A' && c <= 'Z' || c >= 'a' && c <= 'z'
                || Character.isLetter(c);
    }

    /**
     * Capitalizes the first character of the specified string
     * and de-capitalizes the rest of the characters.
     *
     * <p>
     * The string is returned unchanged when it is empty or when its first
     * character is not a lower-case letter.
     *
     * @param s
     *      String to be capitalized. Must not be null.
     * @return
     *      The capitalized string, or {@code s} itself if no change applies.
     */
    public String capitalize(String s) {
        // The empty-string guard avoids a StringIndexOutOfBoundsException
        // from charAt(0).
        if (s.length() == 0 || !isLower(s.charAt(0))) {
            return s;
        }
        StringBuilder sb = new StringBuilder(s.length());
        sb.append(Character.toUpperCase(s.charAt(0)));
        // Fixed locale: the result must not depend on the JVM default locale.
        sb.append(s.substring(1).toLowerCase(Locale.ENGLISH));
        return sb.toString();
    }

    /**
     * Finds the index at which the word that begins at {@code start} ends.
     *
     * <p>
     * Precondition: {@code s.charAt(start)} is not punctuation.
     *
     * @return
     *      Index of the first character that does not belong to the word
     *      (always greater than {@code start}), or -1 if the word runs to
     *      the end of the string.
     */
    private int nextBreak(String s, int start) {
        int n = s.length();
        int current = classify(s.charAt(start));

        for (int i = start + 1; i < n; i++) {
            // Shift the current category into "previous" and classify
            // the character at i.
            int previous = current;
            char c1 = s.charAt(i);
            current = classify(c1);

            switch (actionTable[previous * CATEGORY_COUNT + current]) {
            case ACTION_CHECK_PUNCT:
                // Two "other" characters in a row: break only on punctuation.
                if (isPunct(c1)) {
                    return i;
                }
                break;
            case ACTION_CHECK_C2:
                // Two upper-case letters in a row: break before the second
                // one only if a lower-case letter follows it
                // (e.g. "XMLParser" -> "XML" | "Parser").
                if (i < n - 1 && isLower(s.charAt(i + 1))) {
                    return i;
                }
                break;
            case ACTION_BREAK:
                return i;
            default:
                // ACTION_NOBREAK: keep scanning.
                break;
            }
        }
        return -1;
    }

    /**
     * Decides the action to be taken given
     * the classification of the preceding character 't0' and
     * the classification of the next character 't1'.
     */
    private static byte decideAction(int t0, int t1) {
        if (t0 == OTHER && t1 == OTHER) {
            return ACTION_CHECK_PUNCT;
        }
        // Exactly one of the two is a digit.
        if ((t0 == DIGIT) != (t1 == DIGIT)) {
            return ACTION_BREAK;
        }
        // A lower-case letter followed by anything that is not lower-case.
        if (t0 == LOWER_LETTER && t1 != LOWER_LETTER) {
            return ACTION_BREAK;
        }
        // Exactly one of the two is a letter (categories 0..OTHER_LETTER).
        if ((t0 <= OTHER_LETTER) != (t1 <= OTHER_LETTER)) {
            return ACTION_BREAK;
        }
        // Exactly one of the two is an "other" (caseless) letter.
        if ((t0 == OTHER_LETTER) != (t1 == OTHER_LETTER)) {
            return ACTION_BREAK;
        }

        if (t0 == UPPER_LETTER && t1 == UPPER_LETTER) {
            return ACTION_CHECK_C2;
        }

        return ACTION_NOBREAK;
    }

    /**
     * Classifies a character into the 5 categories that determine the
     * word break.
     *
     * @return
     *      One of {@link #UPPER_LETTER}, {@link #LOWER_LETTER},
     *      {@link #OTHER_LETTER}, {@link #DIGIT} or {@link #OTHER}.
     */
    protected int classify(char c0) {
        switch (Character.getType(c0)) {
        case Character.UPPERCASE_LETTER:
            return UPPER_LETTER;
        case Character.LOWERCASE_LETTER:
            return LOWER_LETTER;
        case Character.TITLECASE_LETTER:
        case Character.MODIFIER_LETTER:
        case Character.OTHER_LETTER:
            return OTHER_LETTER;
        case Character.DECIMAL_DIGIT_NUMBER:
            return DIGIT;
        default:
            return OTHER;
        }
    }

    /**
     * Tokenizes a string into words and capitalizes the first
     * character of each word.
     *
     * <p>
     * This method uses a change in character type as a splitter
     * of two words. For example, "abc100ghi" will be split into
     * {"Abc", "100", "Ghi"}. Punctuation characters (see
     * {@link #isPunct(char)}) are skipped and never appear in the result.
     *
     * @param s
     *      String to be tokenized. Must not be null.
     * @return
     *      The list of words; empty if the string contains nothing but
     *      punctuation. Each word has been passed through
     *      {@link #capitalize(String)} and had characters that cannot be
     *      part of a Java identifier escaped.
     */
    public List<String> toWordList(String s) {
        List<String> words = new ArrayList<String>();
        int n = s.length();
        int i = 0;
        while (i < n) {
            // Skip punctuation
            while (i < n && isPunct(s.charAt(i))) {
                i++;
            }
            if (i >= n) {
                break;
            }

            // Find next break and collect word
            int b = nextBreak(s, i);
            String w = (b == -1) ? s.substring(i) : s.substring(i, b);
            words.add(escape(capitalize(w)));
            if (b == -1) {
                break;
            }
            i = b;
        }

        // We can't guarantee a valid Java identifier anyway, so an empty
        // result is returned as-is rather than rejected with an exception.
        return words;
    }

    /**
     * Concatenates the given words into a single mixed-case name.
     *
     * @param ss
     *      Words to be concatenated, in order.
     * @param startUpper
     *      If false, the first word is converted to lower case;
     *      if true, it is appended unchanged.
     * @return
     *      The concatenated name; empty if the list is empty.
     */
    protected String toMixedCaseName(List<String> ss, boolean startUpper) {
        StringBuilder sb = new StringBuilder();
        if (!ss.isEmpty()) {
            sb.append(startUpper ? ss.get(0) : ss.get(0).toLowerCase(Locale.ENGLISH));
            for (int i = 1; i < ss.size(); i++) {
                sb.append(ss.get(i));
            }
        }
        return sb.toString();
    }

    /**
     * Concatenates the given words into a single mixed-case variable name.
     * The array passed in is not modified.
     *
     * @param ss
     *      Words to be concatenated, in order.
     * @param startUpper
     *      If false, the first word is converted to lower case;
     *      if true, it is appended unchanged.
     * @param cdrUpper
     *      If true, every word after the first one is passed through
     *      {@link #capitalize(String)} before being appended.
     * @return
     *      The concatenated name; empty if the array is empty.
     */
    protected String toMixedCaseVariableName(String[] ss,
            boolean startUpper, boolean cdrUpper) {
        StringBuilder sb = new StringBuilder();
        if (ss.length > 0) {
            sb.append(startUpper ? ss[0] : ss[0].toLowerCase(Locale.ENGLISH));
            for (int i = 1; i < ss.length; i++) {
                // Capitalize on the fly instead of writing back into the
                // caller's array.
                sb.append(cdrUpper ? capitalize(ss[i]) : ss[i]);
            }
        }
        return sb.toString();
    }

    /**
     * Formats a string into "THIS_KIND_OF_FORMAT_ABC_DEF".
     *
     * @return
     *      Always return a string but there's no guarantee that
     *      the generated code is a valid Java identifier.
     */
    public String toConstantName(String s) {
        return toConstantName(toWordList(s));
    }

    /**
     * Formats a list of words into "THIS_KIND_OF_FORMAT_ABC_DEF".
     *
     * @param ss
     *      Words to be upper-cased and joined with '_'.
     * @return
     *      Always return a string but there's no guarantee that
     *      the generated code is a valid Java identifier.
     */
    public String toConstantName(List<String> ss) {
        StringBuilder sb = new StringBuilder();
        if (!ss.isEmpty()) {
            sb.append(ss.get(0).toUpperCase(Locale.ENGLISH));
            for (int i = 1; i < ss.size(); i++) {
                sb.append('_');
                sb.append(ss.get(i).toUpperCase(Locale.ENGLISH));
            }
        }
        return sb.toString();
    }

    /**
     * Escapes characters in the given string that cannot be part of a
     * Java identifier.
     *
     * <p>
     * Each such character is replaced by '_' followed by its code unit
     * as four lower-case hexadecimal digits (e.g. a space becomes
     * "_0020"). All other characters are copied as-is.
     * The result is appended to the given StringBuilder.
     *
     * @param sb
     *      StringBuilder that receives the escaped string.
     * @param s
     *      String to be escaped. <code>s.substring(start)</code>
     *      will be escaped and copied to the string builder.
     * @param start
     *      Index of the first character of {@code s} to process.
     */
    public static void escape(StringBuilder sb, String s, int start) {
        int n = s.length();
        for (int i = start; i < n; i++) {
            char c = s.charAt(i);
            if (Character.isJavaIdentifierPart(c)) {
                sb.append(c);
            } else {
                sb.append('_');
                // Left-pad the hex representation to four digits.
                if (c <= '\u000f') {
                    sb.append("000");
                } else if (c <= '\u00ff') {
                    sb.append("00");
                } else if (c <= '\u0fff') {
                    sb.append('0');
                }
                sb.append(Integer.toString(c, 16));
            }
        }
    }

    /**
     * Escapes characters that are unusable as Java identifiers
     * by replacing unsafe characters with safe characters.
     *
     * @return
     *      {@code s} itself if nothing needs escaping, otherwise a new
     *      escaped string.
     */
    private static String escape(String s) {
        int n = s.length();
        for (int i = 0; i < n; i++) {
            if (!Character.isJavaIdentifierPart(s.charAt(i))) {
                // Copy the clean prefix and escape from the first
                // offending character onwards.
                StringBuilder sb = new StringBuilder(s.substring(0, i));
                escape(sb, s, i);
                return sb.toString();
            }
        }
        return s;
    }

    /**
     * Checks if a given string is usable as a Java identifier.
     *
     * @return
     *      false for the empty string, for the reserved words and literals
     *      known to this class, and for strings containing characters that
     *      are not allowed in an identifier; true otherwise.
     */
    public static boolean isJavaIdentifier(String s) {
        if (s.length() == 0) {
            return false;
        }
        if (reservedKeywords.contains(s)) {
            return false;
        }
        if (!Character.isJavaIdentifierStart(s.charAt(0))) {
            return false;
        }
        for (int i = 1; i < s.length(); i++) {
            if (!Character.isJavaIdentifierPart(s.charAt(i))) {
                return false;
            }
        }
        return true;
    }

    /**
     * Checks if the given string is a valid Java package name.
     *
     * <p>
     * The empty string is accepted. Otherwise every '.'-separated segment
     * must satisfy {@link #isJavaIdentifier(String)}; leading, trailing and
     * doubled dots are therefore rejected.
     */
    public static boolean isJavaPackageName(String s) {
        if (s.length() == 0) {
            return true;
        }
        int start = 0;
        while (true) {
            int dot = s.indexOf('.', start);
            int end = (dot == -1) ? s.length() : dot;
            // An empty segment (e.g. after a trailing dot) fails here.
            if (!isJavaIdentifier(s.substring(start, end))) {
                return false;
            }
            if (dot == -1) {
                return true;
            }
            start = dot + 1;
        }
    }
}
|