0001: //##header
0002: /*
0003: *******************************************************************************
0004: * Copyright (C) 2002-2006, International Business Machines Corporation and *
0005: * others. All Rights Reserved. *
0006: *******************************************************************************
0007: */
0008: //#ifndef FOUNDATION
0009: package com.ibm.icu.dev.test.util;
0010:
0011: import java.io.BufferedReader;
0012: import java.io.BufferedWriter;
0013: import java.io.File;
0014: import java.io.FileInputStream;
0015: import java.io.FileOutputStream;
0016: import java.io.IOException;
0017: import java.io.InputStreamReader;
0018: import java.io.OutputStreamWriter;
0019: import java.io.PrintWriter;
0020: import java.io.StringWriter;
0021: import java.text.MessageFormat;
0022: import java.util.Collection;
0023: import java.util.HashMap;
0024: import java.util.HashSet;
0025: import java.util.Locale;
0026: import java.util.Map;
0027:
0028: import com.ibm.icu.impl.Utility;
0029: import com.ibm.icu.text.NumberFormat;
0030: import com.ibm.icu.text.Transliterator;
0031: import com.ibm.icu.text.UTF16;
0032: import com.ibm.icu.text.UnicodeSet;
0033:
0034: public class BagFormatter {
/** Compile-time switch guarding the diagnostic println calls in this class. */
static final boolean DEBUG = false;

/**
 * True when the system property "SHOW_FILES" is set (to any value) at the
 * time this class is initialized. When true, openReader and openWriter
 * report the files they touch to the log.
 */
public static final boolean SHOW_FILES;
static {
    boolean propertyPresent;
    try {
        propertyPresent = System.getProperty("SHOW_FILES") != null;
    } catch (SecurityException ignored) {
        // Reading system properties is not permitted here; behave as if unset.
        propertyPresent = false;
    }
    SHOW_FILES = propertyPresent;
}

/** Auto-flushing writer over System.out; used when a caller passes no writer. */
public static final PrintWriter CONSOLE = new PrintWriter(System.out, true);

/** Shared log destination (see getLog / setLog); starts out as CONSOLE. */
private static PrintWriter log = CONSOLE;
0050:
// When true, getName(separator, start, end) shortens the end-of-range name
// through getAbbreviatedName.
private boolean abbreviated = false;
// Appended by Join.doBetween; also passed to the name and hex label sources
// when a whole string is formatted.
private String separator = ",";
// Appended by Join.doBefore and Join.doAfter respectively.
private String prefix = "[";
private String suffix = "]";
// Property factory; getUnicodePropertyFactory() installs
// ICUPropertyFactory.make() while this is still null.
private UnicodeProperty.Factory source;
// Label sources. getNameSource, getLabelSource, getRangeBreakSource and
// getValueSource each install a default while their field is null.
private UnicodeLabel nameSource;
private UnicodeLabel labelSource;
private UnicodeLabel rangeBreakSource;
private UnicodeLabel valueSource;
// Property name column; empty string means "no column" (see showLine).
private String propName = "";
// When true (and ranges are merged) showLine appends the size of each range.
private boolean showCount = true;
// NOTE(review): not referenced by any code visible in this file.
private boolean skipNullValues = true;
//private boolean suppressReserved = true;
// When true, the private getValue(int, boolean) hex-formats the value.
private boolean hexValue = false;
// Sentinel returned by getValue(int, boolean) when the value source yields
// null; showLine compares against it by identity.
private static final String NULL_VALUE = "_NULL_VALUE_";
// Expected total of code points, or -1 when unset; consumed and reset to -1
// by MyVisitor.doAfter when it differs from the number actually listed.
private int fullTotal = -1;
// When true, MyVisitor.doAfter prints the "# Total code points" trailer.
private boolean showTotal = true;
// Terminator written after each formatted line.
private String lineSeparator = "\r\n";
// Column aligner; reconfigured at the start of every formatting pass.
private Tabber tabber = new Tabber.MonoTabber();
0070:
0071: /**
0072: * Compare two UnicodeSets, and show the differences
0073: * @param name1 name of first set to be compared
0074: * @param set1 first set
0075: * @param name2 name of second set to be compared
0076: * @param set2 second set
0077: * @return formatted string
0078: */
0079: public String showSetDifferences(String name1, UnicodeSet set1,
0080: String name2, UnicodeSet set2) {
0081:
0082: StringWriter result = new StringWriter();
0083: showSetDifferences(new PrintWriter(result), name1, set1, name2,
0084: set2);
0085: result.flush();
0086: return result.getBuffer().toString();
0087: }
0088:
0089: public String showSetDifferences(String name1, Collection set1,
0090: String name2, Collection set2) {
0091:
0092: StringWriter result = new StringWriter();
0093: showSetDifferences(new PrintWriter(result), name1, set1, name2,
0094: set2);
0095: result.flush();
0096: return result.getBuffer().toString();
0097: }
0098:
0099: public void showSetDifferences(PrintWriter pw, String name1,
0100: UnicodeSet set1, String name2, UnicodeSet set2) {
0101: showSetDifferences(pw, name1, set1, name2, set2, -1);
0102: }
0103:
0104: /**
0105: * Compare two UnicodeSets, and show the differences
0106: * @param name1 name of first set to be compared
0107: * @param set1 first set
0108: * @param name2 name of second set to be compared
0109: * @param set2 second set
0110: */
0111: public void showSetDifferences(PrintWriter pw, String name1,
0112: UnicodeSet set1, String name2, UnicodeSet set2, int flags) {
0113: if (pw == null)
0114: pw = CONSOLE;
0115: String[] names = { name1, name2 };
0116:
0117: UnicodeSet temp;
0118:
0119: if ((flags & 1) != 0) {
0120: temp = new UnicodeSet(set1).removeAll(set2);
0121: pw.print(lineSeparator);
0122: pw.print(inOut.format(names));
0123: pw.print(lineSeparator);
0124: showSetNames(pw, temp);
0125: }
0126:
0127: if ((flags & 2) != 0) {
0128: temp = new UnicodeSet(set2).removeAll(set1);
0129: pw.print(lineSeparator);
0130: pw.print(outIn.format(names));
0131: pw.print(lineSeparator);
0132: showSetNames(pw, temp);
0133: }
0134:
0135: if ((flags & 4) != 0) {
0136: temp = new UnicodeSet(set2).retainAll(set1);
0137: pw.print(lineSeparator);
0138: pw.print(inIn.format(names));
0139: pw.print(lineSeparator);
0140: showSetNames(pw, temp);
0141: }
0142: pw.flush();
0143: }
0144:
0145: public void showSetDifferences(PrintWriter pw, String name1,
0146: Collection set1, String name2, Collection set2) {
0147:
0148: if (pw == null)
0149: pw = CONSOLE;
0150: String[] names = { name1, name2 };
0151: // damn'd collection doesn't have a clone, so
0152: // we go with Set, even though that
0153: // may not preserve order and duplicates
0154: Collection temp = new HashSet(set1);
0155: temp.removeAll(set2);
0156: pw.println();
0157: pw.println(inOut.format(names));
0158: showSetNames(pw, temp);
0159:
0160: temp.clear();
0161: temp.addAll(set2);
0162: temp.removeAll(set1);
0163: pw.println();
0164: pw.println(outIn.format(names));
0165: showSetNames(pw, temp);
0166:
0167: temp.clear();
0168: temp.addAll(set1);
0169: temp.retainAll(set2);
0170: pw.println();
0171: pw.println(inIn.format(names));
0172: showSetNames(pw, temp);
0173: }
0174:
0175: /**
0176: * Returns a list of items in the collection, with each separated by the separator.
0177: * Each item must not be null; its toString() is called for a printable representation
0178: * @param c source collection
0179: * @return a String representation of the list
0180: * @internal
0181: */
0182: public String showSetNames(Object c) {
0183: StringWriter buffer = new StringWriter();
0184: PrintWriter output = new PrintWriter(buffer);
0185: showSetNames(output, c);
0186: return buffer.toString();
0187: }
0188:
0189: /**
0190: * Returns a list of items in the collection, with each separated by the separator.
0191: * Each item must not be null; its toString() is called for a printable representation
0192: * @param output destination to which to write names
0193: * @param c source collection
0194: * @internal
0195: */
0196: public void showSetNames(PrintWriter output, Object c) {
0197: mainVisitor.doAt(c, output);
0198: output.flush();
0199: }
0200:
0201: /**
0202: * Returns a list of items in the collection, with each separated by the separator.
0203: * Each item must not be null; its toString() is called for a printable representation
0204: * @param filename destination to which to write names
0205: * @param c source collection
0206: * @internal
0207: */
0208: public void showSetNames(String filename, Object c)
0209: throws IOException {
0210: PrintWriter pw = new PrintWriter(new OutputStreamWriter(
0211: new FileOutputStream(filename), "utf-8"));
0212: showSetNames(log, c);
0213: pw.close();
0214: }
0215:
0216: public String getAbbreviatedName(String source, String pattern,
0217: String substitute) {
0218:
0219: int matchEnd = NameIterator.findMatchingEnd(source, pattern);
0220: int sdiv = source.length() - matchEnd;
0221: int pdiv = pattern.length() - matchEnd;
0222: StringBuffer result = new StringBuffer();
0223: addMatching(source.substring(0, sdiv), pattern.substring(0,
0224: pdiv), substitute, result);
0225: addMatching(source.substring(sdiv), pattern.substring(pdiv),
0226: substitute, result);
0227: return result.toString();
0228: }
0229:
0230: abstract public static class Relation {
0231: abstract public String getRelation(String a, String b);
0232: }
0233:
0234: static class NullRelation extends Relation {
0235: public String getRelation(String a, String b) {
0236: return "";
0237: }
0238: }
0239:
0240: private Relation r = new NullRelation();
0241:
0242: public BagFormatter setRelation(Relation r) {
0243: this .r = r;
0244: return this ; // for chaining
0245: }
0246:
0247: public Relation getRelation() {
0248: return r;
0249: }
0250:
/*
 r.getRelation(last, s) + quote(s) + "\t#" + UnicodeSetFormatter.getResolvedName(s)
 */
0254: /*
0255: static final UnicodeSet NO_NAME =
0256: new UnicodeSet("[\\u0080\\u0081\\u0084\\u0099\\p{Cn}\\p{Co}]");
0257: static final UnicodeSet HAS_NAME = new UnicodeSet(NO_NAME).complement();
0258: static final UnicodeSet NAME_CHARACTERS =
0259: new UnicodeSet("[A-Za-z0-9\\<\\>\\-\\ ]");
0260:
0261: public UnicodeSet getSetForName(String namePattern) {
0262: UnicodeSet result = new UnicodeSet();
0263: Matcher m = Pattern.compile(namePattern).matcher("");
0264: // check for no-name items, and add in bulk
0265: m.reset("<no name>");
0266: if (m.matches()) {
0267: result.addAll(NO_NAME);
0268: }
0269: // check all others
0270: UnicodeSetIterator usi = new UnicodeSetIterator(HAS_NAME);
0271: while (usi.next()) {
0272: String name = getName(usi.codepoint);
0273: if (name == null)
0274: continue;
0275: m.reset(name);
0276: if (m.matches()) {
0277: result.add(usi.codepoint);
0278: }
0279: }
0280: // Note: if Regex had some API so that if we could tell that
0281: // an initial substring couldn't match, e.g. "CJK IDEOGRAPH-"
0282: // then we could optimize by skipping whole swathes of characters
0283: return result;
0284: }
0285: */
0286:
0287: public BagFormatter setMergeRanges(boolean in) {
0288: mergeRanges = in;
0289: return this ;
0290: }
0291:
0292: public BagFormatter setShowSetAlso(boolean b) {
0293: showSetAlso = b;
0294: return this ;
0295: }
0296:
0297: public String getName(int codePoint) {
0298: return getName("", codePoint, codePoint);
0299: }
0300:
0301: public String getName(String separator, int start, int end) {
0302: if (getNameSource() == null
0303: || getNameSource() == UnicodeLabel.NULL)
0304: return "";
0305: String result = getName(start, false);
0306: if (start == end)
0307: return separator + result;
0308: String endString = getName(end, false);
0309: if (result.length() == 0 && endString.length() == 0)
0310: return separator;
0311: if (abbreviated)
0312: endString = getAbbreviatedName(endString, result, "~");
0313: return separator + result + ".." + endString;
0314: }
0315:
0316: public String getName(String s) {
0317: return getName(s, false);
0318: }
0319:
0320: public static class NameLabel extends UnicodeLabel {
0321: UnicodeProperty nameProp;
0322: UnicodeSet control;
0323: UnicodeSet private_use;
0324: UnicodeSet noncharacter;
0325: UnicodeSet surrogate;
0326:
0327: public NameLabel(UnicodeProperty.Factory source) {
0328: nameProp = source.getProperty("Name");
0329: control = source.getSet("gc=Cc");
0330: private_use = source.getSet("gc=Co");
0331: surrogate = source.getSet("gc=Cs");
0332: noncharacter = source.getSet("noncharactercodepoint=true");
0333: }
0334:
0335: public String getValue(int codePoint, boolean isShort) {
0336: String hcp = !isShort ? "U+" + Utility.hex(codePoint, 4)
0337: + " " : "";
0338: String result = nameProp.getValue(codePoint);
0339: if (result != null)
0340: return hcp + result;
0341: if (control.contains(codePoint))
0342: return "<control-" + Utility.hex(codePoint, 4) + ">";
0343: if (private_use.contains(codePoint))
0344: return "<private-use-" + Utility.hex(codePoint, 4)
0345: + ">";
0346: if (noncharacter.contains(codePoint))
0347: return "<noncharacter-" + Utility.hex(codePoint, 4)
0348: + ">";
0349: if (surrogate.contains(codePoint))
0350: return "<surrogate-" + Utility.hex(codePoint, 4) + ">";
0351: //if (suppressReserved) return "";
0352: return hcp + "<reserved-" + Utility.hex(codePoint, 4) + ">";
0353: }
0354:
0355: }
0356:
0357: // refactored
0358: public String getName(int codePoint, boolean withCodePoint) {
0359: String result = getNameSource().getValue(codePoint,
0360: !withCodePoint);
0361: return fixName == null ? result : fixName.transliterate(result);
0362: }
0363:
0364: public String getName(String s, boolean withCodePoint) {
0365: String result = getNameSource().getValue(s, separator,
0366: !withCodePoint);
0367: return fixName == null ? result : fixName.transliterate(result);
0368: }
0369:
0370: public String hex(String s) {
0371: return hex(s, separator);
0372: }
0373:
0374: public String hex(String s, String separator) {
0375: return UnicodeLabel.HEX.getValue(s, separator, true);
0376: }
0377:
0378: public String hex(int start, int end) {
0379: String s = Utility.hex(start, 4);
0380: if (start == end)
0381: return s;
0382: return s + ".." + Utility.hex(end, 4);
0383: }
0384:
/**
 * @param source the property factory to use from now on
 * @return this (for chaining)
 */
public BagFormatter setUnicodePropertyFactory(
        UnicodeProperty.Factory source) {
    this.source = source;
    return this;
}

/**
 * @return the property factory; ICUPropertyFactory.make() is installed
 *         first if none has been set
 */
public UnicodeProperty.Factory getUnicodePropertyFactory() {
    if (this.source != null) {
        return this.source;
    }
    this.source = ICUPropertyFactory.make();
    return this.source;
}

/** Creates a formatter that obtains its property factory on first use. */
public BagFormatter() {
}

/**
 * Creates a formatter backed by the given property factory.
 * @param source the property factory to use
 */
public BagFormatter(UnicodeProperty.Factory source) {
    setUnicodePropertyFactory(source);
}

/**
 * @param o the object whose items are to be joined
 * @return the items rendered with the current prefix, separator and suffix
 */
public String join(Object o) {
    return labelVisitor.join(o);
}
0407:
0408: // ===== PRIVATES =====
0409:
// Visitor behind join(Object).
private Join labelVisitor = new Join();

// When true, runs of code points with equal label, value and range-break
// label are printed as one range line.
private boolean mergeRanges = true;
// Optional transliterator for the literal-character column; null disables the column.
private Transliterator showLiteral = null;
// Optional transliterator applied to every name returned by getName; null disables it.
private Transliterator fixName = null;
// When true, a visited UnicodeSet is printed as a "#" comment line before its contents.
private boolean showSetAlso = false;

// Splits a code point range into sub-ranges; used when mergeRanges is true.
private RangeFinder rf = new RangeFinder();

// Section headings for showSetDifferences; {0} and {1} are the two set names.
private MessageFormat inOut = new MessageFormat(
        "In {0}, but not in {1}:");
private MessageFormat outIn = new MessageFormat(
        "Not in {0}, but in {1}:");
private MessageFormat inIn = new MessageFormat(
        "In both {0}, and in {1}:");

// Visitor behind showSetNames.
private MyVisitor mainVisitor = new MyVisitor();
0427:
0428: /*
0429: private String getLabels(int start, int end) {
0430: Set names = new TreeSet();
0431: for (int cp = start; cp <= end; ++cp) {
0432: names.add(getLabel(cp));
0433: }
0434: return labelVisitor.join(names);
0435: }
0436: */
0437:
0438: private void addMatching(String source, String pattern,
0439: String substitute, StringBuffer result) {
0440: NameIterator n1 = new NameIterator(source);
0441: NameIterator n2 = new NameIterator(pattern);
0442: boolean first = true;
0443: while (true) {
0444: String s1 = n1.next();
0445: if (s1 == null)
0446: break;
0447: String s2 = n2.next();
0448: if (!first)
0449: result.append(" ");
0450: first = false;
0451: if (s1.equals(s2))
0452: result.append(substitute);
0453: else
0454: result.append(s1);
0455: }
0456: }
0457:
0458: private static NumberFormat nf = NumberFormat
0459: .getIntegerInstance(Locale.ENGLISH);
0460: static {
0461: nf.setGroupingUsed(false);
0462: }
0463:
0464: private class MyVisitor extends Visitor {
// Destination of the current formatting pass; set by doAt(Object, PrintWriter).
private PrintWriter output;
// "\t #" when at least one comment column is enabled, otherwise "";
// computed by doAt(Object, PrintWriter).
String commentSeparator;
// Number of items / code points written since doAt(Object, PrintWriter) reset it.
int counter;
// Maximum widths reported by the value and label sources; a width that is
// not positive means the corresponding column is left out.
int valueSize;
int labelSize;
0470:
0471: public void doAt(Object c, PrintWriter output) {
0472: this .output = output;
0473: counter = 0;
0474:
0475: tabber.clear();
0476: tabber.add(mergeRanges ? 14 : 6, Tabber.LEFT);
0477:
0478: if (propName.length() > 0)
0479: tabber.add(propName.length() + 2, Tabber.LEFT);
0480:
0481: valueSize = getValueSource().getMaxWidth(shortValue);
0482: if (DEBUG)
0483: System.out.println("ValueSize: " + valueSize);
0484: if (valueSize > 0)
0485: tabber.add(valueSize + 2, Tabber.LEFT); // value
0486:
0487: tabber.add(3, Tabber.LEFT); // comment character
0488:
0489: labelSize = getLabelSource(true).getMaxWidth(shortLabel);
0490: if (labelSize > 0)
0491: tabber.add(labelSize + 1, Tabber.LEFT); // value
0492:
0493: if (mergeRanges && showCount)
0494: tabber.add(5, Tabber.RIGHT);
0495:
0496: if (showLiteral != null)
0497: tabber.add(4, Tabber.LEFT);
0498: //myTabber.add(7,Tabber.LEFT);
0499:
0500: commentSeparator = (showCount || showLiteral != null
0501: || getLabelSource(true) != UnicodeLabel.NULL || getNameSource() != UnicodeLabel.NULL) ? "\t #"
0502: : "";
0503:
0504: if (DEBUG)
0505: System.out.println("Tabber: " + tabber.toString());
0506: if (DEBUG)
0507: System.out.println("Tabber: "
0508: + tabber.process("a\tb\td\td\tf\tg\th"));
0509: doAt(c);
0510: }
0511:
0512: public String format(Object o) {
0513: StringWriter sw = new StringWriter();
0514: PrintWriter pw = new PrintWriter(sw);
0515: doAt(o);
0516: pw.flush();
0517: String result = sw.getBuffer().toString();
0518: pw.close();
0519: return result;
0520: }
0521:
0522: protected void doBefore(Object container, Object o) {
0523: if (showSetAlso && container instanceof UnicodeSet) {
0524: output.print("#" + container + lineSeparator);
0525: }
0526: }
0527:
0528: protected void doBetween(Object container, Object lastItem,
0529: Object nextItem) {
0530: }
0531:
0532: protected void doAfter(Object container, Object o) {
0533: if (fullTotal != -1 && fullTotal != counter) {
0534: if (showTotal) {
0535: output.print(lineSeparator);
0536: output
0537: .print("# The above property value applies to "
0538: + nf.format(fullTotal - counter)
0539: + " code points not listed here."
0540: + lineSeparator);
0541: output.print("# Total code points: "
0542: + nf.format(fullTotal) + lineSeparator);
0543: }
0544: fullTotal = -1;
0545: } else if (showTotal) {
0546: output.print(lineSeparator);
0547: output.print("# Total code points: "
0548: + nf.format(counter) + lineSeparator);
0549: }
0550: }
0551:
0552: protected void doSimpleAt(Object o) {
0553: if (o instanceof Map.Entry) {
0554: Map.Entry oo = (Map.Entry) o;
0555: Object key = oo.getKey();
0556: Object value = oo.getValue();
0557: doBefore(o, key);
0558: doAt(key);
0559: output.print("->");
0560: doAt(value);
0561: doAfter(o, value);
0562: counter++;
0563: } else if (o instanceof Visitor.CodePointRange) {
0564: doAt((Visitor.CodePointRange) o);
0565: } else {
0566: String thing = o.toString();
0567: String value = getValueSource() == UnicodeLabel.NULL ? ""
0568: : getValueSource().getValue(thing, ",", true);
0569: if (value.length() != 0)
0570: value = "\t; " + value;
0571: String label = getLabelSource(true) == UnicodeLabel.NULL ? ""
0572: : getLabelSource(true).getValue(thing, ",",
0573: true);
0574: if (label.length() != 0)
0575: label = " " + label;
0576: output.print(tabber.process(hex(thing) + value
0577: + commentSeparator + label
0578: + insertLiteral(thing) + "\t" + getName(thing))
0579: + lineSeparator);
0580: counter++;
0581: }
0582: }
0583:
0584: protected void doAt(Visitor.CodePointRange usi) {
0585: if (!mergeRanges) {
0586: for (int cp = usi.codepoint; cp <= usi.codepointEnd; ++cp) {
0587: showLine(cp, cp);
0588: }
0589: } else {
0590: rf.reset(usi.codepoint, usi.codepointEnd + 1);
0591: while (rf.next()) {
0592: showLine(rf.start, rf.limit - 1);
0593: }
0594: }
0595: }
0596:
0597: private void showLine(int start, int end) {
0598: String label = getLabelSource(true).getValue(start,
0599: shortLabel);
0600: String value = getValue(start, shortValue);
0601: if (value == NULL_VALUE)
0602: return;
0603:
0604: counter += end - start + 1;
0605: String pn = propName;
0606: if (pn.length() != 0) {
0607: pn = "\t; " + pn;
0608: }
0609: if (valueSize > 0) {
0610: value = "\t; " + value;
0611: } else if (value.length() > 0) {
0612: throw new IllegalArgumentException("maxwidth bogus "
0613: + value + ","
0614: + getValueSource().getMaxWidth(shortValue));
0615: }
0616: if (labelSize > 0) {
0617: label = "\t" + label;
0618: } else if (label.length() > 0) {
0619: throw new IllegalArgumentException("maxwidth bogus "
0620: + label + ", "
0621: + getLabelSource(true).getMaxWidth(shortLabel));
0622: }
0623:
0624: String count = "";
0625: if (mergeRanges && showCount) {
0626: if (end == start)
0627: count = "\t";
0628: else
0629: count = "\t [" + nf.format(end - start + 1) + "]";
0630: }
0631:
0632: output.print(tabber.process(hex(start, end) + pn + value
0633: + commentSeparator + label + count
0634: + insertLiteral(start, end)
0635: + getName("\t ", start, end))
0636: + lineSeparator);
0637: }
0638:
0639: private String insertLiteral(String thing) {
0640: return (showLiteral == null ? "" : " \t("
0641: + showLiteral.transliterate(thing) + ") ");
0642: }
0643:
0644: private String insertLiteral(int start, int end) {
0645: return (showLiteral == null ? "" : " \t("
0646: + showLiteral.transliterate(UTF16.valueOf(start))
0647: + ((start != end) ? (".." + showLiteral
0648: .transliterate(UTF16.valueOf(end))) : "")
0649: + ") ");
0650: }
0651: /*
0652: private String insertLiteral(int cp) {
0653: return (showLiteral == null ? ""
0654: : " \t(" + showLiteral.transliterate(UTF16.valueOf(cp)) + ") ");
0655: }
0656: */
0657: }
0658:
0659: /**
0660: * Iterate through a string, breaking at words.
0661: * @author Davis
0662: */
0663: private static class NameIterator {
0664: String source;
0665: int position;
0666: int start;
0667: int limit;
0668:
0669: NameIterator(String source) {
0670: this .source = source;
0671: this .start = 0;
0672: this .limit = source.length();
0673: }
0674:
0675: /**
0676: * Find next word, including trailing spaces
0677: * @return the next word
0678: */
0679: String next() {
0680: if (position >= limit)
0681: return null;
0682: int pos = source.indexOf(' ', position);
0683: if (pos < 0 || pos >= limit)
0684: pos = limit;
0685: String result = source.substring(position, pos);
0686: position = pos + 1;
0687: return result;
0688: }
0689:
0690: static int findMatchingEnd(String s1, String s2) {
0691: int i = s1.length();
0692: int j = s2.length();
0693: try {
0694: while (true) {
0695: --i; // decrement both before calling function!
0696: --j;
0697: if (s1.charAt(i) != s2.charAt(j))
0698: break;
0699: }
0700: } catch (Exception e) {
0701: } // run off start
0702:
0703: ++i; // counteract increment
0704: i = s1.indexOf(' ', i); // move forward to space
0705: if (i < 0)
0706: return 0;
0707: return s1.length() - i;
0708: }
0709: }
0710:
0711: private class RangeFinder {
0712: int start, limit;
0713: private int veryLimit;
0714:
0715: //String label, value;
0716: void reset(int start, int limit) {
0717: this .limit = start;
0718: this .veryLimit = limit;
0719: }
0720:
0721: boolean next() {
0722: if (limit >= veryLimit)
0723: return false;
0724: start = limit; // set to end of last
0725: String label = getLabelSource(false).getValue(limit, true);
0726: String value = getValue(limit, true);
0727: String breaker = getRangeBreakSource()
0728: .getValue(limit, true);
0729: if (DEBUG && limit < 0x7F)
0730: System.out.println("Label: " + label + ", Value: "
0731: + value + ", Break: " + breaker);
0732: limit++;
0733: for (; limit < veryLimit; limit++) {
0734: String s = getLabelSource(false).getValue(limit, true);
0735: String v = getValue(limit, true);
0736: String b = getRangeBreakSource().getValue(limit, true);
0737: if (DEBUG && limit < 0x7F)
0738: System.out.println("*Label: " + label + ", Value: "
0739: + value + ", Break: " + breaker);
0740: if (!equalTo(s, label) || !equalTo(v, value)
0741: || !equalTo(b, breaker))
0742: break;
0743: }
0744: // at this point, limit is the first item that has a different label than source
0745: // OR, we got to the end, and limit == veryLimit
0746: return true;
0747: }
0748: }
0749:
0750: boolean equalTo(Object a, Object b) {
0751: if (a == b)
0752: return true;
0753: if (a == null)
0754: return false;
0755: return a.equals(b);
0756: }
0757:
0758: boolean shortLabel = true;
0759: boolean shortValue = true;
0760:
0761: public String getPrefix() {
0762: return prefix;
0763: }
0764:
0765: public String getSuffix() {
0766: return suffix;
0767: }
0768:
0769: public BagFormatter setPrefix(String string) {
0770: prefix = string;
0771: return this ;
0772: }
0773:
0774: public BagFormatter setSuffix(String string) {
0775: suffix = string;
0776: return this ;
0777: }
0778:
0779: public boolean isAbbreviated() {
0780: return abbreviated;
0781: }
0782:
0783: public BagFormatter setAbbreviated(boolean b) {
0784: abbreviated = b;
0785: return this ;
0786: }
0787:
0788: public UnicodeLabel getLabelSource(boolean visible) {
0789: if (labelSource == null) {
0790: Map labelMap = new HashMap();
0791: //labelMap.put("Lo","L&");
0792: labelMap.put("Lu", "L&");
0793: labelMap.put("Lt", "L&");
0794: labelMap.put("Ll", "L&");
0795: labelSource = new UnicodeProperty.FilteredProperty(
0796: getUnicodePropertyFactory().getProperty(
0797: "General_Category"),
0798: new UnicodeProperty.MapFilter(labelMap))
0799: .setAllowValueAliasCollisions(true);
0800: }
0801: return labelSource;
0802: }
0803:
0804: /**
0805: * @deprecated
0806: */
0807: public static void addAll(UnicodeSet source, Collection target) {
0808: source.addAllTo(target);
0809: }
0810:
0811: // UTILITIES
0812:
/**
 * Transliterator created from a filtered "hex" ID: the filter restricts
 * the hex transform to characters other than tab, U+0020..U+007E and
 * U+00A0..U+00FF.
 */
public static final Transliterator hex = Transliterator
        .getInstance("[^\\u0009\\u0020-\\u007E\\u00A0-\\u00FF] hex");
0815:
0816: public static BufferedReader openUTF8Reader(String dir,
0817: String filename) throws IOException {
0818: return openReader(dir, filename, "UTF-8");
0819: }
0820:
0821: public static BufferedReader openReader(String dir,
0822: String filename, String encoding) throws IOException {
0823: File file = new File(dir + filename);
0824: if (SHOW_FILES && log != null) {
0825: log.println("Opening File: " + file.getCanonicalPath());
0826: }
0827: return new BufferedReader(new InputStreamReader(
0828: new FileInputStream(file), encoding), 4 * 1024);
0829: }
0830:
0831: public static PrintWriter openUTF8Writer(String dir, String filename)
0832: throws IOException {
0833: return openWriter(dir, filename, "UTF-8");
0834: }
0835:
0836: public static PrintWriter openWriter(String dir, String filename,
0837: String encoding) throws IOException {
0838: File file = new File(dir + filename);
0839: if (SHOW_FILES && log != null) {
0840: log.println("Creating File: " + file.getCanonicalPath());
0841: }
0842: String parentName = file.getParent();
0843: if (parentName != null) {
0844: File parent = new File(parentName);
0845: parent.mkdirs();
0846: }
0847: return new PrintWriter(new BufferedWriter(
0848: new OutputStreamWriter(new FileOutputStream(file),
0849: encoding), 4 * 1024));
0850: }
0851:
0852: public static PrintWriter getLog() {
0853: return log;
0854: }
0855:
0856: public BagFormatter setLog(PrintWriter writer) {
0857: log = writer;
0858: return this ;
0859: }
0860:
0861: public String getSeparator() {
0862: return separator;
0863: }
0864:
0865: public BagFormatter setSeparator(String string) {
0866: separator = string;
0867: return this ;
0868: }
0869:
0870: public Transliterator getShowLiteral() {
0871: return showLiteral;
0872: }
0873:
0874: public BagFormatter setShowLiteral(Transliterator transliterator) {
0875: showLiteral = transliterator;
0876: return this ;
0877: }
0878:
0879: // ===== CONVENIENCES =====
0880: private class Join extends Visitor {
0881: StringBuffer output = new StringBuffer();
0882: int depth = 0;
0883:
0884: String join(Object o) {
0885: output.setLength(0);
0886: doAt(o);
0887: return output.toString();
0888: }
0889:
0890: protected void doBefore(Object container, Object item) {
0891: ++depth;
0892: output.append(prefix);
0893: }
0894:
0895: protected void doAfter(Object container, Object item) {
0896: output.append(suffix);
0897: --depth;
0898: }
0899:
0900: protected void doBetween(Object container, Object lastItem,
0901: Object nextItem) {
0902: output.append(separator);
0903: }
0904:
0905: protected void doSimpleAt(Object o) {
0906: if (o != null)
0907: output.append(o.toString());
0908: }
0909: }
0910:
0911: /**
0912: * @param label
0913: */
0914: public BagFormatter setLabelSource(UnicodeLabel label) {
0915: if (label == null)
0916: label = UnicodeLabel.NULL;
0917: labelSource = label;
0918: return this ;
0919: }
0920:
0921: /**
0922: * @return the NameLable representing the source
0923: */
0924: public UnicodeLabel getNameSource() {
0925: if (nameSource == null) {
0926: nameSource = new NameLabel(getUnicodePropertyFactory());
0927: }
0928: return nameSource;
0929: }
0930:
0931: /**
0932: * @param label
0933: */
0934: public BagFormatter setNameSource(UnicodeLabel label) {
0935: if (label == null)
0936: label = UnicodeLabel.NULL;
0937: nameSource = label;
0938: return this ;
0939: }
0940:
0941: /**
0942: * @return the UnicodeLabel representing the value
0943: */
0944: public UnicodeLabel getValueSource() {
0945: if (valueSource == null)
0946: valueSource = UnicodeLabel.NULL;
0947: return valueSource;
0948: }
0949:
0950: private String getValue(int cp, boolean shortValue) {
0951: String result = getValueSource().getValue(cp, shortValue);
0952: if (result == null)
0953: return NULL_VALUE;
0954: if (hexValue)
0955: result = hex(result, " ");
0956: return result;
0957: }
0958:
0959: /**
0960: * @param label
0961: */
0962: public BagFormatter setValueSource(UnicodeLabel label) {
0963: if (label == null)
0964: label = UnicodeLabel.NULL;
0965: valueSource = label;
0966: return this ;
0967: }
0968:
0969: public BagFormatter setValueSource(String label) {
0970: return setValueSource(new UnicodeLabel.Constant(label));
0971: }
0972:
0973: /**
0974: * @return true if showCount is true
0975: */
0976: public boolean isShowCount() {
0977: return showCount;
0978: }
0979:
0980: /**
0981: * @param b true to show the count
0982: * @return this (for chaining)
0983: */
0984: public BagFormatter setShowCount(boolean b) {
0985: showCount = b;
0986: return this ;
0987: }
0988:
0989: /**
0990: * @return the property name
0991: */
0992: public String getPropName() {
0993: return propName;
0994: }
0995:
0996: /**
0997: * @param string
0998: * @return this (for chaining)
0999: */
1000: public BagFormatter setPropName(String string) {
1001: if (string == null)
1002: string = "";
1003: propName = string;
1004: return this ;
1005: }
1006:
1007: /**
1008: * @return true if this is a hexValue
1009: */
1010: public boolean isHexValue() {
1011: return hexValue;
1012: }
1013:
1014: /**
1015: * @param b
1016: * @return this (for chaining)
1017: */
1018: public BagFormatter setHexValue(boolean b) {
1019: hexValue = b;
1020: return this ;
1021: }
1022:
1023: /**
1024: * @return the full total
1025: */
1026: public int getFullTotal() {
1027: return fullTotal;
1028: }
1029:
1030: /**
1031: * @param i set the full total
1032: * @return this (for chaining)
1033: */
1034: public BagFormatter setFullTotal(int i) {
1035: fullTotal = i;
1036: return this ;
1037: }
1038:
1039: /**
1040: * @return the line separator
1041: */
1042: public String getLineSeparator() {
1043: return lineSeparator;
1044: }
1045:
1046: /**
1047: * @param string
1048: * @return this (for chaining)
1049: */
1050: public BagFormatter setLineSeparator(String string) {
1051: lineSeparator = string;
1052: return this ;
1053: }
1054:
1055: /**
1056: * @return the UnicodeLabel representing the range break source
1057: */
1058: public UnicodeLabel getRangeBreakSource() {
1059: if (rangeBreakSource == null) {
1060: Map labelMap = new HashMap();
1061: // reflects the code point types on p 25
1062: labelMap.put("Lo", "G&");
1063: labelMap.put("Lm", "G&");
1064: labelMap.put("Lu", "G&");
1065: labelMap.put("Lt", "G&");
1066: labelMap.put("Ll", "G&");
1067: labelMap.put("Mn", "G&");
1068: labelMap.put("Me", "G&");
1069: labelMap.put("Mc", "G&");
1070: labelMap.put("Nd", "G&");
1071: labelMap.put("Nl", "G&");
1072: labelMap.put("No", "G&");
1073: labelMap.put("Zs", "G&");
1074: labelMap.put("Pd", "G&");
1075: labelMap.put("Ps", "G&");
1076: labelMap.put("Pe", "G&");
1077: labelMap.put("Pc", "G&");
1078: labelMap.put("Po", "G&");
1079: labelMap.put("Pi", "G&");
1080: labelMap.put("Pf", "G&");
1081: labelMap.put("Sm", "G&");
1082: labelMap.put("Sc", "G&");
1083: labelMap.put("Sk", "G&");
1084: labelMap.put("So", "G&");
1085:
1086: labelMap.put("Zl", "Cf");
1087: labelMap.put("Zp", "Cf");
1088:
1089: rangeBreakSource = new UnicodeProperty.FilteredProperty(
1090: getUnicodePropertyFactory().getProperty(
1091: "General_Category"),
1092: new UnicodeProperty.MapFilter(labelMap))
1093: .setAllowValueAliasCollisions(true);
1094:
1095: /*
1096: "Cn", // = Other, Not Assigned 0
1097: "Cc", // = Other, Control 15
1098: "Cf", // = Other, Format 16
1099: UnicodeProperty.UNUSED, // missing
1100: "Co", // = Other, Private Use 18
1101: "Cs", // = Other, Surrogate 19
1102: */
1103: }
1104: return rangeBreakSource;
1105: }
1106:
1107: /**
1108: * @param label
1109: */
1110: public BagFormatter setRangeBreakSource(UnicodeLabel label) {
1111: if (label == null)
1112: label = UnicodeLabel.NULL;
1113: rangeBreakSource = label;
1114: return this ;
1115: }
1116:
1117: /**
1118: * @return Returns the fixName.
1119: */
1120: public Transliterator getFixName() {
1121: return fixName;
1122: }
1123:
1124: /**
1125: * @param fixName The fixName to set.
1126: */
1127: public void setFixName(Transliterator fixName) {
1128: this .fixName = fixName;
1129: }
1130:
1131: public Tabber getTabber() {
1132: return tabber;
1133: }
1134:
1135: public void setTabber(Tabber tabber) {
1136: this .tabber = tabber;
1137: }
1138:
1139: public boolean isShowTotal() {
1140: return showTotal;
1141: }
1142:
1143: public void setShowTotal(boolean showTotal) {
1144: this .showTotal = showTotal;
1145: }
1146: }
1147: //#endif
|