/*
Java Internationalization
By Andy Deitsch, David Czarnecki
ISBN: 0-596-00019-7
O'Reilly
*/
import java.text.*;
import java.util.Locale;
/**
 * Demonstrates locale-sensitive text boundary detection on Hangul text.
 *
 * <p>A string of six conjoining jamo is analysed with a character
 * {@link BreakIterator} and a word {@link BreakIterator}, both obtained for
 * {@link Locale#KOREAN}, and the boundary offsets each one reports are
 * printed to standard output.
 */
public class HangulTextBoundaryDetection {

  /**
   * Prints every boundary offset that {@code bi} reports for {@code source}.
   *
   * <p>The offsets are written to {@code System.out} on the current line,
   * each followed by a single space. No line terminator is written, so the
   * caller decides where the line ends.
   *
   * @param source the text to analyse; it is installed on {@code bi} with
   *               {@link BreakIterator#setText(String)}
   * @param bi     the iterator to drive; its text and position are modified
   */
  static void printBoundaries(String source, BreakIterator bi) {
    bi.setText(source);
    // first() yields the initial boundary; next() yields DONE once the
    // iterator has moved past the last boundary.
    for (int boundary = bi.first();
        boundary != BreakIterator.DONE;
        boundary = bi.next()) {
      System.out.print(boundary + " ");
    }
  }

  /**
   * Prints the character and word boundaries of a sample Hangul string.
   *
   * @param s command-line arguments (unused)
   */
  public static void main(String[] s) {
    // A string composed of 6 conjoining jamo.
    String hangul = "\u1112\u1161\u11ab\u1100\u1173\u11af";

    // Retrieve a character and a word BreakIterator object
    // that is locale-sensitive for Korean text.
    BreakIterator ci = BreakIterator.getCharacterInstance(Locale.KOREAN);
    BreakIterator wi = BreakIterator.getWordInstance(Locale.KOREAN);

    System.out.print("Character Boundaries: ");
    printBoundaries(hangul, ci);

    // Same "label: " shape as the line above so the first offset is not
    // glued to the colon.
    System.out.print("\nWord Boundaries: ");
    printBoundaries(hangul, wi);

    // Terminate the final line so following console output starts cleanly.
    System.out.println();
  }
}
|