0001: /*
0002: *******************************************************************************
0003: * Copyright (C) 2001-2005, International Business Machines
0004: * Corporation and others. All Rights Reserved.
0005: *******************************************************************************
0006: */
0007:
0008: package com.ibm.icu.text;
0009:
0010: import java.io.IOException;
0011: import java.util.MissingResourceException;
0012:
0013: import com.ibm.icu.impl.UBiDiProps;
0014:
0015: import com.ibm.icu.lang.*;
0016:
0017: /**
0018: * Shape Arabic text on a character basis.
0019: *
0020: * <p>ArabicShaping performs basic operations for "shaping" Arabic text. It is most
0021: * useful for use with legacy data formats and legacy display technology
0022: * (simple terminals). All operations are performed on Unicode characters.</p>
0023: *
0024: * <p>Text-based shaping means that some character code points in the text are
0025: * replaced by others depending on the context. It transforms one kind of text
0026: * into another. In comparison, modern displays for Arabic text select
0027: * appropriate, context-dependent font glyphs for each text element, which means
0028: * that they transform text into a glyph vector.</p>
0029: *
0030: * <p>Text transformations are necessary when modern display technology is not
0031: * available or when text needs to be transformed to or from legacy formats that
0032: * use "shaped" characters. Since the Arabic script is cursive, connecting
0033: * adjacent letters to each other, computers select images for each letter based
0034: * on the surrounding letters. This usually results in four images per Arabic
0035: * letter: initial, middle, final, and isolated forms. In Unicode, on the other
0036: * hand, letters are normally stored abstract, and a display system is expected
0037: * to select the necessary glyphs. (This makes searching and other text
0038: * processing easier because the same letter has only one code.) It is possible
0039: * to mimic this with text transformations because there are characters in
0040: * Unicode that are rendered as letters with a specific shape
0041: * (or cursive connectivity). They were included for interoperability with
0042: * legacy systems and codepages, and for unsophisticated display systems.</p>
0043: *
0044: * <p>A second kind of text transformations is supported for Arabic digits:
0045: * For compatibility with legacy codepages that only include European digits,
0046: * it is possible to replace one set of digits by another, changing the
0047: * character code points. These operations can be performed for either
0048: * Arabic-Indic Digits (U+0660...U+0669) or Eastern (Extended) Arabic-Indic
0049: * digits (U+06f0...U+06f9).</p>
0050: *
0051: * <p>Some replacements may result in more or fewer characters (code points).
0052: * By default, this means that the destination buffer may receive text with a
0053: * length different from the source length. Some legacy systems rely on the
0054: * length of the text to be constant. They expect extra spaces to be added
0055: * or consumed either next to the affected character or at the end of the
0056: * text.</p>
0057: * @stable ICU 2.0
0058: */
0059: public final class ArabicShaping {
0060: private final int options; // OR-ed LENGTH_*, TEXT_DIRECTION_*, LETTERS_*, DIGITS_* and DIGIT_TYPE_* flags passed to the constructor
0061: private boolean isLogical; // convenience: true when (options & TEXT_DIRECTION_MASK) == TEXT_DIRECTION_LOGICAL (set in the constructor)
0062:
0063: /**
0064: * Convert a range of text in the source array, putting the result
0065: * into a range of text in the destination array, and return the number
0066: * of characters written.
0067: *
0068: * @param source An array containing the input text
0069: * @param sourceStart The start of the range of text to convert
0070: * @param sourceLength The length of the range of text to convert
0071: * @param dest The destination array that will receive the result.
0072: * It may be <code>NULL</code> only if <code>destSize</code> is 0.
0073: * @param destStart The start of the range of the destination buffer to use.
0074: * @param destSize The size (capacity) of the destination buffer.
0075: * If <code>destSize</code> is 0, then no output is produced,
0076: * but the necessary buffer size is returned ("preflighting"). This
0077: * does not validate the text against the options, for example,
0078: * if letters are being unshaped, and spaces are being consumed
0079: * following lamalef, this will not detect a lamalef without a
0080: * corresponding space. An error will be thrown when the actual
0081: * conversion is attempted.
0082: * @return The number of chars written to the destination buffer.
0083: * If an error occurs, then no output was written, or it may be
0084: * incomplete.
0085: * @throws ArabicShapingException if the text cannot be converted according to the options.
0086: * @stable ICU 2.0
0087: */
0088: public int shape(char[] source, int sourceStart, int sourceLength,
0089: char[] dest, int destStart, int destSize)
0090: throws ArabicShapingException {
0091: if (source == null) {
0092: throw new IllegalArgumentException("source can not be null");
0093: }
0094: if (sourceStart < 0 || sourceLength < 0
0095: || sourceStart + sourceLength > source.length) {
0096: throw new IllegalArgumentException("bad source start ("
0097: + sourceStart + ") or length (" + sourceLength
0098: + ") for buffer of length " + source.length);
0099: }
0100: if (dest == null && destSize != 0) {
0101: throw new IllegalArgumentException(
0102: "null dest requires destSize == 0");
0103: }
0104: if ((destSize != 0)
0105: && (destStart < 0 || destSize < 0 || destStart
0106: + destSize > dest.length)) {
0107: throw new IllegalArgumentException("bad dest start ("
0108: + destStart + ") or size (" + destSize
0109: + ") for buffer of length " + dest.length);
0110: }
0111:
0112: return internalShape(source, sourceStart, sourceLength, dest,
0113: destStart, destSize);
0114: }
0115:
0116: /**
0117: * Convert a range of text in place. This may only be used if the Length option
0118: * does not grow or shrink the text.
0119: *
0120: * @param source An array containing the input text
0121: * @param start The start of the range of text to convert
0122: * @param length The length of the range of text to convert
0123: * @throws ArabicShapingException if the text cannot be converted according to the options.
0124: * @stable ICU 2.0
0125: */
0126: public void shape(char[] source, int start, int length)
0127: throws ArabicShapingException {
0128: if ((options & LENGTH_MASK) == LENGTH_GROW_SHRINK) {
0129: throw new ArabicShapingException(
0130: "Cannot shape in place with length option grow/shrink.");
0131: }
0132: shape(source, start, length, source, start, length);
0133: }
0134:
0135: /**
0136: * Convert a string, returning the new string.
0137: *
0138: * @param text the string to convert
0139: * @return the converted string
0140: * @throws ArabicShapingException if the string cannot be converted according to the options.
0141: * @stable ICU 2.0
0142: */
0143: public String shape(String text) throws ArabicShapingException {
0144: char[] src = text.toCharArray();
0145: char[] dest = src;
0146: if (((options & LENGTH_MASK) == LENGTH_GROW_SHRINK)
0147: && ((options & LETTERS_MASK) == LETTERS_UNSHAPE)) {
0148:
0149: dest = new char[src.length * 2]; // max
0150: }
0151: int len = shape(src, 0, src.length, dest, 0, dest.length);
0152:
0153: return new String(dest, 0, len);
0154: }
0155:
0156: /**
0157: * Construct ArabicShaping using the options flags.
0158: * The flags are as follows:<br>
0159: * 'LENGTH' flags control whether the text can change size, and if not,
0160: * how to maintain the size of the text when LamAlef ligatures are
0161: * formed or broken.<br>
0162: * 'TEXT_DIRECTION' flags control whether the text is read and written
0163: * in visual order or in logical order.<br>
0164: * 'LETTERS_SHAPE' flags control whether conversion is to or from
0165: * presentation forms.<br>
0166: * 'DIGITS' flags control whether digits are shaped, and whether from
0167: * European to Arabic-Indic or vice-versa.<br>
0168: * 'DIGIT_TYPE' flags control whether standard or extended Arabic-Indic
0169: * digits are used when performing digit conversion.
0170: * @stable ICU 2.0
0171: */
0172: public ArabicShaping(int options) {
0173: this .options = options;
0174: if ((options & DIGITS_MASK) > 0x80) {
0175: throw new IllegalArgumentException("bad DIGITS options");
0176: }
0177: isLogical = (options & TEXT_DIRECTION_MASK) == TEXT_DIRECTION_LOGICAL;
0178: }
0179:
0180: /**
0181: * Memory option: allow the result to have a different length than the source (not usable with the in-place shape(char[], int, int)).
0182: * @stable ICU 2.0
0183: */
0184: public static final int LENGTH_GROW_SHRINK = 0;
0185:
0186: /**
0187: * Memory option: the result must have the same length as the source.
0188: * If more room is necessary, then try to consume spaces next to modified characters.
0189: * @stable ICU 2.0
0190: */
0191: public static final int LENGTH_FIXED_SPACES_NEAR = 1;
0192:
0193: /**
0194: * Memory option: the result must have the same length as the source.
0195: * If more room is necessary, then try to consume spaces at the end of the text.
0196: * @stable ICU 2.0
0197: */
0198: public static final int LENGTH_FIXED_SPACES_AT_END = 2;
0199:
0200: /**
0201: * Memory option: the result must have the same length as the source.
0202: * If more room is necessary, then try to consume spaces at the beginning of the text.
0203: * @stable ICU 2.0
0204: */
0205: public static final int LENGTH_FIXED_SPACES_AT_BEGINNING = 3;
0206:
0207: /**
0208: * Bit mask for memory (LENGTH_*) options; selects the two lowest bits of the options value.
0209: * @stable ICU 2.0
0210: */
0211: public static final int LENGTH_MASK = 3;
0212:
0213: /**
0214: * Direction indicator: the source is in logical (keyboard) order. This is the zero value of the direction bit.
0215: * @stable ICU 2.0
0216: */
0217: public static final int TEXT_DIRECTION_LOGICAL = 0;
0218:
0219: /**
0220: * Direction indicator: the source is in visual (display) order, that is,
0221: * the leftmost displayed character is stored first.
0222: * @stable ICU 2.0
0223: */
0224: public static final int TEXT_DIRECTION_VISUAL_LTR = 4;
0225:
0226: /**
0227: * Bit mask for direction indicators (TEXT_DIRECTION_* options).
0228: * @stable ICU 2.0
0229: */
0230: public static final int TEXT_DIRECTION_MASK = 4;
0231:
0232: /**
0233: * Letter shaping option: do not perform letter shaping.
0234: * @stable ICU 2.0
0235: */
0236: public static final int LETTERS_NOOP = 0;
0237:
0238: /**
0239: * Letter shaping option: replace normative letter characters in the U+0600 (Arabic) block,
0240: * by shaped ones in the U+FE70 (Presentation Forms B) block. Performs Lam-Alef ligature
0241: * substitution.
0242: * @stable ICU 2.0
0243: */
0244: public static final int LETTERS_SHAPE = 8;
0245:
0246: /**
0247: * Letter shaping option: replace shaped letter characters in the U+FE70 (Presentation Forms B) block
0248: * by normative ones in the U+0600 (Arabic) block. Converts Lam-Alef ligatures to pairs of Lam and
0249: * Alef characters, consuming spaces if required.
0250: * @stable ICU 2.0
0251: */
0252: public static final int LETTERS_UNSHAPE = 0x10;
0253:
0254: /**
0255: * Letter shaping option: replace normative letter characters in the U+0600 (Arabic) block,
0256: * except for the TASHKEEL characters at U+064B...U+0652, by shaped ones in the U+FE70
0257: * (Presentation Forms B) block. The TASHKEEL characters will always be converted to
0258: * the isolated forms rather than to their correct shape.
0259: * @stable ICU 2.0
0260: */
0261: public static final int LETTERS_SHAPE_TASHKEEL_ISOLATED = 0x18;
0262:
0263: /**
0264: * Bit mask for letter shaping options (the same two bits that LETTERS_SHAPE_TASHKEEL_ISOLATED sets).
0265: * @stable ICU 2.0
0266: */
0267: public static final int LETTERS_MASK = 0x18;
0268:
0269: /**
0270: * Digit shaping option: do not perform digit shaping.
0271: * @stable ICU 2.0
0272: */
0273: public static final int DIGITS_NOOP = 0;
0274:
0275: /**
0276: * Digit shaping option: Replace European digits (U+0030...U+0039) by Arabic-Indic digits.
0277: * @stable ICU 2.0
0278: */
0279: public static final int DIGITS_EN2AN = 0x20;
0280:
0281: /**
0282: * Digit shaping option: Replace Arabic-Indic digits by European digits (U+0030...U+0039).
0283: * @stable ICU 2.0
0284: */
0285: public static final int DIGITS_AN2EN = 0x40;
0286:
0287: /**
0288: * Digit shaping option:
0289: * Replace European digits (U+0030...U+0039) by Arabic-Indic digits
0290: * if the most recent strongly directional character
0291: * is an Arabic letter (its Bidi direction value is RIGHT_TO_LEFT_ARABIC).
0292: * The initial state at the start of the text is assumed to be not an Arabic
0293: * letter, so European digits at the start of the text will not change.
0294: * Compare to DIGITS_EN2AN_INIT_AL.
0295: * @stable ICU 2.0
0296: */
0297: public static final int DIGITS_EN2AN_INIT_LR = 0x60;
0298:
0299: /**
0300: * Digit shaping option:
0301: * Replace European digits (U+0030...U+0039) by Arabic-Indic digits
0302: * if the most recent strongly directional character
0303: * is an Arabic letter (its Bidi direction value is RIGHT_TO_LEFT_ARABIC).
0304: * The initial state at the start of the text is assumed to be an Arabic
0305: * letter, so European digits at the start of the text will change.
0306: * Compare to DIGITS_EN2AN_INIT_LR.
0307: * @stable ICU 2.0
0308: */
0309: public static final int DIGITS_EN2AN_INIT_AL = 0x80;
0310:
0311: /** Not a valid option value; the constructor rejects any DIGITS bits greater than DIGITS_EN2AN_INIT_AL (0x80). */
0312: private static final int DIGITS_RESERVED = 0xa0;
0313:
0314: /**
0315: * Bit mask for digit shaping (DIGITS_*) options.
0316: * @stable ICU 2.0
0317: */
0318: public static final int DIGITS_MASK = 0xe0;
0319:
0320: /**
0321: * Digit type option: Use Arabic-Indic digits (U+0660...U+0669). This is the zero value of the digit type bit.
0322: * @stable ICU 2.0
0323: */
0324: public static final int DIGIT_TYPE_AN = 0;
0325:
0326: /**
0327: * Digit type option: Use Eastern (Extended) Arabic-Indic digits (U+06f0...U+06f9).
0328: * @stable ICU 2.0
0329: */
0330: public static final int DIGIT_TYPE_AN_EXTENDED = 0x100;
0331:
0332: /**
0333: * Bit mask for digit type (DIGIT_TYPE_*) options; currently the single bit 0x100.
0334: * @stable ICU 2.0
0335: */
0336: public static final int DIGIT_TYPE_MASK = 0x0100; // 0x3f00?
0337:
0338: /**
0339: * @stable ICU 2.0
0340: */
0341: public boolean equals(Object rhs) {
0342: return rhs != null && rhs.getClass() == ArabicShaping.class
0343: && options == ((ArabicShaping) rhs).options;
0344: }
0345:
0346: /**
0347: * @stable ICU 2.0
0348: */
0349: ///CLOVER:OFF
0350: public int hashCode() {
0351: return options;
0352: }
0353:
0354: /**
0355: * @stable ICU 2.0
0356: */
0357: public String toString() {
0358: StringBuffer buf = new StringBuffer(super .toString());
0359: buf.append('[');
0360: switch (options & LENGTH_MASK) {
0361: case LENGTH_GROW_SHRINK:
0362: buf.append("grow/shrink");
0363: break;
0364: case LENGTH_FIXED_SPACES_NEAR:
0365: buf.append("spaces near");
0366: break;
0367: case LENGTH_FIXED_SPACES_AT_END:
0368: buf.append("spaces at end");
0369: break;
0370: case LENGTH_FIXED_SPACES_AT_BEGINNING:
0371: buf.append("spaces at beginning");
0372: break;
0373: }
0374: switch (options & TEXT_DIRECTION_MASK) {
0375: case TEXT_DIRECTION_LOGICAL:
0376: buf.append(", logical");
0377: break;
0378: case TEXT_DIRECTION_VISUAL_LTR:
0379: buf.append(", visual");
0380: break;
0381: }
0382: switch (options & LETTERS_MASK) {
0383: case LETTERS_NOOP:
0384: buf.append(", no letter shaping");
0385: break;
0386: case LETTERS_SHAPE:
0387: buf.append(", shape letters");
0388: break;
0389: case LETTERS_SHAPE_TASHKEEL_ISOLATED:
0390: buf.append(", shape letters tashkeel isolated");
0391: break;
0392: case LETTERS_UNSHAPE:
0393: buf.append(", unshape letters");
0394: break;
0395: }
0396: switch (options & DIGITS_MASK) {
0397: case DIGITS_NOOP:
0398: buf.append(", no digit shaping");
0399: break;
0400: case DIGITS_EN2AN:
0401: buf.append(", shape digits to AN");
0402: break;
0403: case DIGITS_AN2EN:
0404: buf.append(", shape digits to EN");
0405: break;
0406: case DIGITS_EN2AN_INIT_LR:
0407: buf.append(", shape digits to AN contextually: default EN");
0408: break;
0409: case DIGITS_EN2AN_INIT_AL:
0410: buf.append(", shape digits to AN contextually: default AL");
0411: break;
0412: }
0413: switch (options & DIGIT_TYPE_MASK) {
0414: case DIGIT_TYPE_AN:
0415: buf.append(", standard Arabic-Indic digits");
0416: break;
0417: case DIGIT_TYPE_AN_EXTENDED:
0418: buf.append(", extended Arabic-Indic digits");
0419: break;
0420: }
0421: buf.append("]");
0422:
0423: return buf.toString();
0424: }
0425:
0426: ///CLOVER:ON
0427:
0428: //
0429: // ported api
0430: //
0431:
0432: private static final int IRRELEVANT = 4;
0433: private static final int LAMTYPE = 16;
0434: private static final int ALEFTYPE = 32;
0435:
0436: private static final int LINKR = 1;
0437: private static final int LINKL = 2;
0438: private static final int LINK_MASK = 3;
0439:
0440: private static final int irrelevantPos[] = { 0x0, 0x2, 0x4, 0x6,
0441: 0x8, 0xA, 0xC, 0xE };
0442:
0443: private static final char convertLamAlef[] = { '\u0622', // FEF5
0444: '\u0622', // FEF6
0445: '\u0623', // FEF7
0446: '\u0623', // FEF8
0447: '\u0625', // FEF9
0448: '\u0625', // FEFA
0449: '\u0627', // FEFB
0450: '\u0627' // FEFC
0451: };
0452:
0453: private static final char convertNormalizedLamAlef[] = { '\u0622', // 065C
0454: '\u0623', // 065D
0455: '\u0625', // 065E
0456: '\u0627', // 065F
0457: };
0458:
0459: private static final int[] araLink = { 1 + 32 + 256 * 0x11, /*0x0622*/
0460: 1 + 32 + 256 * 0x13, /*0x0623*/
0461: 1 + 256 * 0x15, /*0x0624*/
0462: 1 + 32 + 256 * 0x17, /*0x0625*/
0463: 1 + 2 + 256 * 0x19, /*0x0626*/
0464: 1 + 32 + 256 * 0x1D, /*0x0627*/
0465: 1 + 2 + 256 * 0x1F, /*0x0628*/
0466: 1 + 256 * 0x23, /*0x0629*/
0467: 1 + 2 + 256 * 0x25, /*0x062A*/
0468: 1 + 2 + 256 * 0x29, /*0x062B*/
0469: 1 + 2 + 256 * 0x2D, /*0x062C*/
0470: 1 + 2 + 256 * 0x31, /*0x062D*/
0471: 1 + 2 + 256 * 0x35, /*0x062E*/
0472: 1 + 256 * 0x39, /*0x062F*/
0473: 1 + 256 * 0x3B, /*0x0630*/
0474: 1 + 256 * 0x3D, /*0x0631*/
0475: 1 + 256 * 0x3F, /*0x0632*/
0476: 1 + 2 + 256 * 0x41, /*0x0633*/
0477: 1 + 2 + 256 * 0x45, /*0x0634*/
0478: 1 + 2 + 256 * 0x49, /*0x0635*/
0479: 1 + 2 + 256 * 0x4D, /*0x0636*/
0480: 1 + 2 + 256 * 0x51, /*0x0637*/
0481: 1 + 2 + 256 * 0x55, /*0x0638*/
0482: 1 + 2 + 256 * 0x59, /*0x0639*/
0483: 1 + 2 + 256 * 0x5D, /*0x063A*/
0484: 0, 0, 0, 0, 0, /*0x063B-0x063F*/
0485: 1 + 2, /*0x0640*/
0486: 1 + 2 + 256 * 0x61, /*0x0641*/
0487: 1 + 2 + 256 * 0x65, /*0x0642*/
0488: 1 + 2 + 256 * 0x69, /*0x0643*/
0489: 1 + 2 + 16 + 256 * 0x6D, /*0x0644*/
0490: 1 + 2 + 256 * 0x71, /*0x0645*/
0491: 1 + 2 + 256 * 0x75, /*0x0646*/
0492: 1 + 2 + 256 * 0x79, /*0x0647*/
0493: 1 + 256 * 0x7D, /*0x0648*/
0494: 1 + 256 * 0x7F, /*0x0649*/
0495: 1 + 2 + 256 * 0x81, /*0x064A*/
0496: 4, 4, 4, 4, /*0x064B-0x064E*/
0497: 4, 4, 4, 4, /*0x064F-0x0652*/
0498: 4, 4, 4, 0, 0, /*0x0653-0x0657*/
0499: 0, 0, 0, 0, /*0x0658-0x065B*/
0500: 1 + 256 * 0x85, /*0x065C*/
0501: 1 + 256 * 0x87, /*0x065D*/
0502: 1 + 256 * 0x89, /*0x065E*/
0503: 1 + 256 * 0x8B, /*0x065F*/
0504: 0, 0, 0, 0, 0, /*0x0660-0x0664*/
0505: 0, 0, 0, 0, 0, /*0x0665-0x0669*/
0506: 0, 0, 0, 0, 0, 0, /*0x066A-0x066F*/
0507: 4, /*0x0670*/
0508: 0, /*0x0671*/
0509: 1 + 32, /*0x0672*/
0510: 1 + 32, /*0x0673*/
0511: 0, /*0x0674*/
0512: 1 + 32, /*0x0675*/
0513: 1, 1, /*0x0676-0x0677*/
0514: 1 + 2, 1 + 2, 1 + 2, 1 + 2, 1 + 2, 1 + 2, /*0x0678-0x067D*/
0515: 1 + 2, 1 + 2, 1 + 2, 1 + 2, 1 + 2, 1 + 2, /*0x067E-0x0683*/
0516: 1 + 2, 1 + 2, 1 + 2, 1 + 2, /*0x0684-0x0687*/
0517: 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /*0x0688-0x0691*/
0518: 1, 1, 1, 1, 1, 1, 1, 1, /*0x0692-0x0699*/
0519: 1 + 2, 1 + 2, 1 + 2, 1 + 2, 1 + 2, 1 + 2, /*0x069A-0x06A3*/
0520: 1 + 2, 1 + 2, 1 + 2, 1 + 2, /*0x069A-0x06A3*/
0521: 1 + 2, 1 + 2, 1 + 2, 1 + 2, 1 + 2, 1 + 2, /*0x06A4-0x06AD*/
0522: 1 + 2, 1 + 2, 1 + 2, 1 + 2, /*0x06A4-0x06AD*/
0523: 1 + 2, 1 + 2, 1 + 2, 1 + 2, 1 + 2, 1 + 2, /*0x06AE-0x06B7*/
0524: 1 + 2, 1 + 2, 1 + 2, 1 + 2, /*0x06AE-0x06B7*/
0525: 1 + 2, 1 + 2, 1 + 2, 1 + 2, 1 + 2, 1 + 2, /*0x06B8-0x06BF*/
0526: 1 + 2, 1 + 2, /*0x06B8-0x06BF*/
0527: 1, /*0x06C0*/
0528: 1 + 2, /*0x06C1*/
0529: 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /*0x06C2-0x06CB*/
0530: 1 + 2, /*0x06CC*/
0531: 1, /*0x06CD*/
0532: 1 + 2, 1 + 2, 1 + 2, 1 + 2, /*0x06CE-0x06D1*/
0533: 1, 1 /*0x06D2-0x06D3*/
0534: };
0535:
0536: private static final int[] presLink = { 1 + 2, /*0xFE70*/
0537: 1 + 2, /*0xFE71*/
0538: 1 + 2, 0, 1 + 2, 0, 1 + 2, /*0xFE72-0xFE76*/
0539: 1 + 2, /*0xFE77*/
0540: 1 + 2, 1 + 2, 1 + 2, 1 + 2, /*0xFE78-0xFE81*/
0541: 1 + 2, 1 + 2, 1 + 2, 1 + 2, /*0xFE82-0xFE85*/
0542: 0, 0 + 32, 1 + 32, 0 + 32, /*0xFE86-0xFE89*/
0543: 1 + 32, 0, 1, 0 + 32, /*0xFE8A-0xFE8D*/
0544: 1 + 32, 0, 2, 1 + 2, /*0xFE8E-0xFE91*/
0545: 1, 0 + 32, 1 + 32, 0, /*0xFE92-0xFE95*/
0546: 2, 1 + 2, 1, 0, /*0xFE96-0xFE99*/
0547: 1, 0, 2, 1 + 2, /*0xFE9A-0xFE9D*/
0548: 1, 0, 2, 1 + 2, /*0xFE9E-0xFEA1*/
0549: 1, 0, 2, 1 + 2, /*0xFEA2-0xFEA5*/
0550: 1, 0, 2, 1 + 2, /*0xFEA6-0xFEA9*/
0551: 1, 0, 2, 1 + 2, /*0xFEAA-0xFEAD*/
0552: 1, 0, 1, 0, /*0xFEAE-0xFEB1*/
0553: 1, 0, 1, 0, /*0xFEB2-0xFEB5*/
0554: 1, 0, 2, 1 + 2, /*0xFEB6-0xFEB9*/
0555: 1, 0, 2, 1 + 2, /*0xFEBA-0xFEBD*/
0556: 1, 0, 2, 1 + 2, /*0xFEBE-0xFEC1*/
0557: 1, 0, 2, 1 + 2, /*0xFEC2-0xFEC5*/
0558: 1, 0, 2, 1 + 2, /*0xFEC6-0xFEC9*/
0559: 1, 0, 2, 1 + 2, /*0xFECA-0xFECD*/
0560: 1, 0, 2, 1 + 2, /*0xFECE-0xFED1*/
0561: 1, 0, 2, 1 + 2, /*0xFED2-0xFED5*/
0562: 1, 0, 2, 1 + 2, /*0xFED6-0xFED9*/
0563: 1, 0, 2, 1 + 2, /*0xFEDA-0xFEDD*/
0564: 1, 0, 2, 1 + 2, /*0xFEDE-0xFEE1*/
0565: 1, 0 + 16, 2 + 16, 1 + 2 + 16, /*0xFEE2-0xFEE5*/
0566: 1 + 16, 0, 2, 1 + 2, /*0xFEE6-0xFEE9*/
0567: 1, 0, 2, 1 + 2, /*0xFEEA-0xFEED*/
0568: 1, 0, 2, 1 + 2, /*0xFEEE-0xFEF1*/
0569: 1, 0, 1, 0, /*0xFEF2-0xFEF5*/
0570: 1, 0, 2, 1 + 2, /*0xFEF6-0xFEF9*/
0571: 1, 0, 1, 0, /*0xFEFA-0xFEFD*/
0572: 1, 0, 1, 0, 1 };
0573:
0574: private static int[] convertFEto06 = {
0575: /***********0******1******2******3******4******5******6******7******8******9******A******B******C******D******E******F***/
0576: /*FE7*/0x64B, 0x64B, 0x64C, 0x64C, 0x64D, 0x64D, 0x64E, 0x64E,
0577: 0x64F, 0x64F, 0x650, 0x650, 0x651, 0x651, 0x652, 0x652,
0578: /*FE8*/0x621, 0x622, 0x622, 0x623, 0x623, 0x624, 0x624,
0579: 0x625, 0x625, 0x626, 0x626, 0x626, 0x626, 0x627, 0x627,
0580: 0x628,
0581: /*FE9*/0x628, 0x628, 0x628, 0x629, 0x629, 0x62A, 0x62A,
0582: 0x62A, 0x62A, 0x62B, 0x62B, 0x62B, 0x62B, 0x62C, 0x62C,
0583: 0x62C,
0584: /*FEA*/0x62C, 0x62D, 0x62D, 0x62D, 0x62D, 0x62E, 0x62E,
0585: 0x62E, 0x62E, 0x62F, 0x62F, 0x630, 0x630, 0x631, 0x631,
0586: 0x632,
0587: /*FEB*/0x632, 0x633, 0x633, 0x633, 0x633, 0x634, 0x634,
0588: 0x634, 0x634, 0x635, 0x635, 0x635, 0x635, 0x636, 0x636,
0589: 0x636,
0590: /*FEC*/0x636, 0x637, 0x637, 0x637, 0x637, 0x638, 0x638,
0591: 0x638, 0x638, 0x639, 0x639, 0x639, 0x639, 0x63A, 0x63A,
0592: 0x63A,
0593: /*FED*/0x63A, 0x641, 0x641, 0x641, 0x641, 0x642, 0x642,
0594: 0x642, 0x642, 0x643, 0x643, 0x643, 0x643, 0x644, 0x644,
0595: 0x644,
0596: /*FEE*/0x644, 0x645, 0x645, 0x645, 0x645, 0x646, 0x646,
0597: 0x646, 0x646, 0x647, 0x647, 0x647, 0x647, 0x648, 0x648,
0598: 0x649,
0599: /*FEF*/0x649, 0x64A, 0x64A, 0x64A, 0x64A, 0x65C, 0x65C,
0600: 0x65D, 0x65D, 0x65E, 0x65E, 0x65F, 0x65F };
0601:
0602: private static final int shapeTable[][][] = {
0603: { { 0, 0, 0, 0 }, { 0, 0, 0, 0 }, { 0, 1, 0, 3 },
0604: { 0, 1, 0, 1 } },
0605: { { 0, 0, 2, 2 }, { 0, 0, 1, 2 }, { 0, 1, 1, 2 },
0606: { 0, 1, 1, 3 } },
0607: { { 0, 0, 0, 0 }, { 0, 0, 0, 0 }, { 0, 1, 0, 3 },
0608: { 0, 1, 0, 3 } },
0609: { { 0, 0, 1, 2 }, { 0, 0, 1, 2 }, { 0, 1, 1, 2 },
0610: { 0, 1, 1, 3 } } };
0611:
0612: /*
0613: * This function shapes European digits to Arabic-Indic digits
0614: * in-place, writing over the input characters. Data is in visual
0615: * order.
0616: */
0617: private void shapeToArabicDigitsWithContext(char[] dest, int start,
0618: int length, char digitBase, boolean lastStrongWasAL) {
0619: UBiDiProps bdp;
0620: try {
0621: bdp = UBiDiProps.getSingleton();
0622: } catch (IOException e) {
0623: throw new MissingResourceException(e.getMessage(),
0624: "(BidiProps)", "");
0625: }
0626: digitBase -= '0'; // move common adjustment out of loop
0627:
0628: for (int i = start + length; --i >= start;) {
0629: char ch = dest[i];
0630: switch (bdp.getClass(ch)) {
0631: case UCharacterDirection.LEFT_TO_RIGHT:
0632: case UCharacterDirection.RIGHT_TO_LEFT:
0633: lastStrongWasAL = false;
0634: break;
0635: case UCharacterDirection.RIGHT_TO_LEFT_ARABIC:
0636: lastStrongWasAL = true;
0637: break;
0638: case UCharacterDirection.EUROPEAN_NUMBER:
0639: if (lastStrongWasAL && ch <= '\u0039') {
0640: dest[i] = (char) (ch + digitBase);
0641: }
0642: break;
0643: default:
0644: break;
0645: }
0646: }
0647: }
0648:
0649: /*
0650: * Name : invertBuffer
0651: * Function: This function inverts the buffer, it's used
0652: * in case the user specifies the buffer to be
0653: * TEXT_DIRECTION_LOGICAL
0654: */
0655: private static void invertBuffer(char[] buffer, int start,
0656: int length) {
0657:
0658: for (int i = start, j = start + length - 1; i < j; i++, --j) {
0659: char temp = buffer[i];
0660: buffer[i] = buffer[j];
0661: buffer[j] = temp;
0662: }
0663: }
0664:
0665: /*
0666: * Name : changeLamAlef
0667: * Function: Converts the Alef characters into an equivalent
0668: * LamAlef location in the 0x06xx Range, this is an
0669: * intermediate stage in the operation of the program
0670: * later it'll be converted into the 0xFExx LamAlefs
0671: * in the shaping function.
0672: */
0673: private static char changeLamAlef(char ch) {
0674: switch (ch) {
0675: case '\u0622':
0676: return '\u065C';
0677: case '\u0623':
0678: return '\u065D';
0679: case '\u0625':
0680: return '\u065E';
0681: case '\u0627':
0682: return '\u065F';
0683: default:
0684: return '\u0000'; // not a lamalef
0685: }
0686: }
0687:
0688: /*
0689: * Name : specialChar
0690: * Function: Special Arabic characters need special handling in the shapeUnicode
0691: * function, this function returns 1 or 2 for these special characters
0692: */
0693: private static int specialChar(char ch) {
0694: if ((ch > '\u0621' && ch < '\u0626') || (ch == '\u0627')
0695: || (ch > '\u062E' && ch < '\u0633')
0696: || (ch > '\u0647' && ch < '\u064A') || (ch == '\u0629')) {
0697: return 1;
0698: } else if (ch >= '\u064B' && ch <= '\u0652') {
0699: return 2;
0700: } else if (ch >= 0x0653 && ch <= 0x0655 || ch == 0x0670
0701: || ch >= 0xFE70 && ch <= 0xFE7F) {
0702: return 3;
0703: } else {
0704: return 0;
0705: }
0706: }
0707:
0708: /*
0709: * Name : getLink
0710: * Function: Resolves the link between the characters as
0711: * Arabic characters have four forms :
0712: * Isolated, Initial, Middle and Final Form
0713: */
0714: private static int getLink(char ch) {
0715: if (ch >= '\u0622' && ch <= '\u06D3') {
0716: return araLink[ch - '\u0622'];
0717: } else if (ch == '\u200D') {
0718: return 3;
0719: } else if (ch >= '\u206D' && ch <= '\u206F') {
0720: return 4;
0721: } else if (ch >= '\uFE70' && ch <= '\uFEFC') {
0722: return presLink[ch - '\uFE70'];
0723: } else {
0724: return 0;
0725: }
0726: }
0727:
0728: /*
0729: * Name : countSpaces
0730: * Function: Counts the number of spaces
0731: * at each end of the logical buffer
0732: */
0733: private static int countSpacesLeft(char[] dest, int start, int count) {
0734: for (int i = start, e = start + count; i < e; ++i) {
0735: if (dest[i] != '\u0020') {
0736: return i - start;
0737: }
0738: }
0739: return count;
0740: }
0741:
0742: private static int countSpacesRight(char[] dest, int start,
0743: int count) {
0744:
0745: for (int i = start + count; --i >= start;) {
0746: if (dest[i] != '\u0020') {
0747: return start + count - 1 - i;
0748: }
0749: }
0750: return count;
0751: }
0752:
0753: /*
0754: * Name : isTashkeelChar
0755: * Function: Returns 1 for Tashkeel characters else return 0
0756: */
0757: private static boolean isTashkeelChar(char ch) {
0758: return ch >= '\u064B' && ch <= '\u0652';
0759: }
0760:
0761: /*
0762: * Name : isAlefChar
0763: * Function: Returns 1 for Alef characters else return 0
0764: */
0765: private static boolean isAlefChar(char ch) {
0766: return ch == '\u0622' || ch == '\u0623' || ch == '\u0625'
0767: || ch == '\u0627';
0768: }
0769:
0770: /*
0771: * Name : isLamAlefChar
0772: * Function: Returns 1 for LamAlef characters else return 0
0773: */
0774: private static boolean isLamAlefChar(char ch) {
0775: return ch >= '\uFEF5' && ch <= '\uFEFC';
0776: }
0777:
0778: private static boolean isNormalizedLamAlefChar(char ch) {
0779: return ch >= '\u065C' && ch <= '\u065F';
0780: }
0781:
0782: /*
0783: * Name : calculateSize
0784: * Function: This function calculates the destSize to be used in preflighting
0785: * when the destSize is equal to 0
0786: */
0787: private int calculateSize(char[] source, int sourceStart,
0788: int sourceLength) {
0789:
0790: int destSize = sourceLength;
0791:
0792: switch (options & LETTERS_MASK) {
0793: case LETTERS_SHAPE:
0794: case LETTERS_SHAPE_TASHKEEL_ISOLATED:
0795: if (isLogical) {
0796: for (int i = sourceStart, e = sourceStart
0797: + sourceLength - 1; i < e; ++i) {
0798: if (source[i] == '\u0644'
0799: && isAlefChar(source[i + 1])) {
0800: --destSize;
0801: }
0802: }
0803: } else { // visual
0804: for (int i = sourceStart + 1, e = sourceStart
0805: + sourceLength; i < e; ++i) {
0806: if (source[i] == '\u0644'
0807: && isAlefChar(source[i - 1])) {
0808: --destSize;
0809: }
0810: }
0811: }
0812: break;
0813:
0814: case LETTERS_UNSHAPE:
0815: for (int i = sourceStart, e = sourceStart + sourceLength; i < e; ++i) {
0816: if (isLamAlefChar(source[i])) {
0817: destSize++;
0818: }
0819: }
0820: break;
0821:
0822: default:
0823: break;
0824: }
0825:
0826: return destSize;
0827: }
0828:
0829: /*
0830: * Name : removeLamAlefSpaces
0831: * Function: The shapeUnicode function converts Lam + Alef into LamAlef + space,
0832: * this function removes the spaces behind the LamAlefs according to
0833: * the options the user specifies, the spaces are removed to the end
0834: * of the buffer, or shrink the buffer and remove spaces for good
0835: * or leave the buffer as it is LamAlef + space.
0836: */
0837: private int removeLamAlefSpaces(char[] dest, int start, int length) {
0838:
0839: int lenOptions = options & LENGTH_MASK;
0840: if (!isLogical) {
0841: switch (lenOptions) {
0842: case LENGTH_FIXED_SPACES_AT_BEGINNING:
0843: lenOptions = LENGTH_FIXED_SPACES_AT_END;
0844: break;
0845: case LENGTH_FIXED_SPACES_AT_END:
0846: lenOptions = LENGTH_FIXED_SPACES_AT_BEGINNING;
0847: break;
0848: default:
0849: break;
0850: }
0851: }
0852:
0853: if (lenOptions == LENGTH_FIXED_SPACES_NEAR) {
0854: for (int i = start, e = i + length; i < e; ++i) {
0855: if (dest[i] == '\uffff') {
0856: dest[i] = '\u0020';
0857: }
0858: }
0859: } else {
0860: final int e = start + length;
0861: int w = e;
0862: int r = e;
0863: while (--r >= start) {
0864: char ch = dest[r];
0865: if (ch != '\uffff') {
0866: --w;
0867: if (w != r) {
0868: dest[w] = ch;
0869: }
0870: }
0871: }
0872:
0873: if (lenOptions == LENGTH_FIXED_SPACES_AT_END) {
0874: while (w > start) {
0875: dest[--w] = '\u0020';
0876: }
0877: } else {
0878: if (w > start) {
0879: // shift, assume small buffer size so don't use arraycopy
0880: r = w;
0881: w = start;
0882: while (r < e) {
0883: dest[w++] = dest[r++];
0884: }
0885: } else {
0886: w = e;
0887: }
0888: if (lenOptions == LENGTH_GROW_SHRINK) {
0889: length = w - start;
0890: } else { // spaces at beginning
0891: while (w < e) {
0892: dest[w++] = '\u0020';
0893: }
0894: }
0895: }
0896: }
0897: return length;
0898: }
0899:
0900: /*
0901: * Name : expandLamAlef
0902: * Function: LamAlef needs special handling as the LamAlef is
0903: * one character while expanding it will give two
0904: * characters Lam + Alef, so we need to expand the LamAlef
0905: * in near or far spaces according to the options the user
0906: * specifies or increase the buffer size.
0907: * Dest has enough room for the expansion if we are growing.
0908: * lamalef are normalized to the 'special characters'
0909: */
0910: private int expandLamAlef(char[] dest, int start, int length,
0911: int lacount) throws ArabicShapingException {
0912:
0913: int lenOptions = options & LENGTH_MASK;
0914: if (!isLogical) {
0915: switch (lenOptions) {
0916: case LENGTH_FIXED_SPACES_AT_BEGINNING:
0917: lenOptions = LENGTH_FIXED_SPACES_AT_END;
0918: break;
0919: case LENGTH_FIXED_SPACES_AT_END:
0920: lenOptions = LENGTH_FIXED_SPACES_AT_BEGINNING;
0921: break;
0922: default:
0923: break;
0924: }
0925: }
0926:
0927: switch (lenOptions) {
0928: case LENGTH_GROW_SHRINK: {
0929: for (int r = start + length, w = r + lacount; --r >= start;) {
0930: char ch = dest[r];
0931: if (isNormalizedLamAlefChar(ch)) {
0932: dest[--w] = '\u0644';
0933: dest[--w] = convertNormalizedLamAlef[ch - '\u065C'];
0934: } else {
0935: dest[--w] = ch;
0936: }
0937: }
0938: }
0939: length += lacount;
0940: break;
0941:
0942: case LENGTH_FIXED_SPACES_NEAR: {
0943: if (isNormalizedLamAlefChar(dest[start])) {
0944: throw new ArabicShapingException("no space for lamalef");
0945: }
0946: for (int i = start + length; --i > start;) { // don't check start, already checked
0947: char ch = dest[i];
0948: if (isNormalizedLamAlefChar(ch)) {
0949: if (dest[i - 1] == '\u0020') {
0950: dest[i] = '\u0644';
0951: dest[--i] = convertNormalizedLamAlef[ch - '\u065C'];
0952: } else {
0953: throw new ArabicShapingException(
0954: "no space for lamalef");
0955: }
0956: }
0957: }
0958: }
0959: break;
0960:
0961: case LENGTH_FIXED_SPACES_AT_END: {
0962: if (lacount > countSpacesLeft(dest, start, length)) {
0963: throw new ArabicShapingException("no space for lamalef");
0964: }
0965: for (int r = start + lacount, w = start, e = start + length; r < e; ++r) {
0966: char ch = dest[r];
0967: if (isNormalizedLamAlefChar(ch)) {
0968: dest[w++] = convertNormalizedLamAlef[ch - '\u065C'];
0969: dest[w++] = '\u0644';
0970: } else {
0971: dest[w++] = ch;
0972: }
0973: }
0974: }
0975: break;
0976:
0977: case LENGTH_FIXED_SPACES_AT_BEGINNING: {
0978: if (lacount > countSpacesRight(dest, start, length)) {
0979: throw new ArabicShapingException("no space for lamalef");
0980: }
0981: for (int r = start + length - lacount, w = start + length; --r >= start;) {
0982: char ch = dest[r];
0983: if (isNormalizedLamAlefChar(ch)) {
0984: dest[--w] = '\u0644';
0985: dest[--w] = convertNormalizedLamAlef[ch - '\u065C'];
0986: } else {
0987: dest[--w] = ch;
0988: }
0989: }
0990: }
0991: break;
0992: }
0993:
0994: return length;
0995: }
0996:
0997: /* Convert the input buffer from FExx Range into 06xx Range
0998: * to put all characters into the 06xx range
0999: * even the lamalef is converted to the special region in
1000: * the 06xx range. Return the number of lamalef chars found.
1001: */
1002: private int normalize(char[] dest, int start, int length) {
1003: int lacount = 0;
1004: for (int i = start, e = i + length; i < e; ++i) {
1005: char ch = dest[i];
1006: if (ch >= '\uFE70' && ch <= '\uFEFC') {
1007: if (isLamAlefChar(ch)) {
1008: ++lacount;
1009: }
1010: dest[i] = (char) convertFEto06[ch - '\uFE70'];
1011: }
1012: }
1013: return lacount;
1014: }
1015:
1016: /*
1017: * Name : shapeUnicode
1018: * Function: Converts an Arabic Unicode buffer in 06xx Range into a shaped
1019: * arabic Unicode buffer in FExx Range
1020: */
1021: private int shapeUnicode(char[] dest, int start, int length,
1022: int destSize, int tashkeelFlag) {
1023:
1024: normalize(dest, start, length);
1025:
1026: // resolve the link between the characters.
1027: // Arabic characters have four forms: Isolated, Initial, Medial and Final.
1028: // Tashkeel characters have two, isolated or medial, and sometimes only isolated.
1029: // tashkeelFlag == 0: shape normally, 1: shape isolated, 2: don't shape
1030:
1031: boolean lamalef_found = false;
1032: int i = start + length - 1;
1033: int currLink = getLink(dest[i]);
1034: int nextLink = 0;
1035: int prevLink = 0;
1036: int lastLink = 0;
1037: int prevPos = i;
1038: int lastPos = i;
1039: int nx = -2;
1040: int nw = 0;
1041:
1042: while (i >= 0) {
1043: // If high byte of currLink > 0 then there might be more than one shape
1044: if ((currLink & '\uFF00') > 0 || isTashkeelChar(dest[i])) {
1045: nw = i - 1;
1046: nx = -2;
1047: while (nx < 0) { // we need to know about next char
1048: if (nw == -1) {
1049: nextLink = 0;
1050: nx = Integer.MAX_VALUE;
1051: } else {
1052: nextLink = getLink(dest[nw]);
1053: if ((nextLink & IRRELEVANT) == 0) {
1054: nx = nw;
1055: } else {
1056: --nw;
1057: }
1058: }
1059: }
1060:
1061: if (((currLink & ALEFTYPE) > 0)
1062: && ((lastLink & LAMTYPE) > 0)) {
1063: lamalef_found = true;
1064: char wLamalef = changeLamAlef(dest[i]); // get from 0x065C-0x065f
1065: if (wLamalef != '\u0000') {
1066: // replace alef by marker, it will be removed later
1067: dest[i] = '\uffff';
1068: dest[lastPos] = wLamalef;
1069: i = lastPos;
1070: }
1071:
1072: lastLink = prevLink;
1073: currLink = getLink(wLamalef); // requires '\u0000', unfortunately
1074: }
1075:
1076: // get the proper shape according to link ability of neighbors
1077: // and of character; depends on the order of the shapes
1078: // (isolated, initial, middle, final) in the compatibility area
1079:
1080: int flag = specialChar(dest[i]);
1081:
1082: int shape = shapeTable[nextLink & LINK_MASK][lastLink
1083: & LINK_MASK][currLink & LINK_MASK];
1084:
1085: if (flag == 1) {
1086: shape &= 0x1;
1087: } else if (flag == 2) {
1088: if (tashkeelFlag == 0
1089: && ((lastLink & LINKL) != 0)
1090: && ((nextLink & LINKR) != 0)
1091: && dest[i] != '\u064C'
1092: && dest[i] != '\u064D'
1093: && !((nextLink & ALEFTYPE) == ALEFTYPE && (lastLink & LAMTYPE) == LAMTYPE)) {
1094:
1095: shape = 1;
1096: } else {
1097: shape = 0;
1098: }
1099: }
1100:
1101: if (flag == 2) {
1102: if (tashkeelFlag < 2) {
1103: dest[i] = (char) ('\uFE70' + irrelevantPos[dest[i] - '\u064B'] + shape);
1104: } // else leave tashkeel alone
1105: } else {
1106: dest[i] = (char) ('\uFE70' + (currLink >> 8) + shape);
1107: }
1108: }
1109:
1110: // move one notch forward
1111: if ((currLink & IRRELEVANT) == 0) {
1112: prevLink = lastLink;
1113: lastLink = currLink;
1114: prevPos = lastPos;
1115: lastPos = i;
1116: }
1117:
1118: --i;
1119: if (i == nx) {
1120: currLink = nextLink;
1121: nx = -2;
1122: } else if (i != -1) {
1123: currLink = getLink(dest[i]);
1124: }
1125: }
1126:
1127: // If we found a lam/alef pair in the buffer
1128: // call removeLamAlefSpaces to remove the spaces that were added
1129:
1130: if (lamalef_found) {
1131: destSize = removeLamAlefSpaces(dest, start, length);
1132: } else {
1133: destSize = length;
1134: }
1135:
1136: return destSize;
1137: }
1138:
1139: /*
1140: * Name : deShapeUnicode
1141: * Function: Converts an Arabic Unicode buffer in FExx Range into unshaped
1142: * arabic Unicode buffer in 06xx Range
1143: */
1144: private int deShapeUnicode(char[] dest, int start, int length,
1145: int destSize) throws ArabicShapingException {
1146:
1147: int lamalef_count = normalize(dest, start, length);
1148:
1149: // If there was a lamalef in the buffer call expandLamAlef
1150: if (lamalef_count != 0) {
1151: // need to adjust dest to fit expanded buffer... !!!
1152: destSize = expandLamAlef(dest, start, length, lamalef_count);
1153: } else {
1154: destSize = length;
1155: }
1156:
1157: return destSize;
1158: }
1159:
1160: private int internalShape(char[] source, int sourceStart,
1161: int sourceLength, char[] dest, int destStart, int destSize)
1162: throws ArabicShapingException {
1163:
1164: if (sourceLength == 0) {
1165: return 0;
1166: }
1167:
1168: if (destSize == 0) {
1169: if (((options & LETTERS_MASK) != LETTERS_NOOP)
1170: && ((options & LENGTH_MASK) == LENGTH_GROW_SHRINK)) {
1171:
1172: return calculateSize(source, sourceStart, sourceLength);
1173: } else {
1174: return sourceLength; // by definition
1175: }
1176: }
1177:
1178: // always use temp buffer
1179: char[] temp = new char[sourceLength * 2]; // all lamalefs requiring expansion
1180: System.arraycopy(source, sourceStart, temp, 0, sourceLength);
1181:
1182: if (isLogical) {
1183: invertBuffer(temp, 0, sourceLength);
1184: }
1185:
1186: int outputSize = sourceLength;
1187:
1188: switch (options & LETTERS_MASK) {
1189: case LETTERS_SHAPE_TASHKEEL_ISOLATED:
1190: outputSize = shapeUnicode(temp, 0, sourceLength, destSize,
1191: 1);
1192: break;
1193:
1194: case LETTERS_SHAPE:
1195: outputSize = shapeUnicode(temp, 0, sourceLength, destSize,
1196: 0);
1197: break;
1198:
1199: case LETTERS_UNSHAPE:
1200: outputSize = deShapeUnicode(temp, 0, sourceLength, destSize);
1201: break;
1202:
1203: default:
1204: break;
1205: }
1206:
1207: if (outputSize > destSize) {
1208: throw new ArabicShapingException(
1209: "not enough room for result data");
1210: }
1211:
1212: if ((options & DIGITS_MASK) != DIGITS_NOOP) {
1213: char digitBase = '\u0030'; // European digits
1214: switch (options & DIGIT_TYPE_MASK) {
1215: case DIGIT_TYPE_AN:
1216: digitBase = '\u0660'; // Arabic-Indic digits
1217: break;
1218:
1219: case DIGIT_TYPE_AN_EXTENDED:
1220: digitBase = '\u06f0'; // Eastern Arabic-Indic digits (Persian and Urdu)
1221: break;
1222:
1223: default:
1224: break;
1225: }
1226:
1227: switch (options & DIGITS_MASK) {
1228: case DIGITS_EN2AN: {
1229: int digitDelta = digitBase - '\u0030';
1230: for (int i = 0; i < outputSize; ++i) {
1231: char ch = temp[i];
1232: if (ch <= '\u0039' && ch >= '\u0030') {
1233: temp[i] += digitDelta;
1234: }
1235: }
1236: }
1237: break;
1238:
1239: case DIGITS_AN2EN: {
1240: char digitTop = (char) (digitBase + 9);
1241: int digitDelta = '\u0030' - digitBase;
1242: for (int i = 0; i < outputSize; ++i) {
1243: char ch = temp[i];
1244: if (ch <= digitTop && ch >= digitBase) {
1245: temp[i] += digitDelta;
1246: }
1247: }
1248: }
1249: break;
1250:
1251: case DIGITS_EN2AN_INIT_LR:
1252: shapeToArabicDigitsWithContext(temp, 0, outputSize,
1253: digitBase, false);
1254: break;
1255:
1256: case DIGITS_EN2AN_INIT_AL:
1257: shapeToArabicDigitsWithContext(temp, 0, outputSize,
1258: digitBase, true);
1259: break;
1260:
1261: default:
1262: break;
1263: }
1264: }
1265:
1266: if (isLogical) {
1267: invertBuffer(temp, 0, outputSize);
1268: }
1269:
1270: System.arraycopy(temp, 0, dest, destStart, outputSize);
1271:
1272: return outputSize;
1273: }
1274: }
|