0001: package com.lowagie.text.pdf;
0002:
0003: /*
0004: * Copyright 2003 Paulo Soares
0005: *
0006: * The contents of this file are subject to the Mozilla Public License Version 1.1
0007: * (the "License"); you may not use this file except in compliance with the License.
0008: * You may obtain a copy of the License at http://www.mozilla.org/MPL/
0009: *
0010: * Software distributed under the License is distributed on an "AS IS" basis,
0011: * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
0012: * for the specific language governing rights and limitations under the License.
0013: *
0014: * The Original Code is 'iText, a free JAVA-PDF library'.
0015: *
0016: * The Initial Developer of the Original Code is Bruno Lowagie. Portions created by
0017: * the Initial Developer are Copyright (C) 1999, 2000, 2001, 2002 by Bruno Lowagie.
0018: * All Rights Reserved.
0019: * Co-Developer of the code is Paulo Soares. Portions created by the Co-Developer
0020: * are Copyright (C) 2000, 2001, 2002 by Paulo Soares. All Rights Reserved.
0021: *
0022: * Contributor(s): all the names of the contributors are added in the source code
0023: * where applicable.
0024: *
0025: * Alternatively, the contents of this file may be used under the terms of the
0026: * LGPL license (the "GNU LIBRARY GENERAL PUBLIC LICENSE"), in which case the
0027: * provisions of LGPL are applicable instead of those above. If you wish to
0028: * allow use of your version of this file only under the terms of the LGPL
0029: * License and not to allow others to use your version of this file under
0030: * the MPL, indicate your decision by deleting the provisions above and
0031: * replace them with the notice and other provisions required by the LGPL.
0032: * If you do not delete the provisions above, a recipient may use your version
0033: * of this file under either the MPL or the GNU LIBRARY GENERAL PUBLIC LICENSE.
0034: *
0035: * This library is free software; you can redistribute it and/or modify it
0036: * under the terms of the MPL as stated above or under the terms of the GNU
0037: * Library General Public License as published by the Free Software Foundation;
0038: * either version 2 of the License, or any later version.
0039: *
0040: * This library is distributed in the hope that it will be useful, but WITHOUT
0041: * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
0042: * FOR A PARTICULAR PURPOSE. See the GNU Library general Public License for more
0043: * details.
0044: *
0045: * If you didn't download this code from the following link, you should check if
0046: * you aren't using an obsolete version:
0047: * http://www.lowagie.com/iText/
0048: */
0049:
0050: /*
0051: * (C) Copyright IBM Corp. 1999, All Rights Reserved
0052: *
0053: * version 1.1
0054: */
0055:
0056: /*
0057: * As stated in the Javadoc comments below, materials from Unicode.org
0058: * are used in this class. The following license applies to these materials:
0059: * http://www.unicode.org/copyright.html#Exhibit1
0060: *
0061: * EXHIBIT 1
0062: * UNICODE, INC. LICENSE AGREEMENT - DATA FILES AND SOFTWARE
0063: *
0064: * Unicode Data Files include all data files under the directories
0065: * http://www.unicode.org/Public/, http://www.unicode.org/reports/,
0066: * and http://www.unicode.org/cldr/data/ .
0067: * Unicode Software includes any source code published in the Unicode Standard
0068: * or under the directories http://www.unicode.org/Public/, http://www.unicode.org/reports/,
0069: * and http://www.unicode.org/cldr/data/.
0070: *
0071: * NOTICE TO USER: Carefully read the following legal agreement. BY DOWNLOADING,
0072: * INSTALLING, COPYING OR OTHERWISE USING UNICODE INC.'S DATA FILES ("DATA FILES"),
0073: * AND/OR SOFTWARE ("SOFTWARE"), YOU UNEQUIVOCALLY ACCEPT, AND AGREE TO BE BOUND BY,
0074: * ALL OF THE TERMS AND CONDITIONS OF THIS AGREEMENT. IF YOU DO NOT AGREE, DO NOT
0075: * DOWNLOAD, INSTALL, COPY, DISTRIBUTE OR USE THE DATA FILES OR SOFTWARE.
0076: *
0077: * COPYRIGHT AND PERMISSION NOTICE
0078: * Copyright (C) 1991-2007 Unicode, Inc. All rights reserved. Distributed under
0079: * the Terms of Use in http://www.unicode.org/copyright.html.
0080: *
0081: * Permission is hereby granted, free of charge, to any person obtaining a copy
0082: * of the Unicode data files and any associated documentation (the "Data Files")
0083: * or Unicode software and any associated documentation (the "Software") to deal
0084: * in the Data Files or Software without restriction, including without limitation
0085: * the rights to use, copy, modify, merge, publish, distribute, and/or sell copies
0086: * of the Data Files or Software, and to permit persons to whom the Data Files
0087: * or Software are furnished to do so, provided that (a) the above copyright
0088: * notice(s) and this permission notice appear with all copies of the Data Files
0089: * or Software, (b) both the above copyright notice(s) and this permission notice
0090: * appear in associated documentation, and (c) there is clear notice in each
0091: * modified Data File or in the Software as well as in the documentation associated
0092: * with the Data File(s) or Software that the data or software has been modified.
0093: *
0094: * THE DATA FILES AND SOFTWARE ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
0095: * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
0096: * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT OF THIRD PARTY RIGHTS.
0097: * IN NO EVENT SHALL THE COPYRIGHT HOLDER OR HOLDERS INCLUDED IN THIS NOTICE BE
0098: * LIABLE FOR ANY CLAIM, OR ANY SPECIAL INDIRECT OR CONSEQUENTIAL DAMAGES, OR ANY
0099: * DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
0100: * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
0101: * CONNECTION WITH THE USE OR PERFORMANCE OF THE DATA FILES OR SOFTWARE.
0102: *
0103: * Except as contained in this notice, the name of a copyright holder shall not
0104: * be used in advertising or otherwise to promote the sale, use or other dealings
0105: * in these Data Files or Software without prior written authorization of the
0106: * copyright holder.
0107: */
0108:
0109: /**
0110: * Reference implementation of the Unicode 3.0 Bidi algorithm.
0111: *
0112: * <p>
0113: * This implementation is not optimized for performance. It is intended
0114: * as a reference implementation that closely follows the specification
0115: * of the Bidirectional Algorithm in The Unicode Standard version 3.0.
0116: * <p>
0117: * <b>Input:</b><br>
0118: * There are two levels of input to the algorithm, since clients may prefer
0119: * to supply some information from out-of-band sources rather than relying on
0120: * the default behavior.
0121: * <ol>
0122: * <li>unicode type array
0123: * <li>unicode type array, with externally supplied base line direction
0124: * </ol>
0125: * <p><b>Output:</b><br>
0126: * Output is separated into several stages as well, to better enable clients
0127: * to evaluate various aspects of implementation conformance.
0128: * <ol>
0129: * <li>levels array over entire paragraph
0130: * <li>reordering array over entire paragraph
0131: * <li>levels array over line
0132: * <li>reordering array over line
0133: * </ol>
0134: * Note that for conformance, algorithms are only required to generate correct
0135: * reordering and character directionality (odd or even levels) over a line.
0136: * Generating identical level arrays over a line is not required. Bidi
0137: * explicit format codes (LRE, RLE, LRO, RLO, PDF) and BN can be assigned
0138: * arbitrary levels and positions as long as the other text matches.
0139: * <p>
0140: * As the algorithm is defined to operate on a single paragraph at a time,
0141: * this implementation is written to handle single paragraphs. Thus
0142: * rule P1 is presumed by this implementation-- the data provided to the
0143: * implementation is assumed to be a single paragraph, and either contains no
0144: * 'B' codes, or a single 'B' code at the end of the input. 'B' is allowed
0145: * as input to illustrate how the algorithm assigns it a level.
0146: * <p>
0147: * Also note that rules L3 and L4 depend on the rendering engine that uses
0148: * the result of the bidi algorithm. This implementation assumes that the
0149: * rendering engine expects combining marks in visual order (e.g. to the
0150: * left of their base character in RTL runs) and that it adjusts the glyphs
0151: * used to render mirrored characters that are in RTL runs so that they
0152: * render appropriately.
0153: *
0154: * @author Doug Felt
0155: */
0156:
0157: public final class BidiOrder {
0158: private byte[] initialTypes;
0159: private byte[] embeddings; // generated from processing format codes
0160: private byte paragraphEmbeddingLevel = -1; // undefined
0161:
0162: private int textLength; // for convenience
0163: private byte[] resultTypes; // for paragraph, not lines
0164: private byte[] resultLevels; // for paragraph, not lines
0165:
0166: // The bidi types
0167:
0168: /** Left-to-right*/
0169: public static final byte L = 0;
0170:
0171: /** Left-to-Right Embedding */
0172: public static final byte LRE = 1;
0173:
0174: /** Left-to-Right Override */
0175: public static final byte LRO = 2;
0176:
0177: /** Right-to-Left */
0178: public static final byte R = 3;
0179:
0180: /** Right-to-Left Arabic */
0181: public static final byte AL = 4;
0182:
0183: /** Right-to-Left Embedding */
0184: public static final byte RLE = 5;
0185:
0186: /** Right-to-Left Override */
0187: public static final byte RLO = 6;
0188:
0189: /** Pop Directional Format */
0190: public static final byte PDF = 7;
0191:
0192: /** European Number */
0193: public static final byte EN = 8;
0194:
0195: /** European Number Separator */
0196: public static final byte ES = 9;
0197:
0198: /** European Number Terminator */
0199: public static final byte ET = 10;
0200:
0201: /** Arabic Number */
0202: public static final byte AN = 11;
0203:
0204: /** Common Number Separator */
0205: public static final byte CS = 12;
0206:
0207: /** Non-Spacing Mark */
0208: public static final byte NSM = 13;
0209:
0210: /** Boundary Neutral */
0211: public static final byte BN = 14;
0212:
0213: /** Paragraph Separator */
0214: public static final byte B = 15;
0215:
0216: /** Segment Separator */
0217: public static final byte S = 16;
0218:
0219: /** Whitespace */
0220: public static final byte WS = 17;
0221:
0222: /** Other Neutrals */
0223: public static final byte ON = 18;
0224:
0225: /** Minimum bidi type value. */
0226: public static final byte TYPE_MIN = 0;
0227:
0228: /** Maximum bidi type value. */
0229: public static final byte TYPE_MAX = 18;
0230:
0231: //
0232: // Input
0233: //
0234:
0235: /**
0236: * Initialize using an array of direction types. Types range from TYPE_MIN to TYPE_MAX inclusive
0237: * and represent the direction codes of the characters in the text.
0238: *
0239: * @param types the types array
0240: */
0241: public BidiOrder(byte[] types) {
0242: validateTypes(types);
0243:
0244: this .initialTypes = (byte[]) types.clone(); // client type array remains unchanged
0245:
0246: runAlgorithm();
0247: }
0248:
0249: /**
0250: * Initialize using an array of direction types and an externally supplied paragraph embedding level.
0251: * The embedding level may be -1, 0, or 1. -1 means to apply the default algorithm (rules P2 and P3),
0252: * 0 is for LTR paragraphs, and 1 is for RTL paragraphs.
0253: *
0254: * @param types the types array
0255: * @param paragraphEmbeddingLevel the externally supplied paragraph embedding level.
0256: */
0257: public BidiOrder(byte[] types, byte paragraphEmbeddingLevel) {
0258: validateTypes(types);
0259: validateParagraphEmbeddingLevel(paragraphEmbeddingLevel);
0260:
0261: this .initialTypes = (byte[]) types.clone(); // client type array remains unchanged
0262: this .paragraphEmbeddingLevel = paragraphEmbeddingLevel;
0263:
0264: runAlgorithm();
0265: }
0266:
0267: public BidiOrder(char text[], int offset, int length,
0268: byte paragraphEmbeddingLevel) {
0269: initialTypes = new byte[length];
0270: for (int k = 0; k < length; ++k) {
0271: initialTypes[k] = rtypes[text[offset + k]];
0272: }
0273: validateParagraphEmbeddingLevel(paragraphEmbeddingLevel);
0274:
0275: this .paragraphEmbeddingLevel = paragraphEmbeddingLevel;
0276:
0277: runAlgorithm();
0278: }
0279:
0280: public final static byte getDirection(char c) {
0281: return rtypes[c];
0282: }
0283:
0284: /**
0285: * The algorithm.
0286: * Does not include line-based processing (Rules L1, L2).
0287: * These are applied later in the line-based phase of the algorithm.
0288: */
0289: private void runAlgorithm() {
0290: textLength = initialTypes.length;
0291:
0292: // Initialize output types.
0293: // Result types initialized to input types.
0294: resultTypes = (byte[]) initialTypes.clone();
0295:
0296: // 1) determining the paragraph level
0297: // Rule P1 is the requirement for entering this algorithm.
0298: // Rules P2, P3.
0299: // If no externally supplied paragraph embedding level, use default.
0300: if (paragraphEmbeddingLevel == -1) {
0301: determineParagraphEmbeddingLevel();
0302: }
0303:
0304: // Initialize result levels to paragraph embedding level.
0305: resultLevels = new byte[textLength];
0306: setLevels(0, textLength, paragraphEmbeddingLevel);
0307:
0308: // 2) Explicit levels and directions
0309: // Rules X1-X8.
0310: determineExplicitEmbeddingLevels();
0311:
0312: // Rule X9.
0313: textLength = removeExplicitCodes();
0314:
0315: // Rule X10.
0316: // Run remainder of algorithm one level run at a time
0317: byte prevLevel = paragraphEmbeddingLevel;
0318: int start = 0;
0319: while (start < textLength) {
0320: byte level = resultLevels[start];
0321: byte prevType = typeForLevel(Math.max(prevLevel, level));
0322:
0323: int limit = start + 1;
0324: while (limit < textLength && resultLevels[limit] == level) {
0325: ++limit;
0326: }
0327:
0328: byte succLevel = limit < textLength ? resultLevels[limit]
0329: : paragraphEmbeddingLevel;
0330: byte succType = typeForLevel(Math.max(succLevel, level));
0331:
0332: // 3) resolving weak types
0333: // Rules W1-W7.
0334: resolveWeakTypes(start, limit, level, prevType, succType);
0335:
0336: // 4) resolving neutral types
0337: // Rules N1-N3.
0338: resolveNeutralTypes(start, limit, level, prevType, succType);
0339:
0340: // 5) resolving implicit embedding levels
0341: // Rules I1, I2.
0342: resolveImplicitLevels(start, limit, level, prevType,
0343: succType);
0344:
0345: prevLevel = level;
0346: start = limit;
0347: }
0348:
0349: // Reinsert explicit codes and assign appropriate levels to 'hide' them.
0350: // This is for convenience, so the resulting level array maps 1-1
0351: // with the initial array.
0352: // See the implementation suggestions section of TR#9 for guidelines on
0353: // how to implement the algorithm without removing and reinserting the codes.
0354: textLength = reinsertExplicitCodes(textLength);
0355: }
0356:
0357: /**
0358: * 1) determining the paragraph level.
0359: * <p>
0360: * Rules P2, P3.
0361: * <p>
0362: * At the end of this function, the member variable paragraphEmbeddingLevel is set to either 0 or 1.
0363: */
0364: private void determineParagraphEmbeddingLevel() {
0365: byte strongType = -1; // unknown
0366:
0367: // Rule P2.
0368: for (int i = 0; i < textLength; ++i) {
0369: byte t = resultTypes[i];
0370: if (t == L || t == AL || t == R) {
0371: strongType = t;
0372: break;
0373: }
0374: }
0375:
0376: // Rule P3.
0377: if (strongType == -1) { // none found
0378: // default embedding level when no strong types found is 0.
0379: paragraphEmbeddingLevel = 0;
0380: } else if (strongType == L) {
0381: paragraphEmbeddingLevel = 0;
0382: } else { // AL, R
0383: paragraphEmbeddingLevel = 1;
0384: }
0385: }
0386:
0387: /**
0388: * Process embedding format codes.
0389: * <p>
0390: * Calls processEmbeddings to generate an embedding array from the explicit format codes. The
0391: * embedding overrides in the array are then applied to the result types, and the result levels are
0392: * initialized.
0393: * @see #processEmbeddings
0394: */
0395: private void determineExplicitEmbeddingLevels() {
0396: embeddings = processEmbeddings(resultTypes,
0397: paragraphEmbeddingLevel);
0398:
0399: for (int i = 0; i < textLength; ++i) {
0400: byte level = embeddings[i];
0401: if ((level & 0x80) != 0) {
0402: level &= 0x7f;
0403: resultTypes[i] = typeForLevel(level);
0404: }
0405: resultLevels[i] = level;
0406: }
0407: }
0408:
0409: /**
0410: * Rules X9.
0411: * Remove explicit codes so that they may be ignored during the remainder
0412: * of the main portion of the algorithm. The length of the resulting text
0413: * is returned.
0414: * @return the length of the data excluding explicit codes and BN.
0415: */
0416: private int removeExplicitCodes() {
0417: int w = 0;
0418: for (int i = 0; i < textLength; ++i) {
0419: byte t = initialTypes[i];
0420: if (!(t == LRE || t == RLE || t == LRO || t == RLO
0421: || t == PDF || t == BN)) {
0422: embeddings[w] = embeddings[i];
0423: resultTypes[w] = resultTypes[i];
0424: resultLevels[w] = resultLevels[i];
0425: w++;
0426: }
0427: }
0428: return w; // new textLength while explicit levels are removed
0429: }
0430:
0431: /**
0432: * Reinsert levels information for explicit codes.
0433: * This is for ease of relating the level information
0434: * to the original input data. Note that the levels
0435: * assigned to these codes are arbitrary, they're
0436: * chosen so as to avoid breaking level runs.
0437: * @param textLength the length of the data after compression
0438: * @return the length of the data (original length of
0439: * types array supplied to constructor)
0440: */
0441: private int reinsertExplicitCodes(int textLength) {
0442: for (int i = initialTypes.length; --i >= 0;) {
0443: byte t = initialTypes[i];
0444: if (t == LRE || t == RLE || t == LRO || t == RLO
0445: || t == PDF || t == BN) {
0446: embeddings[i] = 0;
0447: resultTypes[i] = t;
0448: resultLevels[i] = -1;
0449: } else {
0450: --textLength;
0451: embeddings[i] = embeddings[textLength];
0452: resultTypes[i] = resultTypes[textLength];
0453: resultLevels[i] = resultLevels[textLength];
0454: }
0455: }
0456:
0457: // now propagate forward the levels information (could have
0458: // propagated backward, the main thing is not to introduce a level
0459: // break where one doesn't already exist).
0460:
0461: if (resultLevels[0] == -1) {
0462: resultLevels[0] = paragraphEmbeddingLevel;
0463: }
0464: for (int i = 1; i < initialTypes.length; ++i) {
0465: if (resultLevels[i] == -1) {
0466: resultLevels[i] = resultLevels[i - 1];
0467: }
0468: }
0469:
0470: // Embedding information is for informational purposes only
0471: // so need not be adjusted.
0472:
0473: return initialTypes.length;
0474: }
0475:
0476: /**
0477: * 2) determining explicit levels
0478: * Rules X1 - X8
0479: *
0480: * The interaction of these rules makes handling them a bit complex.
0481: * This examines resultTypes but does not modify it. It returns embedding and
0482: * override information in the result array. The low 7 bits are the level, the high
0483: * bit is set if the level is an override, and clear if it is an embedding.
0484: */
0485: private static byte[] processEmbeddings(byte[] resultTypes,
0486: byte paragraphEmbeddingLevel) {
0487: final int EXPLICIT_LEVEL_LIMIT = 62;
0488:
0489: int textLength = resultTypes.length;
0490: byte[] embeddings = new byte[textLength];
0491:
0492: // This stack will store the embedding levels and override status in a single byte
0493: // as described above.
0494: byte[] embeddingValueStack = new byte[EXPLICIT_LEVEL_LIMIT];
0495: int stackCounter = 0;
0496:
0497: // An LRE or LRO at level 60 is invalid, since the new level 62 is invalid. But
0498: // an RLE at level 60 is valid, since the new level 61 is valid. The current wording
0499: // of the rules requires that the RLE remain valid even if a previous LRE is invalid.
0500: // This keeps track of ignored LRE or LRO codes at level 60, so that the matching PDFs
0501: // will not try to pop the stack.
0502: int overflowAlmostCounter = 0;
0503:
0504: // This keeps track of ignored pushes at level 61 or higher, so that matching PDFs will
0505: // not try to pop the stack.
0506: int overflowCounter = 0;
0507:
0508: // Rule X1.
0509:
0510: // Keep the level separate from the value (level | override status flag) for ease of access.
0511: byte currentEmbeddingLevel = paragraphEmbeddingLevel;
0512: byte currentEmbeddingValue = paragraphEmbeddingLevel;
0513:
0514: // Loop through types, handling all remaining rules
0515: for (int i = 0; i < textLength; ++i) {
0516:
0517: embeddings[i] = currentEmbeddingValue;
0518:
0519: byte t = resultTypes[i];
0520:
0521: // Rules X2, X3, X4, X5
0522: switch (t) {
0523: case RLE:
0524: case LRE:
0525: case RLO:
0526: case LRO:
0527: // Only need to compute new level if current level is valid
0528: if (overflowCounter == 0) {
0529: byte newLevel;
0530: if (t == RLE || t == RLO) {
0531: newLevel = (byte) ((currentEmbeddingLevel + 1) | 1); // least greater odd
0532: } else { // t == LRE || t == LRO
0533: newLevel = (byte) ((currentEmbeddingLevel + 2) & ~1); // least greater even
0534: }
0535:
0536: // If the new level is valid, push old embedding level and override status
0537: // No check for valid stack counter, since the level check suffices.
0538: if (newLevel < EXPLICIT_LEVEL_LIMIT) {
0539: embeddingValueStack[stackCounter] = currentEmbeddingValue;
0540: stackCounter++;
0541:
0542: currentEmbeddingLevel = newLevel;
0543: if (t == LRO || t == RLO) { // override
0544: currentEmbeddingValue = (byte) (newLevel | 0x80);
0545: } else {
0546: currentEmbeddingValue = newLevel;
0547: }
0548:
0549: // Adjust level of format mark (for expositional purposes only, this gets
0550: // removed later).
0551: embeddings[i] = currentEmbeddingValue;
0552: break;
0553: }
0554:
0555: // Otherwise new level is invalid, but a valid level can still be achieved if this
0556: // level is 60 and we encounter an RLE or RLO further on. So record that we
0557: // 'almost' overflowed.
0558: if (currentEmbeddingLevel == 60) {
0559: overflowAlmostCounter++;
0560: break;
0561: }
0562: }
0563:
0564: // Otherwise old or new level is invalid.
0565: overflowCounter++;
0566: break;
0567:
0568: case PDF:
0569: // The only case where this did not actually overflow but may have almost overflowed
0570: // is when there was an RLE or RLO on level 60, which would result in level 61. So we
0571: // only test the almost overflow condition in that case.
0572: //
0573: // Also note that there may be a PDF without any pushes at all.
0574:
0575: if (overflowCounter > 0) {
0576: --overflowCounter;
0577: } else if (overflowAlmostCounter > 0
0578: && currentEmbeddingLevel != 61) {
0579: --overflowAlmostCounter;
0580: } else if (stackCounter > 0) {
0581: --stackCounter;
0582: currentEmbeddingValue = embeddingValueStack[stackCounter];
0583: currentEmbeddingLevel = (byte) (currentEmbeddingValue & 0x7f);
0584: }
0585: break;
0586:
0587: case B:
0588: // Rule X8.
0589:
0590: // These values are reset for clarity, in this implementation B can only
0591: // occur as the last code in the array.
0592: stackCounter = 0;
0593: overflowCounter = 0;
0594: overflowAlmostCounter = 0;
0595: currentEmbeddingLevel = paragraphEmbeddingLevel;
0596: currentEmbeddingValue = paragraphEmbeddingLevel;
0597:
0598: embeddings[i] = paragraphEmbeddingLevel;
0599: break;
0600:
0601: default:
0602: break;
0603: }
0604: }
0605:
0606: return embeddings;
0607: }
0608:
0609: /**
0610: * 3) resolving weak types
0611: * Rules W1-W7.
0612: *
0613: * Note that some weak types (EN, AN) remain after this processing is complete.
0614: */
0615: private void resolveWeakTypes(int start, int limit, byte level,
0616: byte sor, byte eor) {
0617:
0618: // Rule W1.
0619: // Changes all NSMs.
0620: byte preceedingCharacterType = sor;
0621: for (int i = start; i < limit; ++i) {
0622: byte t = resultTypes[i];
0623: if (t == NSM) {
0624: resultTypes[i] = preceedingCharacterType;
0625: } else {
0626: preceedingCharacterType = t;
0627: }
0628: }
0629:
0630: // Rule W2.
0631: // EN does not change at the start of the run, because sor != AL.
0632: for (int i = start; i < limit; ++i) {
0633: if (resultTypes[i] == EN) {
0634: for (int j = i - 1; j >= start; --j) {
0635: byte t = resultTypes[j];
0636: if (t == L || t == R || t == AL) {
0637: if (t == AL) {
0638: resultTypes[i] = AN;
0639: }
0640: break;
0641: }
0642: }
0643: }
0644: }
0645:
0646: // Rule W3.
0647: for (int i = start; i < limit; ++i) {
0648: if (resultTypes[i] == AL) {
0649: resultTypes[i] = R;
0650: }
0651: }
0652:
0653: // Rule W4.
0654: // Since there must be values on both sides for this rule to have an
0655: // effect, the scan skips the first and last value.
0656: //
0657: // Although the scan proceeds left to right, and changes the type values
0658: // in a way that would appear to affect the computations later in the scan,
0659: // there is actually no problem. A change in the current value can only
0660: // affect the value to its immediate right, and only affect it if it is
0661: // ES or CS. But the current value can only change if the value to its
0662: // right is not ES or CS. Thus either the current value will not change,
0663: // or its change will have no effect on the remainder of the analysis.
0664:
0665: for (int i = start + 1; i < limit - 1; ++i) {
0666: if (resultTypes[i] == ES || resultTypes[i] == CS) {
0667: byte prevSepType = resultTypes[i - 1];
0668: byte succSepType = resultTypes[i + 1];
0669: if (prevSepType == EN && succSepType == EN) {
0670: resultTypes[i] = EN;
0671: } else if (resultTypes[i] == CS && prevSepType == AN
0672: && succSepType == AN) {
0673: resultTypes[i] = AN;
0674: }
0675: }
0676: }
0677:
0678: // Rule W5.
0679: for (int i = start; i < limit; ++i) {
0680: if (resultTypes[i] == ET) {
0681: // locate end of sequence
0682: int runstart = i;
0683: int runlimit = findRunLimit(runstart, limit,
0684: new byte[] { ET });
0685:
0686: // check values at ends of sequence
0687: byte t = runstart == start ? sor
0688: : resultTypes[runstart - 1];
0689:
0690: if (t != EN) {
0691: t = runlimit == limit ? eor : resultTypes[runlimit];
0692: }
0693:
0694: if (t == EN) {
0695: setTypes(runstart, runlimit, EN);
0696: }
0697:
0698: // continue at end of sequence
0699: i = runlimit;
0700: }
0701: }
0702:
0703: // Rule W6.
0704: for (int i = start; i < limit; ++i) {
0705: byte t = resultTypes[i];
0706: if (t == ES || t == ET || t == CS) {
0707: resultTypes[i] = ON;
0708: }
0709: }
0710:
0711: // Rule W7.
0712: for (int i = start; i < limit; ++i) {
0713: if (resultTypes[i] == EN) {
0714: // set default if we reach start of run
0715: byte prevStrongType = sor;
0716: for (int j = i - 1; j >= start; --j) {
0717: byte t = resultTypes[j];
0718: if (t == L || t == R) { // AL's have been removed
0719: prevStrongType = t;
0720: break;
0721: }
0722: }
0723: if (prevStrongType == L) {
0724: resultTypes[i] = L;
0725: }
0726: }
0727: }
0728: }
0729:
0730: /**
0731: * 6) resolving neutral types
0732: * Rules N1-N2.
0733: */
0734: private void resolveNeutralTypes(int start, int limit, byte level,
0735: byte sor, byte eor) {
0736:
0737: for (int i = start; i < limit; ++i) {
0738: byte t = resultTypes[i];
0739: if (t == WS || t == ON || t == B || t == S) {
0740: // find bounds of run of neutrals
0741: int runstart = i;
0742: int runlimit = findRunLimit(runstart, limit,
0743: new byte[] { B, S, WS, ON });
0744:
0745: // determine effective types at ends of run
0746: byte leadingType;
0747: byte trailingType;
0748:
0749: if (runstart == start) {
0750: leadingType = sor;
0751: } else {
0752: leadingType = resultTypes[runstart - 1];
0753: if (leadingType == L || leadingType == R) {
0754: // found the strong type
0755: } else if (leadingType == AN) {
0756: leadingType = R;
0757: } else if (leadingType == EN) {
0758: // Since EN's with previous strong L types have been changed
0759: // to L in W7, the leadingType must be R.
0760: leadingType = R;
0761: }
0762: }
0763:
0764: if (runlimit == limit) {
0765: trailingType = eor;
0766: } else {
0767: trailingType = resultTypes[runlimit];
0768: if (trailingType == L || trailingType == R) {
0769: // found the strong type
0770: } else if (trailingType == AN) {
0771: trailingType = R;
0772: } else if (trailingType == EN) {
0773: trailingType = R;
0774: }
0775: }
0776:
0777: byte resolvedType;
0778: if (leadingType == trailingType) {
0779: // Rule N1.
0780: resolvedType = leadingType;
0781: } else {
0782: // Rule N2.
0783: // Notice the embedding level of the run is used, not
0784: // the paragraph embedding level.
0785: resolvedType = typeForLevel(level);
0786: }
0787:
0788: setTypes(runstart, runlimit, resolvedType);
0789:
0790: // skip over run of (former) neutrals
0791: i = runlimit;
0792: }
0793: }
0794: }
0795:
0796: /**
0797: * 7) resolving implicit embedding levels
0798: * Rules I1, I2.
0799: */
0800: private void resolveImplicitLevels(int start, int limit,
0801: byte level, byte sor, byte eor) {
0802: if ((level & 1) == 0) { // even level
0803: for (int i = start; i < limit; ++i) {
0804: byte t = resultTypes[i];
0805: // Rule I1.
0806: if (t == L) {
0807: // no change
0808: } else if (t == R) {
0809: resultLevels[i] += 1;
0810: } else { // t == AN || t == EN
0811: resultLevels[i] += 2;
0812: }
0813: }
0814: } else { // odd level
0815: for (int i = start; i < limit; ++i) {
0816: byte t = resultTypes[i];
0817: // Rule I2.
0818: if (t == R) {
0819: // no change
0820: } else { // t == L || t == AN || t == EN
0821: resultLevels[i] += 1;
0822: }
0823: }
0824: }
0825: }
0826:
0827: //
0828: // Output
0829: //
0830:
0831: public byte[] getLevels() {
0832: return getLevels(new int[] { textLength });
0833: }
0834:
0835: /**
0836: * Return levels array breaking lines at offsets in linebreaks. <br>
0837: * Rule L1.
0838: * <p>
0839: * The returned levels array contains the resolved level for each
0840: * bidi code passed to the constructor.
0841: * <p>
0842: * The linebreaks array must include at least one value.
0843: * The values must be in strictly increasing order (no duplicates)
0844: * between 1 and the length of the text, inclusive. The last value
0845: * must be the length of the text.
0846: *
0847: * @param linebreaks the offsets at which to break the paragraph
0848: * @return the resolved levels of the text
0849: */
0850: public byte[] getLevels(int[] linebreaks) {
0851:
0852: // Note that since the previous processing has removed all
0853: // P, S, and WS values from resultTypes, the values referred to
0854: // in these rules are the initial types, before any processing
0855: // has been applied (including processing of overrides).
0856: //
0857: // This example implementation has reinserted explicit format codes
0858: // and BN, in order that the levels array correspond to the
0859: // initial text. Their final placement is not normative.
0860: // These codes are treated like WS in this implementation,
0861: // so they don't interrupt sequences of WS.
0862:
0863: validateLineBreaks(linebreaks, textLength);
0864:
0865: byte[] result = (byte[]) resultLevels.clone(); // will be returned to caller
0866:
0867: // don't worry about linebreaks since if there is a break within
0868: // a series of WS values preceeding S, the linebreak itself
0869: // causes the reset.
0870: for (int i = 0; i < result.length; ++i) {
0871: byte t = initialTypes[i];
0872: if (t == B || t == S) {
0873: // Rule L1, clauses one and two.
0874: result[i] = paragraphEmbeddingLevel;
0875:
0876: // Rule L1, clause three.
0877: for (int j = i - 1; j >= 0; --j) {
0878: if (isWhitespace(initialTypes[j])) { // including format codes
0879: result[j] = paragraphEmbeddingLevel;
0880: } else {
0881: break;
0882: }
0883: }
0884: }
0885: }
0886:
0887: // Rule L1, clause four.
0888: int start = 0;
0889: for (int i = 0; i < linebreaks.length; ++i) {
0890: int limit = linebreaks[i];
0891: for (int j = limit - 1; j >= start; --j) {
0892: if (isWhitespace(initialTypes[j])) { // including format codes
0893: result[j] = paragraphEmbeddingLevel;
0894: } else {
0895: break;
0896: }
0897: }
0898:
0899: start = limit;
0900: }
0901:
0902: return result;
0903: }
0904:
0905: /**
0906: * Return reordering array breaking lines at offsets in linebreaks.
0907: * <p>
0908: * The reordering array maps from a visual index to a logical index.
0909: * Lines are concatenated from left to right. So for example, the
0910: * fifth character from the left on the third line is
0911: * <pre> getReordering(linebreaks)[linebreaks[1] + 4]</pre>
0912: * (linebreaks[1] is the position after the last character of the
0913: * second line, which is also the index of the first character on the
0914: * third line, and adding four gets the fifth character from the left).
0915: * <p>
0916: * The linebreaks array must include at least one value.
0917: * The values must be in strictly increasing order (no duplicates)
0918: * between 1 and the length of the text, inclusive. The last value
0919: * must be the length of the text.
0920: *
0921: * @param linebreaks the offsets at which to break the paragraph.
0922: */
0923: public int[] getReordering(int[] linebreaks) {
0924: validateLineBreaks(linebreaks, textLength);
0925:
0926: byte[] levels = getLevels(linebreaks);
0927:
0928: return computeMultilineReordering(levels, linebreaks);
0929: }
0930:
0931: /**
0932: * Return multiline reordering array for a given level array.
0933: * Reordering does not occur across a line break.
0934: */
0935: private static int[] computeMultilineReordering(byte[] levels,
0936: int[] linebreaks) {
0937: int[] result = new int[levels.length];
0938:
0939: int start = 0;
0940: for (int i = 0; i < linebreaks.length; ++i) {
0941: int limit = linebreaks[i];
0942:
0943: byte[] templevels = new byte[limit - start];
0944: System.arraycopy(levels, start, templevels, 0,
0945: templevels.length);
0946:
0947: int[] temporder = computeReordering(templevels);
0948: for (int j = 0; j < temporder.length; ++j) {
0949: result[start + j] = temporder[j] + start;
0950: }
0951:
0952: start = limit;
0953: }
0954:
0955: return result;
0956: }
0957:
0958: /**
0959: * Return reordering array for a given level array. This reorders a single line.
0960: * The reordering is a visual to logical map. For example,
0961: * the leftmost char is string.charAt(order[0]).
0962: * Rule L2.
0963: */
0964: private static int[] computeReordering(byte[] levels) {
0965: int lineLength = levels.length;
0966:
0967: int[] result = new int[lineLength];
0968:
0969: // initialize order
0970: for (int i = 0; i < lineLength; ++i) {
0971: result[i] = i;
0972: }
0973:
0974: // locate highest level found on line.
0975: // Note the rules say text, but no reordering across line bounds is performed,
0976: // so this is sufficient.
0977: byte highestLevel = 0;
0978: byte lowestOddLevel = 63;
0979: for (int i = 0; i < lineLength; ++i) {
0980: byte level = levels[i];
0981: if (level > highestLevel) {
0982: highestLevel = level;
0983: }
0984: if (((level & 1) != 0) && level < lowestOddLevel) {
0985: lowestOddLevel = level;
0986: }
0987: }
0988:
0989: for (int level = highestLevel; level >= lowestOddLevel; --level) {
0990: for (int i = 0; i < lineLength; ++i) {
0991: if (levels[i] >= level) {
0992: // find range of text at or above this level
0993: int start = i;
0994: int limit = i + 1;
0995: while (limit < lineLength && levels[limit] >= level) {
0996: ++limit;
0997: }
0998:
0999: // reverse run
1000: for (int j = start, k = limit - 1; j < k; ++j, --k) {
1001: int temp = result[j];
1002: result[j] = result[k];
1003: result[k] = temp;
1004: }
1005:
1006: // skip to end of level run
1007: i = limit;
1008: }
1009: }
1010: }
1011:
1012: return result;
1013: }
1014:
1015: /**
1016: * Return the base level of the paragraph.
1017: */
1018: public byte getBaseLevel() {
1019: return paragraphEmbeddingLevel;
1020: }
1021:
1022: // --- internal utilities -------------------------------------------------
1023:
1024: /**
1025: * Return true if the type is considered a whitespace type for the line break rules.
1026: */
1027: private static boolean isWhitespace(byte biditype) {
1028: switch (biditype) {
1029: case LRE:
1030: case RLE:
1031: case LRO:
1032: case RLO:
1033: case PDF:
1034: case BN:
1035: case WS:
1036: return true;
1037: default:
1038: return false;
1039: }
1040: }
1041:
1042: /**
1043: * Return the strong type (L or R) corresponding to the level.
1044: */
1045: private static byte typeForLevel(int level) {
1046: return ((level & 0x1) == 0) ? L : R;
1047: }
1048:
1049: /**
1050: * Return the limit of the run starting at index that includes only resultTypes in validSet.
1051: * This checks the value at index, and will return index if that value is not in validSet.
1052: */
1053: private int findRunLimit(int index, int limit, byte[] validSet) {
1054: --index;
1055: loop: while (++index < limit) {
1056: byte t = resultTypes[index];
1057: for (int i = 0; i < validSet.length; ++i) {
1058: if (t == validSet[i]) {
1059: continue loop;
1060: }
1061: }
1062: // didn't find a match in validSet
1063: return index;
1064: }
1065: return limit;
1066: }
1067:
1068: /**
1069: * Return the start of the run including index that includes only resultTypes in validSet.
1070: * This assumes the value at index is valid, and does not check it.
1071: */
1072: private int findRunStart(int index, byte[] validSet) {
1073: loop: while (--index >= 0) {
1074: byte t = resultTypes[index];
1075: for (int i = 0; i < validSet.length; ++i) {
1076: if (t == validSet[i]) {
1077: continue loop;
1078: }
1079: }
1080: return index + 1;
1081: }
1082: return 0;
1083: }
1084:
1085: /**
1086: * Set resultTypes from start up to (but not including) limit to newType.
1087: */
1088: private void setTypes(int start, int limit, byte newType) {
1089: for (int i = start; i < limit; ++i) {
1090: resultTypes[i] = newType;
1091: }
1092: }
1093:
1094: /**
1095: * Set resultLevels from start up to (but not including) limit to newLevel.
1096: */
1097: private void setLevels(int start, int limit, byte newLevel) {
1098: for (int i = start; i < limit; ++i) {
1099: resultLevels[i] = newLevel;
1100: }
1101: }
1102:
1103: // --- input validation ---------------------------------------------------
1104:
1105: /**
1106: * Throw exception if type array is invalid.
1107: */
1108: private static void validateTypes(byte[] types) {
1109: if (types == null) {
1110: throw new IllegalArgumentException("types is null");
1111: }
1112: for (int i = 0; i < types.length; ++i) {
1113: if (types[i] < TYPE_MIN || types[i] > TYPE_MAX) {
1114: throw new IllegalArgumentException(
1115: "illegal type value at " + i + ": " + types[i]);
1116: }
1117: }
1118: for (int i = 0; i < types.length - 1; ++i) {
1119: if (types[i] == B) {
1120: throw new IllegalArgumentException(
1121: "B type before end of paragraph at index: " + i);
1122: }
1123: }
1124: }
1125:
1126: /**
1127: * Throw exception if paragraph embedding level is invalid. Special allowance for -1 so that
1128: * default processing can still be performed when using this API.
1129: */
1130: private static void validateParagraphEmbeddingLevel(
1131: byte paragraphEmbeddingLevel) {
1132: if (paragraphEmbeddingLevel != -1
1133: && paragraphEmbeddingLevel != 0
1134: && paragraphEmbeddingLevel != 1) {
1135: throw new IllegalArgumentException(
1136: "illegal paragraph embedding level: "
1137: + paragraphEmbeddingLevel);
1138: }
1139: }
1140:
1141: /**
1142: * Throw exception if line breaks array is invalid.
1143: */
1144: private static void validateLineBreaks(int[] linebreaks,
1145: int textLength) {
1146: int prev = 0;
1147: for (int i = 0; i < linebreaks.length; ++i) {
1148: int next = linebreaks[i];
1149: if (next <= prev) {
1150: throw new IllegalArgumentException("bad linebreak: "
1151: + next + " at index: " + i);
1152: }
1153: prev = next;
1154: }
1155: if (prev != textLength) {
1156: throw new IllegalArgumentException(
1157: "last linebreak must be at " + textLength);
1158: }
1159: }
1160:
1161: private static final byte rtypes[] = new byte[0x10000];
1162:
/*
 * Range table from which rtypes is built. The values form consecutive
 * triples: first index, last index (inclusive), and the type to assign
 * (stored as a char and narrowed to byte by the static initializer).
 * The static initializer reads the array three values at a time, so its
 * length has to stay a multiple of three.
 * NOTE(review): the indices look like 16-bit character codes and the
 * types like bidi classes - confirm against the code that reads rtypes.
 */
1163: private static char baseTypes[] = { 0, 8, (char) BN, 9, 9,
1164: (char) S, 10, 10, (char) B, 11, 11, (char) S, 12, 12,
1165: (char) WS, 13, 13, (char) B, 14, 27, (char) BN, 28, 30,
1166: (char) B, 31, 31, (char) S, 32, 32, (char) WS, 33, 34,
1167: (char) ON, 35, 37, (char) ET, 38, 42, (char) ON, 43, 43,
1168: (char) ET, 44, 44, (char) CS, 45, 45, (char) ET, 46, 46,
1169: (char) CS, 47, 47, (char) ES, 48, 57, (char) EN, 58, 58,
1170: (char) CS, 59, 64, (char) ON, 65, 90, (char) L, 91, 96,
1171: (char) ON, 97, 122, (char) L, 123, 126, (char) ON, 127,
1172: 132, (char) BN, 133, 133, (char) B, 134, 159, (char) BN,
1173: 160, 160, (char) CS, 161, 161, (char) ON, 162, 165,
1174: (char) ET, 166, 169, (char) ON, 170, 170, (char) L, 171,
1175: 175, (char) ON, 176, 177, (char) ET, 178, 179, (char) EN,
1176: 180, 180, (char) ON, 181, 181, (char) L, 182, 184,
1177: (char) ON, 185, 185, (char) EN, 186, 186, (char) L, 187,
1178: 191, (char) ON, 192, 214, (char) L, 215, 215, (char) ON,
1179: 216, 246, (char) L, 247, 247, (char) ON, 248, 696,
1180: (char) L, 697, 698, (char) ON, 699, 705, (char) L, 706,
1181: 719, (char) ON, 720, 721, (char) L, 722, 735, (char) ON,
1182: 736, 740, (char) L, 741, 749, (char) ON, 750, 750,
1183: (char) L, 751, 767, (char) ON, 768, 855, (char) NSM, 856,
1184: 860, (char) L, 861, 879, (char) NSM, 880, 883, (char) L,
1185: 884, 885, (char) ON, 886, 893, (char) L, 894, 894,
1186: (char) ON, 895, 899, (char) L, 900, 901, (char) ON, 902,
1187: 902, (char) L, 903, 903, (char) ON, 904, 1013, (char) L,
1188: 1014, 1014, (char) ON, 1015, 1154, (char) L, 1155, 1158,
1189: (char) NSM, 1159, 1159, (char) L, 1160, 1161, (char) NSM,
1190: 1162, 1417, (char) L, 1418, 1418, (char) ON, 1419, 1424,
1191: (char) L, 1425, 1441, (char) NSM, 1442, 1442, (char) L,
1192: 1443, 1465, (char) NSM, 1466, 1466, (char) L, 1467, 1469,
1193: (char) NSM, 1470, 1470, (char) R, 1471, 1471, (char) NSM,
1194: 1472, 1472, (char) R, 1473, 1474, (char) NSM, 1475, 1475,
1195: (char) R, 1476, 1476, (char) NSM, 1477, 1487, (char) L,
1196: 1488, 1514, (char) R, 1515, 1519, (char) L, 1520, 1524,
1197: (char) R, 1525, 1535, (char) L, 1536, 1539, (char) AL,
1198: 1540, 1547, (char) L, 1548, 1548, (char) CS, 1549, 1549,
1199: (char) AL, 1550, 1551, (char) ON, 1552, 1557, (char) NSM,
1200: 1558, 1562, (char) L, 1563, 1563, (char) AL, 1564, 1566,
1201: (char) L, 1567, 1567, (char) AL, 1568, 1568, (char) L,
1202: 1569, 1594, (char) AL, 1595, 1599, (char) L, 1600, 1610,
1203: (char) AL, 1611, 1624, (char) NSM, 1625, 1631, (char) L,
1204: 1632, 1641, (char) AN, 1642, 1642, (char) ET, 1643, 1644,
1205: (char) AN, 1645, 1647, (char) AL, 1648, 1648, (char) NSM,
1206: 1649, 1749, (char) AL, 1750, 1756, (char) NSM, 1757, 1757,
1207: (char) AL, 1758, 1764, (char) NSM, 1765, 1766, (char) AL,
1208: 1767, 1768, (char) NSM, 1769, 1769, (char) ON, 1770, 1773,
1209: (char) NSM, 1774, 1775, (char) AL, 1776, 1785, (char) EN,
1210: 1786, 1805, (char) AL, 1806, 1806, (char) L, 1807, 1807,
1211: (char) BN, 1808, 1808, (char) AL, 1809, 1809, (char) NSM,
1212: 1810, 1839, (char) AL, 1840, 1866, (char) NSM, 1867, 1868,
1213: (char) L, 1869, 1871, (char) AL, 1872, 1919, (char) L,
1214: 1920, 1957, (char) AL, 1958, 1968, (char) NSM, 1969, 1969,
1215: (char) AL, 1970, 2304, (char) L, 2305, 2306, (char) NSM,
1216: 2307, 2363, (char) L, 2364, 2364, (char) NSM, 2365, 2368,
1217: (char) L, 2369, 2376, (char) NSM, 2377, 2380, (char) L,
1218: 2381, 2381, (char) NSM, 2382, 2384, (char) L, 2385, 2388,
1219: (char) NSM, 2389, 2401, (char) L, 2402, 2403, (char) NSM,
1220: 2404, 2432, (char) L, 2433, 2433, (char) NSM, 2434, 2491,
1221: (char) L, 2492, 2492, (char) NSM, 2493, 2496, (char) L,
1222: 2497, 2500, (char) NSM, 2501, 2508, (char) L, 2509, 2509,
1223: (char) NSM, 2510, 2529, (char) L, 2530, 2531, (char) NSM,
1224: 2532, 2545, (char) L, 2546, 2547, (char) ET, 2548, 2560,
1225: (char) L, 2561, 2562, (char) NSM, 2563, 2619, (char) L,
1226: 2620, 2620, (char) NSM, 2621, 2624, (char) L, 2625, 2626,
1227: (char) NSM, 2627, 2630, (char) L, 2631, 2632, (char) NSM,
1228: 2633, 2634, (char) L, 2635, 2637, (char) NSM, 2638, 2671,
1229: (char) L, 2672, 2673, (char) NSM, 2674, 2688, (char) L,
1230: 2689, 2690, (char) NSM, 2691, 2747, (char) L, 2748, 2748,
1231: (char) NSM, 2749, 2752, (char) L, 2753, 2757, (char) NSM,
1232: 2758, 2758, (char) L, 2759, 2760, (char) NSM, 2761, 2764,
1233: (char) L, 2765, 2765, (char) NSM, 2766, 2785, (char) L,
1234: 2786, 2787, (char) NSM, 2788, 2800, (char) L, 2801, 2801,
1235: (char) ET, 2802, 2816, (char) L, 2817, 2817, (char) NSM,
1236: 2818, 2875, (char) L, 2876, 2876, (char) NSM, 2877, 2878,
1237: (char) L, 2879, 2879, (char) NSM, 2880, 2880, (char) L,
1238: 2881, 2883, (char) NSM, 2884, 2892, (char) L, 2893, 2893,
1239: (char) NSM, 2894, 2901, (char) L, 2902, 2902, (char) NSM,
1240: 2903, 2945, (char) L, 2946, 2946, (char) NSM, 2947, 3007,
1241: (char) L, 3008, 3008, (char) NSM, 3009, 3020, (char) L,
1242: 3021, 3021, (char) NSM, 3022, 3058, (char) L, 3059, 3064,
1243: (char) ON, 3065, 3065, (char) ET, 3066, 3066, (char) ON,
1244: 3067, 3133, (char) L, 3134, 3136, (char) NSM, 3137, 3141,
1245: (char) L, 3142, 3144, (char) NSM, 3145, 3145, (char) L,
1246: 3146, 3149, (char) NSM, 3150, 3156, (char) L, 3157, 3158,
1247: (char) NSM, 3159, 3259, (char) L, 3260, 3260, (char) NSM,
1248: 3261, 3275, (char) L, 3276, 3277, (char) NSM, 3278, 3392,
1249: (char) L, 3393, 3395, (char) NSM, 3396, 3404, (char) L,
1250: 3405, 3405, (char) NSM, 3406, 3529, (char) L, 3530, 3530,
1251: (char) NSM, 3531, 3537, (char) L, 3538, 3540, (char) NSM,
1252: 3541, 3541, (char) L, 3542, 3542, (char) NSM, 3543, 3632,
1253: (char) L, 3633, 3633, (char) NSM, 3634, 3635, (char) L,
1254: 3636, 3642, (char) NSM, 3643, 3646, (char) L, 3647, 3647,
1255: (char) ET, 3648, 3654, (char) L, 3655, 3662, (char) NSM,
1256: 3663, 3760, (char) L, 3761, 3761, (char) NSM, 3762, 3763,
1257: (char) L, 3764, 3769, (char) NSM, 3770, 3770, (char) L,
1258: 3771, 3772, (char) NSM, 3773, 3783, (char) L, 3784, 3789,
1259: (char) NSM, 3790, 3863, (char) L, 3864, 3865, (char) NSM,
1260: 3866, 3892, (char) L, 3893, 3893, (char) NSM, 3894, 3894,
1261: (char) L, 3895, 3895, (char) NSM, 3896, 3896, (char) L,
1262: 3897, 3897, (char) NSM, 3898, 3901, (char) ON, 3902, 3952,
1263: (char) L, 3953, 3966, (char) NSM, 3967, 3967, (char) L,
1264: 3968, 3972, (char) NSM, 3973, 3973, (char) L, 3974, 3975,
1265: (char) NSM, 3976, 3983, (char) L, 3984, 3991, (char) NSM,
1266: 3992, 3992, (char) L, 3993, 4028, (char) NSM, 4029, 4037,
1267: (char) L, 4038, 4038, (char) NSM, 4039, 4140, (char) L,
1268: 4141, 4144, (char) NSM, 4145, 4145, (char) L, 4146, 4146,
1269: (char) NSM, 4147, 4149, (char) L, 4150, 4151, (char) NSM,
1270: 4152, 4152, (char) L, 4153, 4153, (char) NSM, 4154, 4183,
1271: (char) L, 4184, 4185, (char) NSM, 4186, 5759, (char) L,
1272: 5760, 5760, (char) WS, 5761, 5786, (char) L, 5787, 5788,
1273: (char) ON, 5789, 5905, (char) L, 5906, 5908, (char) NSM,
1274: 5909, 5937, (char) L, 5938, 5940, (char) NSM, 5941, 5969,
1275: (char) L, 5970, 5971, (char) NSM, 5972, 6001, (char) L,
1276: 6002, 6003, (char) NSM, 6004, 6070, (char) L, 6071, 6077,
1277: (char) NSM, 6078, 6085, (char) L, 6086, 6086, (char) NSM,
1278: 6087, 6088, (char) L, 6089, 6099, (char) NSM, 6100, 6106,
1279: (char) L, 6107, 6107, (char) ET, 6108, 6108, (char) L,
1280: 6109, 6109, (char) NSM, 6110, 6127, (char) L, 6128, 6137,
1281: (char) ON, 6138, 6143, (char) L, 6144, 6154, (char) ON,
1282: 6155, 6157, (char) NSM, 6158, 6158, (char) WS, 6159, 6312,
1283: (char) L, 6313, 6313, (char) NSM, 6314, 6431, (char) L,
1284: 6432, 6434, (char) NSM, 6435, 6438, (char) L, 6439, 6443,
1285: (char) NSM, 6444, 6449, (char) L, 6450, 6450, (char) NSM,
1286: 6451, 6456, (char) L, 6457, 6459, (char) NSM, 6460, 6463,
1287: (char) L, 6464, 6464, (char) ON, 6465, 6467, (char) L,
1288: 6468, 6469, (char) ON, 6470, 6623, (char) L, 6624, 6655,
1289: (char) ON, 6656, 8124, (char) L, 8125, 8125, (char) ON,
1290: 8126, 8126, (char) L, 8127, 8129, (char) ON, 8130, 8140,
1291: (char) L, 8141, 8143, (char) ON, 8144, 8156, (char) L,
1292: 8157, 8159, (char) ON, 8160, 8172, (char) L, 8173, 8175,
1293: (char) ON, 8176, 8188, (char) L, 8189, 8190, (char) ON,
1294: 8191, 8191, (char) L, 8192, 8202, (char) WS, 8203, 8205,
1295: (char) BN, 8206, 8206, (char) L, 8207, 8207, (char) R,
1296: 8208, 8231, (char) ON, 8232, 8232, (char) WS, 8233, 8233,
1297: (char) B, 8234, 8234, (char) LRE, 8235, 8235, (char) RLE,
1298: 8236, 8236, (char) PDF, 8237, 8237, (char) LRO, 8238, 8238,
1299: (char) RLO, 8239, 8239, (char) WS, 8240, 8244, (char) ET,
1300: 8245, 8276, (char) ON, 8277, 8278, (char) L, 8279, 8279,
1301: (char) ON, 8280, 8286, (char) L, 8287, 8287, (char) WS,
1302: 8288, 8291, (char) BN, 8292, 8297, (char) L, 8298, 8303,
1303: (char) BN, 8304, 8304, (char) EN, 8305, 8307, (char) L,
1304: 8308, 8313, (char) EN, 8314, 8315, (char) ET, 8316, 8318,
1305: (char) ON, 8319, 8319, (char) L, 8320, 8329, (char) EN,
1306: 8330, 8331, (char) ET, 8332, 8334, (char) ON, 8335, 8351,
1307: (char) L, 8352, 8369, (char) ET, 8370, 8399, (char) L,
1308: 8400, 8426, (char) NSM, 8427, 8447, (char) L, 8448, 8449,
1309: (char) ON, 8450, 8450, (char) L, 8451, 8454, (char) ON,
1310: 8455, 8455, (char) L, 8456, 8457, (char) ON, 8458, 8467,
1311: (char) L, 8468, 8468, (char) ON, 8469, 8469, (char) L,
1312: 8470, 8472, (char) ON, 8473, 8477, (char) L, 8478, 8483,
1313: (char) ON, 8484, 8484, (char) L, 8485, 8485, (char) ON,
1314: 8486, 8486, (char) L, 8487, 8487, (char) ON, 8488, 8488,
1315: (char) L, 8489, 8489, (char) ON, 8490, 8493, (char) L,
1316: 8494, 8494, (char) ET, 8495, 8497, (char) L, 8498, 8498,
1317: (char) ON, 8499, 8505, (char) L, 8506, 8507, (char) ON,
1318: 8508, 8511, (char) L, 8512, 8516, (char) ON, 8517, 8521,
1319: (char) L, 8522, 8523, (char) ON, 8524, 8530, (char) L,
1320: 8531, 8543, (char) ON, 8544, 8591, (char) L, 8592, 8721,
1321: (char) ON, 8722, 8723, (char) ET, 8724, 9013, (char) ON,
1322: 9014, 9082, (char) L, 9083, 9108, (char) ON, 9109, 9109,
1323: (char) L, 9110, 9168, (char) ON, 9169, 9215, (char) L,
1324: 9216, 9254, (char) ON, 9255, 9279, (char) L, 9280, 9290,
1325: (char) ON, 9291, 9311, (char) L, 9312, 9371, (char) EN,
1326: 9372, 9449, (char) L, 9450, 9450, (char) EN, 9451, 9751,
1327: (char) ON, 9752, 9752, (char) L, 9753, 9853, (char) ON,
1328: 9854, 9855, (char) L, 9856, 9873, (char) ON, 9874, 9887,
1329: (char) L, 9888, 9889, (char) ON, 9890, 9984, (char) L,
1330: 9985, 9988, (char) ON, 9989, 9989, (char) L, 9990, 9993,
1331: (char) ON, 9994, 9995, (char) L, 9996, 10023, (char) ON,
1332: 10024, 10024, (char) L, 10025, 10059, (char) ON, 10060,
1333: 10060, (char) L, 10061, 10061, (char) ON, 10062, 10062,
1334: (char) L, 10063, 10066, (char) ON, 10067, 10069, (char) L,
1335: 10070, 10070, (char) ON, 10071, 10071, (char) L, 10072,
1336: 10078, (char) ON, 10079, 10080, (char) L, 10081, 10132,
1337: (char) ON, 10133, 10135, (char) L, 10136, 10159, (char) ON,
1338: 10160, 10160, (char) L, 10161, 10174, (char) ON, 10175,
1339: 10191, (char) L, 10192, 10219, (char) ON, 10220, 10223,
1340: (char) L, 10224, 11021, (char) ON, 11022, 11903, (char) L,
1341: 11904, 11929, (char) ON, 11930, 11930, (char) L, 11931,
1342: 12019, (char) ON, 12020, 12031, (char) L, 12032, 12245,
1343: (char) ON, 12246, 12271, (char) L, 12272, 12283, (char) ON,
1344: 12284, 12287, (char) L, 12288, 12288, (char) WS, 12289,
1345: 12292, (char) ON, 12293, 12295, (char) L, 12296, 12320,
1346: (char) ON, 12321, 12329, (char) L, 12330, 12335,
1347: (char) NSM, 12336, 12336, (char) ON, 12337, 12341,
1348: (char) L, 12342, 12343, (char) ON, 12344, 12348, (char) L,
1349: 12349, 12351, (char) ON, 12352, 12440, (char) L, 12441,
1350: 12442, (char) NSM, 12443, 12444, (char) ON, 12445, 12447,
1351: (char) L, 12448, 12448, (char) ON, 12449, 12538, (char) L,
1352: 12539, 12539, (char) ON, 12540, 12828, (char) L, 12829,
1353: 12830, (char) ON, 12831, 12879, (char) L, 12880, 12895,
1354: (char) ON, 12896, 12923, (char) L, 12924, 12925, (char) ON,
1355: 12926, 12976, (char) L, 12977, 12991, (char) ON, 12992,
1356: 13003, (char) L, 13004, 13007, (char) ON, 13008, 13174,
1357: (char) L, 13175, 13178, (char) ON, 13179, 13277, (char) L,
1358: 13278, 13279, (char) ON, 13280, 13310, (char) L, 13311,
1359: 13311, (char) ON, 13312, 19903, (char) L, 19904, 19967,
1360: (char) ON, 19968, 42127, (char) L, 42128, 42182, (char) ON,
1361: 42183, 64284, (char) L, 64285, 64285, (char) R, 64286,
1362: 64286, (char) NSM, 64287, 64296, (char) R, 64297, 64297,
1363: (char) ET, 64298, 64310, (char) R, 64311, 64311, (char) L,
1364: 64312, 64316, (char) R, 64317, 64317, (char) L, 64318,
1365: 64318, (char) R, 64319, 64319, (char) L, 64320, 64321,
1366: (char) R, 64322, 64322, (char) L, 64323, 64324, (char) R,
1367: 64325, 64325, (char) L, 64326, 64335, (char) R, 64336,
1368: 64433, (char) AL, 64434, 64466, (char) L, 64467, 64829,
1369: (char) AL, 64830, 64831, (char) ON, 64832, 64847, (char) L,
1370: 64848, 64911, (char) AL, 64912, 64913, (char) L, 64914,
1371: 64967, (char) AL, 64968, 65007, (char) L, 65008, 65020,
1372: (char) AL, 65021, 65021, (char) ON, 65022, 65023, (char) L,
1373: 65024, 65039, (char) NSM, 65040, 65055, (char) L, 65056,
1374: 65059, (char) NSM, 65060, 65071, (char) L, 65072, 65103,
1375: (char) ON, 65104, 65104, (char) CS, 65105, 65105,
1376: (char) ON, 65106, 65106, (char) CS, 65107, 65107, (char) L,
1377: 65108, 65108, (char) ON, 65109, 65109, (char) CS, 65110,
1378: 65118, (char) ON, 65119, 65119, (char) ET, 65120, 65121,
1379: (char) ON, 65122, 65123, (char) ET, 65124, 65126,
1380: (char) ON, 65127, 65127, (char) L, 65128, 65128, (char) ON,
1381: 65129, 65130, (char) ET, 65131, 65131, (char) ON, 65132,
1382: 65135, (char) L, 65136, 65140, (char) AL, 65141, 65141,
1383: (char) L, 65142, 65276, (char) AL, 65277, 65278, (char) L,
1384: 65279, 65279, (char) BN, 65280, 65280, (char) L, 65281,
1385: 65282, (char) ON, 65283, 65285, (char) ET, 65286, 65290,
1386: (char) ON, 65291, 65291, (char) ET, 65292, 65292,
1387: (char) CS, 65293, 65293, (char) ET, 65294, 65294,
1388: (char) CS, 65295, 65295, (char) ES, 65296, 65305,
1389: (char) EN, 65306, 65306, (char) CS, 65307, 65312,
1390: (char) ON, 65313, 65338, (char) L, 65339, 65344, (char) ON,
1391: 65345, 65370, (char) L, 65371, 65381, (char) ON, 65382,
1392: 65503, (char) L, 65504, 65505, (char) ET, 65506, 65508,
1393: (char) ON, 65509, 65510, (char) ET, 65511, 65511, (char) L,
1394: 65512, 65518, (char) ON, 65519, 65528, (char) L, 65529,
1395: 65531, (char) BN, 65532, 65533, (char) ON, 65534, 65535,
1396: (char) L };
1397:
1398: static {
1399: for (int k = 0; k < baseTypes.length; ++k) {
1400: int start = baseTypes[k];
1401: int end = baseTypes[++k];
1402: byte b = (byte) baseTypes[++k];
1403: while (start <= end)
1404: rtypes[start++] = b;
1405: }
1406: }
1407: }
|