001: // Jericho HTML Parser - Java based library for analysing and manipulating HTML
002: // Version 2.5
003: // Copyright (C) 2007 Martin Jericho
004: // http://jerichohtml.sourceforge.net/
005: //
006: // This library is free software; you can redistribute it and/or
007: // modify it under the terms of either one of the following licences:
008: //
009: // 1. The Eclipse Public License (EPL) version 1.0,
010: // included in this distribution in the file licence-epl-1.0.html
011: // or available at http://www.eclipse.org/legal/epl-v10.html
012: //
013: // 2. The GNU Lesser General Public License (LGPL) version 2.1 or later,
014: // included in this distribution in the file licence-lgpl-2.1.txt
015: // or available at http://www.gnu.org/licenses/lgpl.txt
016: //
017: // This library is distributed on an "AS IS" basis,
018: // WITHOUT WARRANTY OF ANY KIND, either express or implied.
019: // See the individual licence texts for more details.
020:
021: package au.id.jericho.lib.html;
022:
023: import java.util.*;
024:
025: /**
026: * Represents the row and column number of a character position in the source document.
027: * <p>
028: * Obtained using the {@link Source#getRowColumnVector(int pos)} method.
029: */
030: public final class RowColumnVector {
031: private int row;
032: private int column;
033: private int pos;
034:
035: private static final RowColumnVector FIRST = new RowColumnVector(1,
036: 1, 0);
037:
038: private RowColumnVector(final int row, final int column,
039: final int pos) {
040: this .row = row;
041: this .column = column;
042: this .pos = pos;
043: }
044:
045: /**
046: * Returns the row number of this character position in the source document.
047: * @return the row number of this character position in the source document.
048: */
049: public int getRow() {
050: return row;
051: }
052:
053: /**
054: * Returns the column number of this character position in the source document.
055: * @return the column number of this character position in the source document.
056: */
057: public int getColumn() {
058: return column;
059: }
060:
061: /**
062: * Returns the character position in the source document.
063: * @return the character position in the source document.
064: */
065: public int getPos() {
066: return pos;
067: }
068:
069: /**
070: * Returns a string representation of this character position.
071: * <p>
072: * The returned string has the format "<code>(</code><i>row</i><code>,</code><i>column</i><code>:</code><i>pos</i><code>)</code>".
073: *
074: * @return a string representation of this character position.
075: */
076: public String toString() {
077: return appendTo(new StringBuffer(20)).toString();
078: }
079:
080: StringBuffer appendTo(final StringBuffer sb) {
081: return sb.append("(r").append(row).append(",c").append(column)
082: .append(",p").append(pos).append(')');
083: }
084:
085: static RowColumnVector[] getCacheArray(final Source source) {
086: final int lastSourcePos = source.end - 1;
087: final ArrayList list = new ArrayList();
088: int pos = 0;
089: list.add(FIRST);
090: int row = 1;
091: while (pos <= lastSourcePos) {
092: final char ch = source.charAt(pos);
093: if (ch == '\n'
094: || (ch == '\r' && (pos == lastSourcePos || source
095: .charAt(pos + 1) != '\n')))
096: list.add(new RowColumnVector(++row, 1, pos + 1));
097: pos++;
098: }
099: return (RowColumnVector[]) list
100: .toArray(new RowColumnVector[list.size()]);
101: }
102:
103: static RowColumnVector get(final RowColumnVector[] cacheArray,
104: final int pos) {
105: int low = 0;
106: int high = cacheArray.length - 1;
107: while (true) {
108: int mid = (low + high) >> 1;
109: final RowColumnVector rowColumnVector = cacheArray[mid];
110: if (rowColumnVector.pos < pos) {
111: if (mid == high)
112: return new RowColumnVector(rowColumnVector.row, pos
113: - rowColumnVector.pos + 1, pos);
114: low = mid + 1;
115: } else if (rowColumnVector.pos > pos) {
116: high = mid - 1;
117: } else {
118: return rowColumnVector;
119: }
120: }
121: }
122: }
|