001: /*
002: * ====================================================================
003: * Copyright (c) 2004 Marc Strapetz, marc.strapetz@smartsvn.com.
004: * All rights reserved.
005: *
006: * This software is licensed as described in the file COPYING, which
007: * you should have received as part of this distribution. Use is
008: * subject to license terms.
009: * ====================================================================
010: */
011:
012: package de.regnis.q.sequence.media;
013:
014: import de.regnis.q.sequence.core.*;
015:
016: /**
017: * @author Marc Strapetz
018: */
019: public abstract class QSequenceDiscardingMediaBlock {
020:
021: // Abstract ===============================================================
022:
023: protected abstract int[] getAllSymbols(QSequenceIntMedia media);
024:
025: // Fields =================================================================
026:
027: private final QSequenceIntMedia media;
028: private final int[] undiscardedSymbols;
029: private final int[] undiscardedIndices;
030:
031: private int undiscardedSymbolCount;
032:
033: // Setup ==================================================================
034:
035: protected QSequenceDiscardingMediaBlock(QSequenceIntMedia media) {
036: this .media = media;
037: this .undiscardedSymbols = new int[getAllSymbols(media).length];
038: this .undiscardedIndices = new int[getAllSymbols(media).length];
039: this .undiscardedSymbolCount = 0;
040: }
041:
042: // Accessing ==============================================================
043:
044: public int getUndiscardedSymbolCount() {
045: return undiscardedSymbolCount;
046: }
047:
048: public int[] getUndiscardedSymbols() {
049: return undiscardedSymbols;
050: }
051:
052: public int getMediaIndex(int index) {
053: return undiscardedIndices[index];
054: }
055:
056: public void init(QSequenceDiscardingMediaBlock thatBlock,
057: QSequenceDiscardingMediaConfusionDetector confusionDetector) {
058: QSequenceAssert.assertNotNull(confusionDetector);
059:
060: // Set up table of which lines are going to be discarded.
061: final int[] this AllSymbols = getAllSymbols(media);
062: final int[] thatAllSymbols = thatBlock.getAllSymbols(media);
063: final int[] otherEquivalences = createEquivalences(
064: thatAllSymbols, media);
065: final byte[] discardableMarkers = createDiscardableMarkers(
066: this AllSymbols, otherEquivalences, confusionDetector);
067:
068: // Don't really discard the provisional lines except when they occur
069: // in a run of discardables, with nonprovisionals at the beginning
070: // and end.
071: // filterDiscardableMarkers(allSymbols, discardableMarkers);
072:
073: for (int index = 0; index < this AllSymbols.length; ++index) {
074: if (discardableMarkers[index] != 1) {
075: undiscardedSymbols[undiscardedSymbolCount] = this AllSymbols[index];
076: undiscardedIndices[undiscardedSymbolCount] = index;
077: undiscardedSymbolCount++;
078: }
079: }
080: }
081:
082: // Utils ==================================================================
083:
084: private static int[] createEquivalences(int[] symbols,
085: QSequenceIntMedia media) {
086: final int[] equivalences = new int[media.getSymbolCount()];
087: for (int index = 0; index < symbols.length; index++) {
088: equivalences[symbols[index]]++;
089: }
090: return equivalences;
091: }
092:
093: private static byte[] createDiscardableMarkers(int[] symbols,
094: int[] otherEquivalences,
095: QSequenceDiscardingMediaConfusionDetector confusionDetector) {
096: final byte[] discardableMarkers = new byte[symbols.length];
097: confusionDetector.init(symbols.length);
098:
099: for (int index = 0; index < symbols.length; index++) {
100: final int occurences = otherEquivalences[symbols[index]];
101: if (confusionDetector.isAbsolute(occurences)) {
102: discardableMarkers[index] = 1;
103: } else if (confusionDetector.isProvisional(occurences)) {
104: discardableMarkers[index] = 2;
105: }
106: }
107:
108: return discardableMarkers;
109: }
110:
111: // --Commented out by Inspection START (17.06.04 15:17):
112: // private static void filterDiscardableMarkers(int[] symbols, byte[] discardableMarkers) {
113: // for (int i = 0; i < symbols.length; i++) {
114: // // Cancel provisional discards not in middle of run of discards.
115: // if (discardableMarkers[i] == 2) {
116: // discardableMarkers[i] = 0;
117: // }
118: // else if (discardableMarkers[i] != 0) {
119: // // We have found a nonprovisional discard.
120: // int j;
121: // final int length;
122: // int provisional = 0;
123: //
124: // // Find end of this run of discardable lines.
125: // // Count how many are provisionally discardable.
126: // for (j = i; j < symbols.length; j++) {
127: // if (discardableMarkers[j] == 0) {
128: // break;
129: // }
130: // if (discardableMarkers[j] == 2) {
131: // provisional++;
132: // }
133: // }
134: //
135: // // Cancel provisional discards at end, and shrink the run.
136: // while (j > i && discardableMarkers[j - 1] == 2) {
137: // discardableMarkers[--j] = 0;
138: // provisional--;
139: // }
140: //
141: // // Now we have the length of a run of discardable lines
142: // // whose first and last are not provisional.
143: // length = j - i;
144: //
145: // // If 1/4 of the lines in the run are provisional,
146: // // cancel discarding of all provisional lines in the run.
147: // if (provisional * 4 > length) {
148: // while (j > i) {
149: // if (discardableMarkers[--j] == 2) {
150: // discardableMarkers[j] = 0;
151: // }
152: // }
153: // }
154: // else {
155: // int consec;
156: // int minimum = 1;
157: // int tem = length / 4;
158: //
159: // // MINIMUM is approximate square root of LENGTH/4.
160: // // A subrun of two or more provisionals can stand
161: // // when LENGTH is at least 16.
162: // // A subrun of 4 or more can stand when LENGTH >= 64.
163: // while ((tem >>= 2) > 0) {
164: // minimum *= 2;
165: // }
166: // minimum++;
167: //
168: // // Cancel any subrun of MINIMUM or more provisionals
169: // // within the larger run.
170: // for (j = 0, consec = 0; j < length; j++) {
171: // if (discardableMarkers[i + j] != 2) {
172: // consec = 0;
173: // }
174: // else if (minimum == ++consec) {
175: // // Back up to start of subrun, to cancel it all.
176: // j -= consec;
177: // }
178: // else if (minimum < consec) {
179: // discardableMarkers[i + j] = 0;
180: // }
181: // }
182: //
183: // // Scan from beginning of run
184: // // until we find 3 or more nonprovisionals in a row
185: // // or until the first nonprovisional at least 8 lines in.
186: // // Until that point, cancel any provisionals.
187: // for (j = 0, consec = 0; j < length; j++) {
188: // if (j >= 8 && discardableMarkers[i + j] == 1) {
189: // break;
190: // }
191: // if (discardableMarkers[i + j] == 2) {
192: // consec = 0;
193: // discardableMarkers[i + j] = 0;
194: // }
195: // else if (discardableMarkers[i + j] == 0) {
196: // consec = 0;
197: // }
198: // else {
199: // consec++;
200: // }
201: // if (consec == 3) {
202: // break;
203: // }
204: // }
205: //
206: // // I advances to the last line of the run.
207: // i += length - 1;
208: //
209: // // Same thing, from end.
210: // for (j = 0, consec = 0; j < length; j++) {
211: // if (j >= 8 && discardableMarkers[i - j] == 1) {
212: // break;
213: // }
214: // if (discardableMarkers[i - j] == 2) {
215: // consec = 0;
216: // discardableMarkers[i - j] = 0;
217: // }
218: // else if (discardableMarkers[i - j] == 0) {
219: // consec = 0;
220: // }
221: // else {
222: // consec++;
223: // }
224: // if (consec == 3) {
225: // break;
226: // }
227: // }
228: // }
229: // }
230: // }
231: // }
232: // --Commented out by Inspection STOP (17.06.04 15:17)
233: }
|