001: package de.regnis.q.sequence.line;
002:
003: import java.io.IOException;
004: import java.io.InputStream;
005: import java.util.List;
006:
007: import de.regnis.q.sequence.QSequenceDifference;
008: import de.regnis.q.sequence.QSequenceDifferenceBlockShifter;
009: import de.regnis.q.sequence.line.simplifier.*;
010: import de.regnis.q.sequence.core.QSequenceAssert;
011: import de.regnis.q.sequence.core.QSequenceDummyCanceller;
012: import de.regnis.q.sequence.core.QSequenceException;
013: import de.regnis.q.sequence.media.QSequenceCachableMedia;
014: import de.regnis.q.sequence.media.QSequenceCachingMedia;
015: import de.regnis.q.sequence.media.QSequenceDiscardingMedia;
016: import de.regnis.q.sequence.media.QSequenceDiscardingMediaNoConfusionDectector;
017: import de.regnis.q.sequence.media.QSequenceMediaComparer;
018: import de.regnis.q.sequence.media.QSequenceMediaDummyIndexTransformer;
019:
020: /**
021: * @author Marc Strapetz
022: */
023: public final class QSequenceLineMedia implements
024: QSequenceCachableMedia, QSequenceMediaComparer {
025:
026: // Constants ==============================================================
027:
028: public static final int FILE_SEGMENT_SIZE = 16384;
029: public static final int SEGMENT_ENTRY_SIZE = 16;
030: public static final int MEMORY_THRESHOLD;
031: public static final double SEARCH_DEPTH_EXPONENT;
032:
033: static {
034: MEMORY_THRESHOLD = parseMemoryTreshold(System.getProperty(
035: "q.sequence.memory-threshold", "1M"));
036: }
037:
038: static {
039: if (System.getProperty("q.sequence.search-depth-exponent") != null) {
040: SEARCH_DEPTH_EXPONENT = Math
041: .max(
042: 0.1,
043: Math
044: .min(
045: 1.0,
046: Double
047: .parseDouble(System
048: .getProperty("q.sequence.search-depth-exponent"))));
049: } else {
050: SEARCH_DEPTH_EXPONENT = .5;
051: }
052: }
053:
054: // Static =================================================================
055:
056: public static QSequenceLineCache readLines(QSequenceLineRAData data)
057: throws IOException {
058: if (data.length() <= MEMORY_THRESHOLD) {
059: final InputStream stream = data.read(0, data.length());
060: try {
061: return QSequenceLineMemoryCache.read(stream,
062: new QSequenceLineDummySimplifier());
063: } finally {
064: stream.close();
065: }
066: }
067:
068: return QSequenceLineFileSystemCache.create(data,
069: new QSequenceLineSystemTempDirectoryFactory(),
070: MEMORY_THRESHOLD, FILE_SEGMENT_SIZE,
071: new QSequenceLineDummySimplifier());
072: }
073:
074: public static QSequenceLineResult createBlocks(
075: QSequenceLineRAData leftData, QSequenceLineRAData rightData)
076: throws IOException, QSequenceException {
077: return createBlocks(leftData, rightData,
078: new QSequenceLineDummySimplifier());
079: }
080:
081: public static QSequenceLineResult createBlocks(
082: QSequenceLineRAData leftData,
083: QSequenceLineRAData rightData,
084: QSequenceLineSimplifier simplifier) throws IOException,
085: QSequenceException {
086: return createBlocks(leftData, rightData, MEMORY_THRESHOLD,
087: FILE_SEGMENT_SIZE, SEARCH_DEPTH_EXPONENT,
088: new QSequenceLineSystemTempDirectoryFactory(),
089: simplifier);
090: }
091:
092: public static QSequenceLineResult createBlocks(
093: QSequenceLineRAData leftData,
094: QSequenceLineRAData rightData, int memoryThreshold,
095: int fileSegmentSize, double searchDepthExponent,
096: QSequenceLineTempDirectoryFactory tempDirectoryFactory,
097: QSequenceLineSimplifier simplifier) throws IOException,
098: QSequenceException {
099: if (leftData.length() <= memoryThreshold
100: && rightData.length() <= memoryThreshold) {
101: final InputStream leftStream = leftData.read(0, leftData
102: .length());
103: final InputStream rightStream = rightData.read(0, rightData
104: .length());
105: try {
106: return createBlocksInMemory(leftStream, rightStream,
107: searchDepthExponent, simplifier);
108: } finally {
109: leftStream.close();
110: rightStream.close();
111: }
112: }
113:
114: return createBlocksInFilesystem(leftData, rightData,
115: tempDirectoryFactory, searchDepthExponent,
116: memoryThreshold, fileSegmentSize, simplifier);
117: }
118:
119: static QSequenceLineResult createBlocksInMemory(
120: InputStream leftStream, InputStream rightStream,
121: double searchDepthExponent,
122: QSequenceLineSimplifier simplifier) throws IOException,
123: QSequenceException {
124: final QSequenceLineMemoryCache leftCache = QSequenceLineMemoryCache
125: .read(leftStream, simplifier);
126: final QSequenceLineMemoryCache rightCache = QSequenceLineMemoryCache
127: .read(rightStream, simplifier);
128: final QSequenceLineMedia lineMedia = new QSequenceLineMedia(
129: leftCache, rightCache);
130: final QSequenceCachingMedia cachingMedia = new QSequenceCachingMedia(
131: lineMedia, new QSequenceDummyCanceller());
132: final QSequenceDiscardingMedia discardingMedia = new QSequenceDiscardingMedia(
133: cachingMedia,
134: new QSequenceDiscardingMediaNoConfusionDectector(true),
135: new QSequenceDummyCanceller());
136: final List blocks = new QSequenceDifference(discardingMedia,
137: discardingMedia, getSearchDepth(lineMedia,
138: searchDepthExponent)).getBlocks();
139: new QSequenceDifferenceBlockShifter(cachingMedia, cachingMedia)
140: .shiftBlocks(blocks);
141: return new QSequenceLineResult(blocks, leftCache, rightCache);
142: }
143:
144: static QSequenceLineResult createBlocksInFilesystem(
145: QSequenceLineRAData leftData,
146: QSequenceLineRAData rightData,
147: QSequenceLineTempDirectoryFactory tempDirectoryFactory,
148: double searchDepthExponent, int memoryThreshold,
149: int fileSegmentSize, QSequenceLineSimplifier simplifier)
150: throws IOException, QSequenceException {
151: final QSequenceLineFileSystemCache leftCache = QSequenceLineFileSystemCache
152: .create(leftData, tempDirectoryFactory,
153: memoryThreshold, fileSegmentSize, simplifier);
154: final QSequenceLineFileSystemCache rightCache = QSequenceLineFileSystemCache
155: .create(rightData, tempDirectoryFactory,
156: memoryThreshold, fileSegmentSize, simplifier);
157: final QSequenceLineMedia lineMedia = new QSequenceLineMedia(
158: leftCache, rightCache);
159: final List blocks = new QSequenceDifference(lineMedia,
160: new QSequenceMediaDummyIndexTransformer(lineMedia),
161: getSearchDepth(lineMedia, searchDepthExponent))
162: .getBlocks();
163: new QSequenceDifferenceBlockShifter(lineMedia, lineMedia)
164: .shiftBlocks(blocks);
165: return new QSequenceLineResult(blocks, leftCache, rightCache);
166: }
167:
168: // Fields =================================================================
169:
170: private final QSequenceLineCache leftCache;
171: private final QSequenceLineCache rightCache;
172:
173: // Setup ==================================================================
174:
175: public QSequenceLineMedia(QSequenceLineCache leftCache,
176: QSequenceLineCache rightCache) {
177: this .leftCache = leftCache;
178: this .rightCache = rightCache;
179: }
180:
181: // Implemented ============================================================
182:
183: public int getLeftLength() {
184: return leftCache.getLineCount();
185: }
186:
187: public int getRightLength() {
188: return rightCache.getLineCount();
189: }
190:
191: public Object getMediaLeftObject(int index)
192: throws QSequenceException {
193: try {
194: return leftCache.getLine(index);
195: } catch (IOException ex) {
196: throw new QSequenceException(ex);
197: }
198: }
199:
200: public Object getMediaRightObject(int index)
201: throws QSequenceException {
202: try {
203: return rightCache.getLine(index);
204: } catch (IOException ex) {
205: throw new QSequenceException(ex);
206: }
207: }
208:
209: public boolean equals(int leftIndex, int rightIndex)
210: throws QSequenceException {
211: try {
212: final int leftHash = leftCache.getLineHash(leftIndex);
213: final int rightHash = rightCache.getLineHash(rightIndex);
214: if (leftHash != 0 && rightHash != 0
215: && leftHash != rightHash) {
216: return false;
217: }
218:
219: return leftCache.getLine(leftIndex).equals(
220: rightCache.getLine(rightIndex));
221: } catch (IOException ex) {
222: throw new QSequenceException(ex);
223: }
224: }
225:
226: public boolean equalsLeft(int left1, int left2)
227: throws QSequenceException {
228: try {
229: return leftCache.getLine(left1).equals(
230: leftCache.getLine(left2));
231: } catch (IOException ex) {
232: throw new QSequenceException(ex);
233: }
234: }
235:
236: public boolean equalsRight(int right1, int right2)
237: throws QSequenceException {
238: try {
239: return rightCache.getLine(right1).equals(
240: rightCache.getLine(right2));
241: } catch (IOException ex) {
242: throw new QSequenceException(ex);
243: }
244: }
245:
246: // Utils ==================================================================
247:
248: private static int getSearchDepth(QSequenceLineMedia lineMedia,
249: double searchDepthExponent) {
250: QSequenceAssert.assertTrue(searchDepthExponent >= 0.0
251: && searchDepthExponent <= 1.0);
252:
253: if (searchDepthExponent == 1.0) {
254: return Integer.MAX_VALUE;
255: }
256:
257: return Math.max(256, (int) Math.pow(lineMedia.getLeftLength()
258: + lineMedia.getRightLength(), searchDepthExponent));
259: }
260:
261: private static int parseMemoryTreshold(String value) {
262: if (value == null) {
263: value = "1M";
264: }
265: value = value.toLowerCase();
266: int factor = 1;
267: if (value.endsWith("m")) {
268: value = value.substring(0, value.length() - 1);
269: factor = 1048576;
270: } else if (value.endsWith("mb")) {
271: value = value.substring(0, value.length() - 2);
272: factor = 1048576;
273: } else if (value.endsWith("k")) {
274: value = value.substring(0, value.length() - 1);
275: factor = 1024;
276: } else if (value.endsWith("kb")) {
277: value = value.substring(0, value.length() - 2);
278: factor = 1024;
279: }
280: try {
281: int amount = Integer.parseInt(value);
282: amount = factor * amount;
283: if (amount < FILE_SEGMENT_SIZE) {
284: amount = FILE_SEGMENT_SIZE;
285: }
286: return amount;
287: } catch (NumberFormatException e) {
288: return parseMemoryTreshold(null);
289: }
290: }
291: }
|