001: /* ReplayCharSequenceTest
002: *
003: * Created on Dec 26, 2006
004: *
005: * Copyright (C) 2006 Internet Archive.
006: *
007: * This file is part of the Heritrix web crawler (crawler.archive.org).
008: *
009: * Heritrix is free software; you can redistribute it and/or modify
010: * it under the terms of the GNU Lesser Public License as published by
011: * the Free Software Foundation; either version 2.1 of the License, or
012: * any later version.
013: *
014: * Heritrix is distributed in the hope that it will be useful,
015: * but WITHOUT ANY WARRANTY; without even the implied warranty of
016: * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
017: * GNU Lesser Public License for more details.
018: *
019: * You should have received a copy of the GNU Lesser Public License
020: * along with Heritrix; if not, write to the Free Software
021: * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
022: */
023: package org.archive.io;
024:
025: import java.io.File;
026: import java.io.IOException;
027: import java.util.Date;
028: import java.util.logging.Logger;
029:
030: import org.archive.util.FileUtils;
031: import org.archive.util.TmpDirTestCase;
032:
033: /**
034: * Test ReplayCharSequences.
035: *
036: * @author stack, gojomo
037: * @version $Revision: 5099 $, $Date: 2007-04-27 02:14:08 +0000 (Fri, 27 Apr 2007) $
038: */
039: public class ReplayCharSequenceTest extends TmpDirTestCase {
040: /**
041: * Logger.
042: */
043: private static Logger logger = Logger
044: .getLogger("org.archive.io.ReplayCharSequenceFactoryTest");
045:
046: private static final int SEQUENCE_LENGTH = 127;
047: private static final int MULTIPLIER = 3;
048: private static final int BUFFER_SIZE = SEQUENCE_LENGTH * MULTIPLIER;
049: private static final int INCREMENT = 1;
050:
051: /**
052: * Buffer of regular content.
053: */
054: private byte[] regularBuffer = null;
055:
056: /*
057: * @see TestCase#setUp()
058: */
059: protected void setUp() throws Exception {
060: super .setUp();
061: this .regularBuffer = fillBufferWithRegularContent(new byte[BUFFER_SIZE]);
062: }
063:
064: public void testShiftjis() throws IOException {
065:
066: // Here's the bytes for the JIS encoding of the Japanese form of Nihongo
067: byte[] bytes_nihongo = { (byte) 0x1B, (byte) 0x24, (byte) 0x42,
068: (byte) 0x46, (byte) 0x7C, (byte) 0x4B, (byte) 0x5C,
069: (byte) 0x38, (byte) 0x6C, (byte) 0x1B, (byte) 0x28,
070: (byte) 0x42, (byte) 0x1B, (byte) 0x28, (byte) 0x42 };
071: final String ENCODING = "SJIS";
072: // Here is nihongo converted to JVM encoding.
073: String nihongo = new String(bytes_nihongo, ENCODING);
074:
075: RecordingOutputStream ros = writeTestStream(bytes_nihongo,
076: MULTIPLIER, "testShiftjis", MULTIPLIER);
077: // TODO: check for existence of overflow file?
078: ReplayCharSequence rcs = ros.getReplayCharSequence(ENCODING);
079:
080: // Now check that start of the rcs comes back in as nihongo string.
081: String rcsStr = rcs.subSequence(0, nihongo.length()).toString();
082: assertTrue("Nihongo " + nihongo
083: + " does not equal converted string" + " from rcs "
084: + rcsStr, nihongo.equals(rcsStr));
085: // And assert next string is also properly nihongo.
086: if (rcs.length() >= (nihongo.length() * 2)) {
087: rcsStr = rcs.subSequence(nihongo.length(),
088: nihongo.length() + nihongo.length()).toString();
089: assertTrue("Nihongo " + nihongo
090: + " does not equal converted "
091: + " string from rcs (2nd time)" + rcsStr, nihongo
092: .equals(rcsStr));
093: }
094: }
095:
096: public void testGetReplayCharSequenceByteZeroOffset()
097: throws IOException {
098:
099: RecordingOutputStream ros = writeTestStream(regularBuffer,
100: MULTIPLIER, "testGetReplayCharSequenceByteZeroOffset",
101: MULTIPLIER);
102: ReplayCharSequence rcs = ros.getReplayCharSequence();
103:
104: for (int i = 0; i < MULTIPLIER; i++) {
105: accessingCharacters(rcs);
106: }
107: }
108:
109: public void testGetReplayCharSequenceByteOffset()
110: throws IOException {
111:
112: RecordingOutputStream ros = writeTestStream(regularBuffer,
113: MULTIPLIER, "testGetReplayCharSequenceByteOffset",
114: MULTIPLIER);
115: ReplayCharSequence rcs = ros.getReplayCharSequence(null,
116: SEQUENCE_LENGTH);
117:
118: for (int i = 0; i < MULTIPLIER; i++) {
119: accessingCharacters(rcs);
120: }
121: }
122:
123: public void testGetReplayCharSequenceMultiByteZeroOffset()
124: throws IOException {
125:
126: RecordingOutputStream ros = writeTestStream(regularBuffer,
127: MULTIPLIER,
128: "testGetReplayCharSequenceMultiByteZeroOffset",
129: MULTIPLIER);
130: ReplayCharSequence rcs = ros.getReplayCharSequence("UTF-8");
131:
132: for (int i = 0; i < MULTIPLIER; i++) {
133: accessingCharacters(rcs);
134: }
135: }
136:
137: public void testGetReplayCharSequenceMultiByteOffset()
138: throws IOException {
139:
140: RecordingOutputStream ros = writeTestStream(regularBuffer,
141: MULTIPLIER, "testGetReplayCharSequenceMultiByteOffset",
142: MULTIPLIER);
143: ReplayCharSequence rcs = ros.getReplayCharSequence("UTF-8",
144: SEQUENCE_LENGTH);
145:
146: try {
147: for (int i = 0; i < MULTIPLIER; i++) {
148: accessingCharacters(rcs);
149: }
150: } finally {
151: rcs.close();
152: }
153: }
154:
155: public void testReplayCharSequenceByteToString() throws IOException {
156: String fileContent = "Some file content";
157: byte[] buffer = fileContent.getBytes();
158: RecordingOutputStream ros = writeTestStream(buffer, 1,
159: "testReplayCharSequenceByteToString.txt", 0);
160: ReplayCharSequence rcs = ros.getReplayCharSequence();
161: String result = rcs.toString();
162: assertEquals("Strings don't match", result, fileContent);
163: }
164:
165: public void testReplayCharSequenceByteToStringOverflow()
166: throws IOException {
167: String fileContent = "Some file content. ";
168: byte[] buffer = fileContent.getBytes();
169: RecordingOutputStream ros = writeTestStream(buffer, 1,
170: "testReplayCharSequenceByteToString.txt", 1);
171: String expectedContent = fileContent + fileContent;
172: ReplayCharSequence rcs = ros.getReplayCharSequence();
173: String result = rcs.toString();
174: assertEquals("Strings don't match", expectedContent, result);
175: }
176:
177: public void testReplayCharSequenceByteToStringMulti()
178: throws IOException {
179: String fileContent = "Some file content";
180: byte[] buffer = fileContent.getBytes("UTF-8");
181: final int MULTIPLICAND = 10;
182: StringBuilder sb = new StringBuilder(MULTIPLICAND
183: * fileContent.length());
184: for (int i = 0; i < MULTIPLICAND; i++) {
185: sb.append(fileContent);
186: }
187: String expectedResult = sb.toString();
188: RecordingOutputStream ros = writeTestStream(buffer, 1,
189: "testReplayCharSequenceByteToStringMulti.txt",
190: MULTIPLICAND - 1);
191: for (int i = 0; i < 3; i++) {
192: ReplayCharSequence rcs = ros.getReplayCharSequence("UTF-8");
193: String result = rcs.toString();
194: assertEquals("Strings don't match", result, expectedResult);
195: rcs.close();
196: }
197: }
198:
199: /**
200: * Accessing characters test.
201: *
202: * Checks that characters in the rcs are in sequence.
203: *
204: * @param rcs The ReplayCharSequence to try out.
205: */
206: private void accessingCharacters(CharSequence rcs) {
207: long timestamp = (new Date()).getTime();
208: int seeks = 0;
209: for (int i = (INCREMENT * 2); (i + INCREMENT) < rcs.length(); i += INCREMENT) {
210: checkCharacter(rcs, i);
211: seeks++;
212: for (int j = i - INCREMENT; j < i; j++) {
213: checkCharacter(rcs, j);
214: seeks++;
215: }
216: }
217: // Note that printing out below breaks cruisecontrols drawing
218: // of the xml unit test results because it outputs disallowed
219: // xml characters.
220: logger.fine(rcs + " seeks count " + seeks + " in "
221: + ((new Date().getTime()) - timestamp)
222: + " milliseconds.");
223: }
224:
225: /**
226: * Check the character read.
227: *
228: * Throws assertion if not expected result.
229: *
230: * @param rcs ReplayCharSequence to read from.
231: * @param i Character offset.
232: */
233: private void checkCharacter(CharSequence rcs, int i) {
234: int c = rcs.charAt(i);
235: assertTrue("Character " + Integer.toString(c) + " at offset "
236: + i + " unexpected.",
237: (c % SEQUENCE_LENGTH) == (i % SEQUENCE_LENGTH));
238: }
239:
240: /**
241: * @param baseName
242: * @return RecordingOutputStream
243: * @throws IOException
244: */
245: private RecordingOutputStream writeTestStream(byte[] content,
246: int memReps, String baseName, int fileReps)
247: throws IOException {
248: String backingFilename = FileUtils.maybeRelative(getTmpDir(),
249: baseName).getAbsolutePath();
250: RecordingOutputStream ros = new RecordingOutputStream(
251: content.length * memReps, backingFilename);
252: ros.open();
253: for (int i = 0; i < (memReps + fileReps); i++) {
254: // fill buffer (repeat MULTIPLIER times) and
255: // overflow to disk (also MULTIPLIER times)
256: ros.write(content);
257: }
258: ros.close();
259: return ros;
260: }
261:
262: /**
263: * Fill a buffer w/ regular progression of single-byte
264: * (and <= 127) characters.
265: * @param buffer Buffer to fill.
266: * @return The buffer we filled.
267: */
268: private byte[] fillBufferWithRegularContent(byte[] buffer) {
269: int index = 0;
270: for (int i = 0; i < buffer.length; i++) {
271: buffer[i] = (byte) (index & 0x00ff);
272: index++;
273: if (index >= SEQUENCE_LENGTH) {
274: // Reset the index.
275: index = 0;
276: }
277: }
278: return buffer;
279: }
280:
281: public void testCheckParameters() {
282: // TODO.
283: }
284: }
|