001: /*-
002: * See the file LICENSE for redistribution information.
003: *
004: * Copyright (c) 2002,2008 Oracle. All rights reserved.
005: *
006: * $Id: DbScavenger.java,v 1.16.2.3 2008/01/07 15:14:18 cwl Exp $
007: */
008:
009: package com.sleepycat.je.utilint;
010:
011: import java.io.File;
012: import java.io.FileOutputStream;
013: import java.io.IOException;
014: import java.io.PrintStream;
015: import java.util.Date;
016: import java.util.HashMap;
017: import java.util.Iterator;
018: import java.util.Map;
019:
020: import com.sleepycat.je.DatabaseException;
021: import com.sleepycat.je.DbInternal;
022: import com.sleepycat.je.Environment;
023: import com.sleepycat.je.config.EnvironmentParams;
024: import com.sleepycat.je.dbi.DatabaseImpl;
025: import com.sleepycat.je.dbi.DbConfigManager;
026: import com.sleepycat.je.dbi.EnvironmentImpl;
027: import com.sleepycat.je.log.FileManager;
028: import com.sleepycat.je.log.LastFileReader;
029: import com.sleepycat.je.log.LogEntryType;
030: import com.sleepycat.je.log.ScavengerFileReader;
031: import com.sleepycat.je.log.entry.LNLogEntry;
032: import com.sleepycat.je.log.entry.LogEntry;
033: import com.sleepycat.je.tree.LN;
034: import com.sleepycat.je.tree.MapLN;
035: import com.sleepycat.je.tree.NameLN;
036: import com.sleepycat.je.util.DbDump;
037:
038: public class DbScavenger extends DbDump {
039: private static final int FLUSH_INTERVAL = 100;
040: private int readBufferSize;
041: private EnvironmentImpl envImpl;
042:
043: /*
044: * Set of committed txn ids that have been seen so far.
045: */
046: private BitMap committedTxnIdsSeen;
047:
048: /*
049: * Set of LN Node Ids that have been seen so far.
050: */
051: private BitMap nodeIdsSeen;
052:
053: /*
054: * Map of database id to database names.
055: */
056: private Map dbIdToName;
057:
058: /*
059: * Map of database id to Boolean (dupSort).
060: */
061: private Map dbIdToDupSort;
062:
063: /*
064: * Map of database id to the .dump file output stream for that database.
065: */
066: private Map dbIdToOutputStream;
067:
068: private boolean dumpCorruptedBounds = false;
069:
070: private int flushCounter = 0;
071: private long lastTime;
072:
073: public DbScavenger(Environment env, PrintStream outputFile,
074: String outputDirectory, boolean formatUsingPrintable,
075: boolean doAggressiveScavengerRun, boolean verbose) {
076: super (env, null, outputFile, outputDirectory,
077: formatUsingPrintable);
078:
079: this .doAggressiveScavengerRun = doAggressiveScavengerRun;
080: this .dbIdToName = new HashMap();
081: this .dbIdToDupSort = new HashMap();
082: this .dbIdToOutputStream = new HashMap();
083: this .verbose = verbose;
084: }
085:
086: /**
087: * Set to true if corrupted boundaries should be dumped out.
088: */
089: public void setDumpCorruptedBounds(boolean dumpCorruptedBounds) {
090: this .dumpCorruptedBounds = dumpCorruptedBounds;
091: }
092:
093: public void dump() throws IOException, DatabaseException {
094:
095: openEnv(false);
096:
097: envImpl = DbInternal.envGetEnvironmentImpl(env);
098: DbConfigManager cm = envImpl.getConfigManager();
099: try {
100: readBufferSize = cm
101: .getInt(EnvironmentParams.LOG_ITERATOR_READ_SIZE);
102: } catch (DatabaseException DBE) {
103: readBufferSize = 8192;
104: }
105:
106: /*
107: * Find the end of the log.
108: */
109: LastFileReader reader = new LastFileReader(envImpl,
110: readBufferSize);
111: while (reader.readNextEntry()) {
112: }
113:
114: /* Tell the fileManager where the end of the log is. */
115: long lastUsedLsn = reader.getLastValidLsn();
116: long nextAvailableLsn = reader.getEndOfLog();
117: envImpl.getFileManager().setLastPosition(nextAvailableLsn,
118: lastUsedLsn, reader.getPrevOffset());
119:
120: try {
121: /* Pass 1: Scavenge the dbtree. */
122: if (verbose) {
123: System.out.println("Pass 1: " + new Date());
124: }
125: scavengeDbTree(lastUsedLsn, nextAvailableLsn);
126:
127: /* Pass 2: Scavenge the databases. */
128: if (verbose) {
129: System.out.println("Pass 2: " + new Date());
130: }
131: scavenge(lastUsedLsn, nextAvailableLsn);
132:
133: if (verbose) {
134: System.out.println("End: " + new Date());
135: }
136: } finally {
137: closeOutputStreams();
138: }
139: }
140:
141: /*
142: * Scan the log looking for records that are relevant for scavenging the db
143: * tree.
144: */
145: private void scavengeDbTree(long lastUsedLsn, long nextAvailableLsn)
146: throws IOException, DatabaseException {
147:
148: committedTxnIdsSeen = new BitMap();
149: nodeIdsSeen = new BitMap();
150:
151: final ScavengerFileReader scavengerReader = new ScavengerFileReader(
152: envImpl, readBufferSize, lastUsedLsn, DbLsn.NULL_LSN,
153: nextAvailableLsn) {
154: protected void processEntryCallback(LogEntry entry,
155: LogEntryType entryType) throws DatabaseException {
156:
157: processDbTreeEntry(entry, entryType);
158: }
159: };
160:
161: scavengerReader
162: .setTargetType(LogEntryType.LOG_MAPLN_TRANSACTIONAL);
163: scavengerReader.setTargetType(LogEntryType.LOG_MAPLN);
164: scavengerReader
165: .setTargetType(LogEntryType.LOG_NAMELN_TRANSACTIONAL);
166: scavengerReader.setTargetType(LogEntryType.LOG_NAMELN);
167: scavengerReader.setTargetType(LogEntryType.LOG_TXN_COMMIT);
168: scavengerReader.setTargetType(LogEntryType.LOG_TXN_ABORT);
169: lastTime = System.currentTimeMillis();
170: long fileNum = -1;
171: while (scavengerReader.readNextEntry()) {
172: fileNum = reportProgress(fileNum, scavengerReader
173: .getLastLsn());
174: }
175: }
176:
177: private long reportProgress(long fileNum, long lastLsn) {
178:
179: long currentFile = DbLsn.getFileNumber(lastLsn);
180: if (verbose) {
181: if (currentFile != fileNum) {
182: long now = System.currentTimeMillis();
183: System.out.println("processing file "
184: + FileManager
185: .getFileName(currentFile, ".jdb ")
186: + (now - lastTime) + " ms");
187: lastTime = now;
188: }
189: }
190:
191: return currentFile;
192: }
193:
194: /*
195: * Look at an entry and determine if it should be processed for scavenging.
196: */
197: private boolean checkProcessEntry(LogEntry entry,
198: LogEntryType entryType, boolean pass2) {
199: boolean isTransactional = entryType.isTransactional();
200:
201: /*
202: * If entry is txnal...
203: * if a commit record, add to committed txn id set
204: * if an abort record, ignore it and don't process.
205: * if an LN, check if it's in the committed txn id set.
206: * If it is, continue processing, otherwise ignore it.
207: */
208: if (isTransactional) {
209: long txnId = entry.getTransactionId();
210: if (entryType.equals(LogEntryType.LOG_TXN_COMMIT)) {
211: committedTxnIdsSeen.set(txnId);
212: /* No need to process this entry further. */
213: return false;
214: }
215:
216: if (entryType.equals(LogEntryType.LOG_TXN_ABORT)) {
217: /* No need to process this entry further. */
218: return false;
219: }
220:
221: if (!committedTxnIdsSeen.get(txnId)) {
222: return false;
223: }
224: }
225:
226: /*
227: * Check the nodeid to see if we've already seen it or not.
228: */
229: if (entry instanceof LNLogEntry) {
230: LNLogEntry lnEntry = (LNLogEntry) entry;
231: LN ln = lnEntry.getLN();
232: long nodeId = ln.getNodeId();
233: boolean isDelDupLN = entryType
234: .equals(LogEntryType.LOG_DEL_DUPLN_TRANSACTIONAL)
235: || entryType.equals(LogEntryType.LOG_DEL_DUPLN);
236:
237: /*
238: * If aggressive, don't worry about whether this node has been
239: * dumped already.
240: */
241: if (pass2 && doAggressiveScavengerRun) {
242: return !isDelDupLN;
243: }
244: if (nodeIdsSeen.get(nodeId)) {
245: return false;
246: } else {
247: nodeIdsSeen.set(nodeId);
248: if (isDelDupLN) {
249:
250: /*
251: * For deleted LN's, note the NodeId has having been
252: * processed, but, don't output them.
253: */
254: return false;
255: } else {
256: return true;
257: }
258: }
259: }
260:
261: return false;
262: }
263:
264: /*
265: * Called once for each log entry during the pass 1 (scavenging the dbtree.
266: */
267: private void processDbTreeEntry(LogEntry entry,
268: LogEntryType entryType) throws DatabaseException {
269:
270: boolean processThisEntry = checkProcessEntry(entry, entryType,
271: false);
272:
273: if (processThisEntry && (entry instanceof LNLogEntry)) {
274: LNLogEntry lnEntry = (LNLogEntry) entry;
275: LN ln = lnEntry.getLN();
276: if (ln instanceof NameLN) {
277: String name = new String(lnEntry.getKey());
278: Integer dbId = new Integer(((NameLN) ln).getId()
279: .getId());
280: if (dbIdToName.containsKey(dbId)
281: && !((String) dbIdToName.get(dbId))
282: .equals(name)) {
283: throw new DatabaseException(
284: "Already name mapped for dbId: " + dbId
285: + " changed from "
286: + (String) dbIdToName.get(dbId)
287: + " to " + name);
288: } else {
289: dbIdToName.put(dbId, name);
290: }
291: }
292:
293: if (ln instanceof MapLN) {
294: DatabaseImpl db = ((MapLN) ln).getDatabase();
295: Integer dbId = new Integer(db.getId().getId());
296: Boolean dupSort = Boolean.valueOf(db
297: .getSortedDuplicates());
298: if (dbIdToDupSort.containsKey(dbId)) {
299: throw new DatabaseException(
300: "Already saw dupSort entry for dbId: "
301: + dbId);
302: } else {
303: dbIdToDupSort.put(dbId, dupSort);
304: }
305: }
306: }
307: }
308:
309: /*
310: * Pass 2: scavenge the regular (non-dbtree) environment.
311: */
312: private void scavenge(long lastUsedLsn, long nextAvailableLsn)
313: throws IOException, DatabaseException {
314:
315: final ScavengerFileReader scavengerReader = new ScavengerFileReader(
316: envImpl, readBufferSize, lastUsedLsn, DbLsn.NULL_LSN,
317: nextAvailableLsn) {
318: protected void processEntryCallback(LogEntry entry,
319: LogEntryType entryType) throws DatabaseException {
320:
321: processRegularEntry(entry, entryType);
322: }
323: };
324:
325: /*
326: * Note: committed transaction id map has been created already, no
327: * need to read TXN_COMMITS on this pass.
328: */
329: scavengerReader
330: .setTargetType(LogEntryType.LOG_LN_TRANSACTIONAL);
331: scavengerReader.setTargetType(LogEntryType.LOG_LN);
332: scavengerReader
333: .setTargetType(LogEntryType.LOG_DEL_DUPLN_TRANSACTIONAL);
334: scavengerReader.setTargetType(LogEntryType.LOG_DEL_DUPLN);
335: scavengerReader.setDumpCorruptedBounds(dumpCorruptedBounds);
336:
337: long progressFileNum = -1;
338: while (scavengerReader.readNextEntry()) {
339: progressFileNum = reportProgress(progressFileNum,
340: scavengerReader.getLastLsn());
341: }
342: }
343:
344: /*
345: * Process an entry during pass 2.
346: */
347: private void processRegularEntry(LogEntry entry,
348: LogEntryType entryType) throws DatabaseException {
349:
350: boolean processThisEntry = checkProcessEntry(entry, entryType,
351: true);
352:
353: if (processThisEntry) {
354: LNLogEntry lnEntry = (LNLogEntry) entry;
355: Integer dbId = new Integer(lnEntry.getDbId().getId());
356: PrintStream out = getOutputStream(dbId);
357:
358: LN ln = lnEntry.getLN();
359: byte[] keyData = lnEntry.getKey();
360: byte[] data = ln.getData();
361: if (data != null) {
362: dumpOne(out, keyData, formatUsingPrintable);
363: dumpOne(out, data, formatUsingPrintable);
364: if ((++flushCounter % FLUSH_INTERVAL) == 0) {
365: out.flush();
366: flushCounter = 0;
367: }
368: }
369: }
370: }
371:
372: /*
373: * Return the output stream for the .dump file for database with id dbId.
374: * If an output stream has not already been created, then create one.
375: */
376: private PrintStream getOutputStream(Integer dbId)
377: throws DatabaseException {
378:
379: try {
380: PrintStream ret = (PrintStream) dbIdToOutputStream
381: .get(dbId);
382: if (ret != null) {
383: return ret;
384: }
385: String name = (String) dbIdToName.get(dbId);
386: if (name == null) {
387: name = "db" + dbId;
388: }
389: File file = new File(outputDirectory, name + ".dump");
390: ret = new PrintStream(new FileOutputStream(file), false);
391: dbIdToOutputStream.put(dbId, ret);
392: Boolean dupSort = (Boolean) dbIdToDupSort.get(dbId);
393: if (dupSort == null) {
394: dupSort = Boolean.valueOf(false);
395: }
396: printHeader(ret, dupSort.booleanValue(),
397: formatUsingPrintable);
398: return ret;
399: } catch (IOException IOE) {
400: throw new DatabaseException(IOE);
401: }
402: }
403:
404: private void closeOutputStreams() {
405:
406: Iterator iter = dbIdToOutputStream.values().iterator();
407: while (iter.hasNext()) {
408: PrintStream s = (PrintStream) iter.next();
409: s.println("DATA=END");
410: s.close();
411: }
412: }
413: }
|