001: /*
002: * Original Code:
003: * Copyright (c) 2004, Ben Fortuna
004: * All rights reserved.
005: *
006: * Modified by Sergio Montoro Ten on November 2004 for use with hipergate.
007: *
008: * purge(int[]) method fix up by Heidi on September 2006
009: *
010: * Redistribution and use in source and binary forms, with or without
011: * modification, are permitted provided that the following conditions
012: * are met:
013: *
014: * o Redistributions of source code must retain the above copyright
015: * notice, this list of conditions and the following disclaimer.
016: *
017: * o Redistributions in binary form must reproduce the above copyright
018: * notice, this list of conditions and the following disclaimer in the
019: * documentation and/or other materials provided with the distribution.
020: *
021: * o Neither the name of Ben Fortuna nor the names of any other contributors
022: * may be used to endorse or promote products derived from this software
023: * without specific prior written permission.
024: *
025: * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
026: * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
027: * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
028: * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
029: * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
030: * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
031: * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
032: * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
033: * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
034: * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
035: * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
036: */
037: package com.knowgate.hipermail;
038:
039: import java.io.ByteArrayInputStream;
040: import java.io.File;
041: import java.io.FileNotFoundException;
042: import java.io.FileOutputStream;
043: import java.io.IOException;
044: import java.io.InputStream;
045: import java.io.RandomAccessFile;
046:
047: import java.nio.ByteBuffer;
048: import java.nio.MappedByteBuffer;
049: import java.nio.CharBuffer;
050: import java.nio.channels.FileChannel;
051: import java.nio.channels.FileLock;
052: import java.nio.charset.Charset;
053: import java.nio.charset.CharsetDecoder;
054: import java.nio.charset.CharsetEncoder;
055: import java.nio.charset.CodingErrorAction;
056: import java.nio.charset.CoderResult;
057:
058: import java.text.DateFormat;
059: import java.text.SimpleDateFormat;
060:
061: import java.util.ArrayList;
062: import java.util.Date;
063: import java.util.Iterator;
064: import java.util.List;
065: import java.util.TimeZone;
066: import java.util.regex.Matcher;
067: import java.util.regex.Pattern;
068:
069: import com.knowgate.debug.DebugFile;
070:
071: /**
072: * Provides access to an mbox-formatted file.
073: * @author Ben Fortuna adapted to hipergate by Sergio Montoro Ten
074: * @version 3.0
075: */
076: public class MboxFile {
077:
078: public static final String READ_ONLY = "r";
079:
080: public static final String READ_WRITE = "rw";
081:
082: private static final String TEMP_FILE_EXTENSION = ".tmp";
083:
084: /**
085: * The prefix for all "From_" lines in an mbox file.
086: */
087: private static final String FROM__PREFIX = "From ";
088:
089: /**
090: * A pattern representing the format of the "From_" line
091: * for the first message in an mbox file.
092: */
093: private static final String INITIAL_FROM__PATTERN = FROM__PREFIX
094: + ".*";
095:
096: /**
097: * A pattern representing the format of all "From_" lines
098: * except for the first message in an mbox file.
099: */
100: private static final String FROM__PATTERN = "\n" + FROM__PREFIX;
101:
102: private static final String FROM__DATE_FORMAT = "EEE MMM d HH:mm:ss yyyy";
103:
104: private static DateFormat from_DateFormat = new SimpleDateFormat(
105: FROM__DATE_FORMAT);
106:
107: static {
108: from_DateFormat.setTimeZone(TimeZone.getTimeZone("UTC"));
109: }
110:
111: /**
112: * The default "From_" line used if a message doesn't already have one.
113: */
114: private static final String DEFAULT_FROM__LINE = FROM__PREFIX
115: + "- " + from_DateFormat.format(new Date(0)) + "\n";
116:
117: // Charset and decoder for ISO-8859-1
118: private static Charset charset = Charset.forName("ISO-8859-1");
119:
120: private static CharsetDecoder decoder = charset.newDecoder();
121:
122: private static CharsetEncoder encoder = charset.newEncoder();
123:
124: static {
125: encoder.onUnmappableCharacter(CodingErrorAction.REPLACE);
126: }
127:
128: private static DebugFile log = new DebugFile();
129:
130: /**
131: * Used primarily to provide information about
132: * the mbox file.
133: */
134: private File file;
135:
136: private String mode;
137:
138: /**
139: * Used to access the mbox file in a random manner.
140: */
141: private FileChannel channel;
142:
143: /**
144: * Used to grant exclusive access to the Mbox file to one thread at a time.
145: */
146: private FileLock lock;
147:
148: /**
149: * Tracks all message positions within the mbox file.
150: */
151: private long[] messagePositions;
152:
153: /**
154: * Constructor.
155: */
156: public MboxFile(File file) throws FileNotFoundException,
157: IOException {
158: this (file, READ_ONLY);
159: }
160:
161: /**
162: * Constructor.
163: * @param file
164: * @param mode Either MboxFile.READ_ONLY or MboxFile.READ_WRITE
165: */
166: public MboxFile(File file, String mode)
167: throws FileNotFoundException, IOException {
168: this .file = file;
169: this .mode = mode;
170: if (mode.equals(READ_WRITE))
171: lock = getChannel().lock();
172: }
173:
174: /**
175: * Constructor.
176: * @param filepath
177: * @param mode Either MboxFile.READ_ONLY or MboxFile.READ_WRITE
178: */
179: public MboxFile(String filepath) {
180: this .file = new File(filepath);
181: this .mode = READ_ONLY;
182: }
183:
184: /**
185: * Constructor.
186: * @param filepath
187: * @param mode Either MboxFile.READ_ONLY or MboxFile.READ_WRITE
188: */
189: public MboxFile(String filepath, String mode)
190: throws FileNotFoundException, IOException {
191: this .file = new File(filepath);
192: this .mode = mode;
193: if (mode.equals(READ_WRITE))
194: lock = getChannel().lock();
195: }
196:
197: /**
198: * Returns a channel for reading and writing to the mbox file.
199: * @return a file channel
200: * @throws FileNotFoundException
201: */
202: private FileChannel getChannel() throws FileNotFoundException {
203:
204: if (channel == null) {
205: channel = new RandomAccessFile(file, mode).getChannel();
206: }
207:
208: return channel;
209: }
210:
211: /**
212: * Return MBox file size in bytes
213: * @return long
214: */
215: public long size() throws IOException {
216: return channel.size();
217: }
218:
219: /**
220: * Returns an initialised array of file positions
221: * for all messages in the mbox file.
222: * @return a long array
223: * @throws IOException thrown when unable to read
224: * from the specified file channel
225: */
226: public long[] getMessagePositions() throws IOException {
227: if (messagePositions == null) {
228: final long length = getChannel().size();
229: log.debug("Channel size [" + String.valueOf(length)
230: + "] bytes");
231:
232: if (0 == length)
233: return new long[0];
234:
235: List posList = new ArrayList();
236:
237: final long FRAME = 32000;
238: final long STEPBACK = FROM__PATTERN.length() - 1;
239: long size = (length < FRAME ? length : FRAME);
240:
241: long offset = 0;
242: FileChannel chnnl = getChannel();
243:
244: // read mbox file to determine the message positions..
245: ByteBuffer buffer = chnnl.map(
246: FileChannel.MapMode.READ_ONLY, 0l, size);
247: CharBuffer cb = decoder.decode(buffer);
248:
249: // check that first message is correct..
250: if (Pattern.compile(INITIAL_FROM__PATTERN, Pattern.DOTALL)
251: .matcher(cb).matches()) {
252: // debugging..
253: log.debug("Matched first message...");
254:
255: posList.add(new Long(0));
256: }
257:
258: Pattern fromPattern = Pattern.compile(FROM__PATTERN);
259: Matcher matcher;
260:
261: do {
262: log.debug("scanning from " + String.valueOf(offset)
263: + " to " + String.valueOf(offset + size));
264: matcher = fromPattern.matcher(cb);
265: while (matcher.find()) {
266: log.debug("Found match at ["
267: + String.valueOf(offset + matcher.start())
268: + "]");
269:
270: // add one (1) to position to account for newline..
271: posList.add(new Long(offset + matcher.start() + 1));
272: } // wend
273:
274: if (size < FRAME)
275: break;
276:
277: offset += FRAME - STEPBACK;
278: size = (offset + FRAME < length) ? FRAME : length
279: - (offset + 1);
280:
281: buffer = chnnl.map(FileChannel.MapMode.READ_ONLY,
282: offset, size);
283: cb = decoder.decode(buffer);
284: } while (true);
285:
286: log.debug("found " + String.valueOf(posList.size())
287: + " matches");
288:
289: messagePositions = new long[posList.size()];
290:
291: int count = 0;
292:
293: for (Iterator i = posList.iterator(); i.hasNext(); count++) {
294: messagePositions[count] = ((Long) i.next()).longValue();
295: } // next
296: } // fi (messagePositions == null)
297: return messagePositions;
298: } // getMessagePositions
299:
300: /**
301: * <p>Get byte offset position of a given message inside the mbox file</p>
302: * This method is slow when called for the first time, as it has to parse
303: * the whole Mbox file for finding each message index.
304: * @param index Message Index
305: * @return message byte offset position inside the mbox file
306: * @throws IOException
307: * @throws ArrayIndexOutOfBoundsException
308: */
309: public long getMessagePosition(int index) throws IOException,
310: ArrayIndexOutOfBoundsException {
311: if (messagePositions == null)
312: getMessagePositions();
313: return messagePositions[index];
314: }
315:
316: /**
317: * Get size of a message in bytes
318: * @param index Message Index
319: * @throws IOException
320: * @throws ArrayIndexOutOfBoundsException
321: */
322: public int getMessageSize(int index) throws IOException,
323: ArrayIndexOutOfBoundsException {
324: long position = getMessagePosition(index);
325: long size;
326:
327: if (index < messagePositions.length - 1)
328: size = messagePositions[index + 1] - position;
329: else
330: size = getChannel().size() - position;
331:
332: return (int) size;
333: }
334:
335: /**
336: * Returns the total number of messages in the mbox file.
337: * @return an int
338: */
339: public int getMessageCount() throws IOException {
340: return getMessagePositions().length;
341: }
342:
343: /**
344: * Returns a CharSequence containing the data for
345: * the message at the specified index.
346: * @param index the index of the message to retrieve
347: * @return a CharSequence
348: */
349: public CharSequence getMessage(final int index) throws IOException {
350: long position = getMessagePosition(index);
351: long size;
352:
353: if (index < messagePositions.length - 1) {
354: size = messagePositions[index + 1] - position;
355: } else {
356: size = getChannel().size() - position;
357: }
358:
359: return decoder.decode(getChannel().map(
360: FileChannel.MapMode.READ_ONLY, position, size));
361: }
362:
363: /**
364: * Get message as stream
365: * @param begin long Byte offset position for message
366: * @param size int Number of bytes to be readed
367: * @return InputStream
368: * @throws IOException
369: */
370: public InputStream getMessageAsStream(final long begin,
371: final int size) throws IOException {
372:
373: log.debug("MboxFile.getMessageAsStream("
374: + String.valueOf(begin) + "," + String.valueOf(size)
375: + ")");
376:
377: // Skip From line
378: ByteBuffer byFrom = getChannel().map(
379: FileChannel.MapMode.READ_ONLY, begin, 128);
380: CharBuffer chFrom = decoder.decode(byFrom);
381:
382: int start = 0;
383: // Ignore any white spaces and line feed
384: char c = chFrom.charAt(start);
385: while (c == ' ' || c == '\r' || c == '\n' || c == '\t')
386: c = chFrom.charAt(++start);
387: // If first line does not start with message preffx then raise an exception
388: if (!chFrom.subSequence(start, start + FROM__PREFIX.length())
389: .toString().equals(FROM__PREFIX))
390: throw new IOException(
391: "MboxFile.getMessageAsStream() starting position "
392: + String.valueOf(start)
393: + " \""
394: + chFrom.subSequence(start,
395: start + FROM__PREFIX.length())
396: .toString()
397: + "\" does not match a begin message token \""
398: + FROM__PREFIX + "\"");
399: // Skip the From line
400: while (chFrom.charAt(start++) != (char) 10)
401: ;
402:
403: log.debug(" skip = " + String.valueOf(start));
404: log.debug(" start = " + String.valueOf(begin + start));
405:
406: MappedByteBuffer byBuffer = getChannel().map(
407: FileChannel.MapMode.READ_ONLY, begin + start, size);
408: byte[] byArray = new byte[size];
409: byBuffer.get(byArray);
410:
411: ByteArrayInputStream byStrm = new ByteArrayInputStream(byArray);
412:
413: return byStrm;
414: }
415:
416: // -------------------------------------------------------------------------
417:
418: public InputStream getPartAsStream(final long begin,
419: final long offset, final int size) throws IOException {
420: log.debug("MboxFile.getPartAsStream(" + String.valueOf(begin)
421: + "," + String.valueOf(offset) + ","
422: + String.valueOf(size) + ")");
423:
424: // Skip From line
425: ByteBuffer byFrom = getChannel().map(
426: FileChannel.MapMode.READ_ONLY, begin, 128);
427: CharBuffer chFrom = decoder.decode(byFrom);
428:
429: log.debug("from line decoded");
430:
431: int start = 0;
432: // Ignore any white spaces and line feed
433: char c = chFrom.charAt(start);
434: while (c == ' ' || c == '\r' || c == '\n' || c == '\t')
435: c = chFrom.charAt(++start);
436: // If first line does not start with message preffx then raise an exception
437: log.debug("first line is "
438: + chFrom.subSequence(start,
439: start + FROM__PREFIX.length()).toString());
440: if (!chFrom.subSequence(start, start + FROM__PREFIX.length())
441: .toString().equals(FROM__PREFIX))
442: throw new IOException(
443: "MboxFile.getPartAsStream() starting position "
444: + String.valueOf(start)
445: + " \""
446: + chFrom.subSequence(start,
447: start + FROM__PREFIX.length())
448: .toString()
449: + "\" does not match a begin message token \""
450: + FROM__PREFIX + "\"");
451: // Skip the From line
452: while (chFrom.charAt(start++) != (char) 10)
453: ;
454:
455: start += offset;
456:
457: log.debug(" skip = " + String.valueOf(start));
458: log.debug(" start = " + String.valueOf(start));
459:
460: MappedByteBuffer byBuffer = getChannel().map(
461: FileChannel.MapMode.READ_ONLY, begin + start, size);
462: byte[] byArray = new byte[size];
463: byBuffer.get(byArray);
464:
465: ByteArrayInputStream byStrm = new ByteArrayInputStream(byArray);
466:
467: return byStrm;
468: }
469:
470: /**
471: * Opens an input stream to the specified message
472: * data.
473: * @param index the index of the message to open
474: * a stream to
475: * @return an input stream
476: */
477: public InputStream getMessageAsStream(int index) throws IOException {
478: long position = getMessagePosition(index);
479: int size;
480:
481: log.debug("MboxFile.getMessageAsStream("
482: + String.valueOf(position) + ")");
483:
484: if (index < messagePositions.length - 1) {
485: size = (int) (messagePositions[index + 1] - position);
486: } else {
487: size = (int) (getChannel().size() - position);
488: }
489:
490: // Skip From line
491: ByteBuffer byFrom = getChannel().map(
492: FileChannel.MapMode.READ_ONLY, position, 256);
493: CharBuffer chFrom = decoder.decode(byFrom);
494:
495: int start = 0;
496: // Ignore any white spaces and line feed
497: char c = chFrom.charAt(start);
498: while (c == ' ' || c == '\r' || c == '\n' || c == '\t')
499: c = chFrom.charAt(++start);
500: // If first line does not start with message preffx then raise an exception
501: if (!chFrom.subSequence(start, start + FROM__PREFIX.length())
502: .toString().equals(FROM__PREFIX))
503: throw new IOException(
504: "MboxFile.getMessageAsStream() starting position "
505: + String.valueOf(start)
506: + " \""
507: + chFrom.subSequence(start,
508: start + FROM__PREFIX.length())
509: .toString()
510: + "\" does not match a begin message token \""
511: + FROM__PREFIX + "\"");
512: // Skip the From line
513: while (chFrom.charAt(start++) != (char) 10)
514: ;
515:
516: log.debug(" skip = " + String.valueOf(start));
517: log.debug(" start = " + String.valueOf(position + start));
518:
519: MappedByteBuffer byBuffer = getChannel().map(
520: FileChannel.MapMode.READ_ONLY, position + start,
521: size - start);
522: byte[] byArray = new byte[size - start];
523: byBuffer.get(byArray);
524:
525: ByteArrayInputStream byStrm = new ByteArrayInputStream(byArray);
526:
527: return byStrm;
528: }
529:
530: /**
531: * Appends the specified message from another mbox file
532: * @param source Source mbox file
533: * @param srcpos Byte offset position of message at source mbox file
534: * @param srcsize Size of source message in bytes
535: * @return byte offset position where message is appended on this mbox file
536: * @throws IOException
537: */
538: public final long appendMessage(MboxFile source, long srcpos,
539: int srcsize) throws IOException {
540:
541: long position = channel.size();
542:
543: // if not first message add required newlines..
544: if (position > 0) {
545: channel.write(encoder.encode(CharBuffer.wrap("\n\n")),
546: channel.size());
547: }
548: channel.write(encoder.encode(CharBuffer
549: .wrap(DEFAULT_FROM__LINE)), channel.size());
550:
551: channel.write(source.getChannel().map(
552: FileChannel.MapMode.READ_ONLY, srcpos, srcsize));
553:
554: return position;
555: }
556:
557: /**
558: * Appends the specified message from another mbox file
559: * @param source Source mbox file
560: * @param index Index of message to be appended at the source file
561: * @return byte offset position where message is appended on this mbox file
562: * @throws IOException
563: */
564: public final long appendMessage(MboxFile source, int index)
565: throws IOException {
566: long srcpos = source.getMessagePosition(index);
567: int srcsize;
568:
569: if (index < source.messagePositions.length - 1) {
570: srcsize = (int) (source.messagePositions[index + 1] - srcpos);
571: } else {
572: srcsize = (int) (source.getChannel().size() - srcpos);
573: }
574:
575: return appendMessage(source, srcpos, srcsize);
576: }
577:
578: /**
579: * Appends the specified message (represented by a CharSequence) to the
580: * mbox file.
581: * @param message
582: */
583: public final long appendMessage(final CharSequence message)
584: throws IOException {
585: return appendMessage(message, getChannel());
586: }
587:
588: /**
589: * Appends the specified message (represented by a CharSequence) to the specified channel.
590: * @param message
591: * @param channel
592: * @return long Byte position where message is appended
593: * @throws IOException
594: */
595: private long appendMessage(final CharSequence message,
596: FileChannel channel) throws IOException {
597: long position = channel.size();
598:
599: if (!hasFrom_Line(message)) {
600: // if not first message add required newlines..
601: if (position > 0) {
602: channel.write(encoder.encode(CharBuffer.wrap("\n\n")),
603: channel.size());
604: }
605: channel.write(encoder.encode(CharBuffer
606: .wrap(DEFAULT_FROM__LINE)), channel.size());
607: }
608:
609: channel.write(encoder.encode(CharBuffer.wrap(message)), channel
610: .size());
611:
612: return position;
613: }
614:
615: /**
616: * Purge the specified messages from the file.
617: * @param messageNumbers int[]
618: * @throws IOException
619: * @throws IllegalArgumentException
620: */
621: public void purge(int[] messageNumbers) throws IOException,
622: IllegalArgumentException {
623:
624: if (null == messageNumbers)
625: return;
626: if (0 == messageNumbers.length)
627: return;
628:
629: getMessagePositions();
630:
631: if (null == messagePositions)
632: return;
633: if (0 == messagePositions.length)
634: return;
635:
636: final int total = messagePositions.length;
637: final int count = messageNumbers.length;
638: int size;
639: long start, next, append;
640: boolean perform;
641: ByteBuffer messageBuffer = null;
642: byte[] byBuffer = null;
643:
644: log.debug("MboxFile.purge(" + String.valueOf(count) + " of "
645: + String.valueOf(total) + ")");
646:
647: getChannel();
648: long[] newPositions = null;
649: int newIndex = 0;
650:
651: newPositions = new long[total - count];
652:
653: append = 0;
654: for (int index = 0; index < total; index++) {
655:
656: perform = true;
657: for (int d = 0; d < count; d++)
658: if (messageNumbers[d] == index)
659: perform = false;
660:
661: start = messagePositions[index];
662: if (index < total - 1) {
663: next = messagePositions[index + 1];
664: size = (int) (next - messagePositions[index]);
665: } else {
666: next = -1l;
667: size = (int) (channel.size() - messagePositions[index]);
668: }
669:
670: if (perform) {
671: log.debug("FileChannel.map(MapMode.READ_WRITE,"
672: + String.valueOf(next) + ","
673: + String.valueOf(size) + ")");
674: newPositions[newIndex] = append;
675: newIndex++;
676:
677: if (start != append) {
678: messageBuffer = channel
679: .map(FileChannel.MapMode.READ_WRITE, start,
680: size);
681: if (byBuffer == null)
682: byBuffer = new byte[size];
683: else if (byBuffer.length < size)
684: byBuffer = new byte[size];
685: messageBuffer.get(byBuffer, 0, size);
686: channel.position(append);
687: channel.write(ByteBuffer.wrap(byBuffer));
688: messageBuffer.clear();
689: messageBuffer = null;
690: } // fi (-1!=next)
691: append += size;
692: }
693: } // next
694: log.debug("FileChannel.truncate(" + String.valueOf(append)
695: + ")");
696: messageBuffer = null;
697: try {
698: channel.truncate(append);
699: } catch (IOException e) {
700: log.debug("MBoxFile.purge() FileChannel.truncate() failed");
701: }
702:
703: messagePositions = null;
704: messagePositions = newPositions;
705: } // purge
706:
707: /**
708: * Close the mbox file and release any system resources.
709: * @throws IOException
710: */
711: public void close() throws IOException {
712: if (lock != null) {
713: lock.release();
714: lock = null;
715: }
716:
717: if (channel != null) {
718: channel.close();
719: channel = null;
720: }
721: }
722:
723: /**
724: * Indicates whether the specified CharSequence representation of
725: * a message contains a "From_" line.
726: * @param message a CharSequence representing a message
727: * @return true if a "From_" line is found, otherwise false
728: */
729: private boolean hasFrom_Line(CharSequence message) {
730: return Pattern.compile(FROM__PREFIX + ".*", Pattern.DOTALL)
731: .matcher(message).matches();
732: }
733: }
|