001: /*
002: * Utils.java
003: *
004: * Version: $Revision: 2074 $
005: *
006: * Date: $Date: 2007-07-19 14:40:11 -0500 (Thu, 19 Jul 2007) $
007: *
008: * Copyright (c) 2002-2005, Hewlett-Packard Company and Massachusetts
009: * Institute of Technology. All rights reserved.
010: *
011: * Redistribution and use in source and binary forms, with or without
012: * modification, are permitted provided that the following conditions are
013: * met:
014: *
015: * - Redistributions of source code must retain the above copyright
016: * notice, this list of conditions and the following disclaimer.
017: *
018: * - Redistributions in binary form must reproduce the above copyright
019: * notice, this list of conditions and the following disclaimer in the
020: * documentation and/or other materials provided with the distribution.
021: *
022: * - Neither the name of the Hewlett-Packard Company nor the name of the
023: * Massachusetts Institute of Technology nor the names of their
024: * contributors may be used to endorse or promote products derived from
025: * this software without specific prior written permission.
026: *
027: * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
028: * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
029: * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
030: * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
031: * HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
032: * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
033: * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
034: * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
035: * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR
036: * TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
037: * USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
038: * DAMAGE.
039: */
040: package org.dspace.core;
041:
042: import java.io.BufferedInputStream;
043: import java.io.BufferedOutputStream;
044: import java.io.IOException;
045: import java.io.InputStream;
046: import java.io.OutputStream;
047: import java.math.BigInteger;
048: import java.rmi.dgc.VMID;
049: import java.security.MessageDigest;
050: import java.security.NoSuchAlgorithmException;
051: import java.text.ParseException;
052: import java.util.Random;
053: import java.util.regex.Matcher;
054: import java.util.regex.Pattern;
055: import java.util.Date;
056: import java.util.Calendar;
057: import java.util.GregorianCalendar;
058: import java.text.SimpleDateFormat;
059: import java.text.ParseException;
060:
061: import org.apache.log4j.Logger;
062:
063: /**
064: * Utility functions for DSpace.
065: *
066: * @author Peter Breton
067: * @version $Revision: 2074 $
068: */
069: public class Utils {
070: /** log4j logger */
071: private static Logger log = Logger.getLogger(Utils.class);
072:
073: private static final Pattern DURATION_PATTERN = Pattern
074: .compile("(\\d+)([smhdwy])");
075:
076: private static final long MS_IN_SECOND = 1000L;
077:
078: private static final long MS_IN_MINUTE = 60000L;
079:
080: private static final long MS_IN_HOUR = 3600000L;
081:
082: private static final long MS_IN_DAY = 86400000L;
083:
084: private static final long MS_IN_WEEK = 604800000L;
085:
086: private static final long MS_IN_YEAR = 31536000000L;
087:
088: private static int counter = 0;
089:
090: private static Random random = new Random();
091:
092: private static VMID vmid = new VMID();
093:
094: // for parseISO8601Date
095: private static SimpleDateFormat parseFmt[] = {
096: // first try at parsing, has milliseconds (note General time zone)
097: new SimpleDateFormat("yyyy'-'MM'-'dd'T'HH':'mm':'ss.SSSz"),
098:
099: // second try at parsing, no milliseconds (note General time zone)
100: new SimpleDateFormat("yyyy'-'MM'-'dd'T'HH':'mm':'ssz"),
101:
102: // finally, try without any timezone (defaults to current TZ)
103: new SimpleDateFormat("yyyy'-'MM'-'dd'T'HH':'mm':'ss.SSS"),
104:
105: new SimpleDateFormat("yyyy'-'MM'-'dd'T'HH':'mm':'ss") };
106:
107: // for formatISO8601Date
108: // output canonical format (note RFC22 time zone, easier to hack)
109: private static SimpleDateFormat outFmtSecond = new SimpleDateFormat(
110: "yyyy'-'MM'-'dd'T'HH':'mm':'ssZ");
111:
112: // output format with millsecond precision
113: private static SimpleDateFormat outFmtMillisec = new SimpleDateFormat(
114: "yyyy'-'MM'-'dd'T'HH':'mm':'ss.SSSZ");
115:
116: private static Calendar outCal = GregorianCalendar.getInstance();
117:
118: /** Private Constructor */
119: private Utils() {
120: }
121:
122: /**
123: * Return an MD5 checksum for data in hex format.
124: *
125: * @param data
126: * The data to checksum.
127: * @return MD5 checksum for the data in hex format.
128: */
129: public static String getMD5(String data) {
130: return getMD5(data.getBytes());
131: }
132:
133: /**
134: * Return an MD5 checksum for data in hex format.
135: *
136: * @param data
137: * The data to checksum.
138: * @return MD5 checksum for the data in hex format.
139: */
140: public static String getMD5(byte[] data) {
141: return toHex(getMD5Bytes(data));
142: }
143:
144: /**
145: * Return an MD5 checksum for data as a byte array.
146: *
147: * @param data
148: * The data to checksum.
149: * @return MD5 checksum for the data as a byte array.
150: */
151: public static byte[] getMD5Bytes(byte[] data) {
152: try {
153: MessageDigest digest = MessageDigest.getInstance("MD5");
154:
155: return digest.digest(data);
156: } catch (NoSuchAlgorithmException nsae) {
157: }
158:
159: // Should never happen
160: return null;
161: }
162:
163: /**
164: * Return a hex representation of the byte array
165: *
166: * @param data
167: * The data to transform.
168: * @return A hex representation of the data.
169: */
170: public static String toHex(byte[] data) {
171: if ((data == null) || (data.length == 0)) {
172: return null;
173: }
174:
175: StringBuffer result = new StringBuffer();
176:
177: // This is far from the most efficient way to do things...
178: for (int i = 0; i < data.length; i++) {
179: int low = (int) (data[i] & 0x0F);
180: int high = (int) (data[i] & 0xF0);
181:
182: result.append(Integer.toHexString(high).substring(0, 1));
183: result.append(Integer.toHexString(low));
184: }
185:
186: return result.toString();
187: }
188:
189: /**
190: * Generate a unique key. The key is a long (length 38 to 40) sequence of
191: * digits.
192: *
193: * @return A unique key as a long sequence of base-10 digits.
194: */
195: public static String generateKey() {
196: return new BigInteger(generateBytesKey()).abs().toString();
197: }
198:
199: /**
200: * Generate a unique key. The key is a 32-character long sequence of hex
201: * digits.
202: *
203: * @return A unique key as a long sequence of hex digits.
204: */
205: public static String generateHexKey() {
206: return toHex(generateBytesKey());
207: }
208:
209: /**
210: * Generate a unique key as a byte array.
211: *
212: * @return A unique key as a byte array.
213: */
214: public static synchronized byte[] generateBytesKey() {
215: byte[] junk = new byte[16];
216:
217: random.nextBytes(junk);
218:
219: String input = new StringBuffer().append(vmid).append(
220: new java.util.Date()).append(junk).append(counter++)
221: .toString();
222:
223: return getMD5Bytes(input.getBytes());
224: }
225:
226: // The following two methods are taken from the Jakarta IOUtil class.
227:
228: /**
229: * Copy stream-data from source to destination. This method does not buffer,
230: * flush or close the streams, as to do so would require making non-portable
231: * assumptions about the streams' origin and further use. If you wish to
232: * perform a buffered copy, use {@link #bufferedCopy}.
233: *
234: * @param input
235: * The InputStream to obtain data from.
236: * @param output
237: * The OutputStream to copy data to.
238: */
239: public static void copy(final InputStream input,
240: final OutputStream output) throws IOException {
241: final int BUFFER_SIZE = 1024 * 4;
242: final byte[] buffer = new byte[BUFFER_SIZE];
243:
244: while (true) {
245: final int count = input.read(buffer, 0, BUFFER_SIZE);
246:
247: if (-1 == count) {
248: break;
249: }
250:
251: // write out those same bytes
252: output.write(buffer, 0, count);
253: }
254:
255: // needed to flush cache
256: // output.flush();
257: }
258:
259: /**
260: * Copy stream-data from source to destination, with buffering. This is
261: * equivalent to passing {@link #copy}a
262: * <code>java.io.BufferedInputStream</code> and
263: * <code>java.io.BufferedOuputStream</code> to {@link #copy}, and
264: * flushing the output stream afterwards. The streams are not closed after
265: * the copy.
266: *
267: * @param source
268: * The InputStream to obtain data from.
269: * @param destination
270: * The OutputStream to copy data to.
271: */
272: public static void bufferedCopy(final InputStream source,
273: final OutputStream destination) throws IOException {
274: final BufferedInputStream input = new BufferedInputStream(
275: source);
276: final BufferedOutputStream output = new BufferedOutputStream(
277: destination);
278: copy(input, output);
279: output.flush();
280: }
281:
282: /**
283: * Replace characters that could be interpreted as HTML codes with symbolic
284: * references (entities). This function should be called before displaying
285: * any metadata fields that could contain the characters " <", ">", "&",
286: * "'", and double quotation marks. This will effectively disable HTML links
287: * in metadata.
288: *
289: * @param value
290: * the metadata value to be scrubbed for display
291: *
292: * @return the passed-in string, with html special characters replaced with
293: * entities.
294: */
295: public static String addEntities(String value) {
296: if (value == null || value.length() == 0)
297: return value;
298:
299: value = value.replaceAll("&", "&");
300: value = value.replaceAll("\"", """);
301:
302: // actually, ' is an XML entity, not in HTML.
303: // that's why it's commented out.
304: // value = value.replaceAll("'", "'");
305: value = value.replaceAll("<", "<");
306: value = value.replaceAll(">", ">");
307:
308: return value;
309: }
310:
311: /**
312: * Utility method to parse durations defined as \d+[smhdwy] (seconds,
313: * minutes, hours, days, weeks, years)
314: *
315: * @param duration
316: * specified duration
317: *
318: * @return number of milliseconds equivalent to duration.
319: *
320: * @throws ParseException
321: * if the duration is of incorrect format
322: */
323: public static long parseDuration(String duration)
324: throws ParseException {
325: Matcher m = DURATION_PATTERN.matcher(duration.trim());
326: if (!m.matches()) {
327: throw new ParseException("'" + duration
328: + "' is not a valid duration definition", 0);
329: }
330:
331: String units = m.group(2);
332: long multiplier = MS_IN_SECOND;
333:
334: if ("s".equals(units)) {
335: multiplier = MS_IN_SECOND;
336: } else if ("m".equals(units)) {
337: multiplier = MS_IN_MINUTE;
338: } else if ("h".equals(units)) {
339: multiplier = MS_IN_HOUR;
340: } else if ("d".equals(units)) {
341: multiplier = MS_IN_DAY;
342: } else if ("w".equals(units)) {
343: multiplier = MS_IN_WEEK;
344: } else if ("y".equals(units)) {
345: multiplier = MS_IN_YEAR;
346: } else {
347: throw new ParseException(units
348: + " is not a valid time unit (must be 'y', "
349: + "'w', 'd', 'h', 'm' or 's')", duration
350: .indexOf(units));
351: }
352:
353: long qint = Long.parseLong(m.group(1));
354:
355: return qint * multiplier;
356: }
357:
358: /**
359: * Translates timestamp from an ISO 8601-standard format, which
360: * is commonly used in XML and RDF documents.
361: * This method is synchronized because it depends on a non-reentrant
362: * static DateFormat (more efficient than creating a new one each call).
363: *
364: * @param s the input string
365: * @return Date object, or null if there is a problem translating.
366: */
367: public static synchronized Date parseISO8601Date(String s) {
368: // attempt to normalize the timezone to something we can parse;
369: // SimpleDateFormat can't handle "Z"
370: char tzSign = s.charAt(s.length() - 6);
371: if (s.endsWith("Z"))
372: s = s.substring(0, s.length() - 1) + "GMT+00:00";
373:
374: // check for trailing timezone
375: else if (tzSign == '-' || tzSign == '+')
376: s = s.substring(0, s.length() - 6) + "GMT"
377: + s.substring(s.length() - 6);
378:
379: // try to parse without millseconds
380: ParseException lastError = null;
381: for (int i = 0; i < parseFmt.length; ++i) {
382: try {
383: return parseFmt[i].parse(s);
384: } catch (ParseException e) {
385: lastError = e;
386: }
387: }
388: if (lastError != null)
389: log.error("Error parsing date:", lastError);
390: return null;
391: }
392:
393: /**
394: * Convert a Date to String in the ISO 8601 standard format.
395: * The RFC822 timezone is almost right, still need to insert ":".
396: * This method is synchronized because it depends on a non-reentrant
397: * static DateFormat (more efficient than creating a new one each call).
398: *
399: * @param d the input Date
400: * @return String containing formatted date.
401: */
402: public static synchronized String formatISO8601Date(Date d) {
403: String result;
404: outCal.setTime(d);
405: if (outCal.get(Calendar.MILLISECOND) == 0)
406: result = outFmtSecond.format(d);
407: else
408: result = outFmtMillisec.format(d);
409: int rl = result.length();
410: return result.substring(0, rl - 2) + ":"
411: + result.substring(rl - 2);
412: }
413: }
|