001: /**
002: *
003: * edtFTPj
004: *
005: * Copyright (C) 2000-2004 Enterprise Distributed Technologies Ltd
006: *
007: * www.enterprisedt.com
008: *
009: * This library is free software; you can redistribute it and/or
010: * modify it under the terms of the GNU Lesser General Public
011: * License as published by the Free Software Foundation; either
012: * version 2.1 of the License, or (at your option) any later version.
013: *
014: * This library is distributed in the hope that it will be useful,
015: * but WITHOUT ANY WARRANTY; without even the implied warranty of
016: * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
017: * Lesser General Public License for more details.
018: *
019: * You should have received a copy of the GNU Lesser General Public
020: * License along with this library; if not, write to the Free Software
021: * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
022: *
023: * Bug fixes, suggestions and comments should be posted on
024: * http://www.enterprisedt.com/forums/index.php
025: *
026: * Change Log:
027: *
028: * $Log: UnixFileParser.java,v $
029: * Revision 1.19 2007-12-18 07:52:53 bruceb
030: * trimStart() changes
031: *
032: * Revision 1.18 2007-10-12 05:20:44 bruceb
033: * permit ignoring date parser errors
034: *
035: * Revision 1.17 2007-06-15 08:15:30 bruceb
036: * Connect:Enterprise fix
037: *
038: * Revision 1.16 2007/03/28 06:04:15 bruceb
039: * support reverse MMM/dd formats
040: *
041: * Revision 1.15 2007/03/19 22:10:57 bruceb
042: * when testing for future, set future date 2 days ahead to account for time zones
043: *
044: * Revision 1.14 2006/10/11 08:57:40 hans
045: * Removed usage of deprecated FTPFile constructor and made cvsId final
046: *
047: * Revision 1.13 2006/05/23 04:10:17 bruceb
048: * support Unix listing starting with 'p'
049: *
050: * Revision 1.12 2005/06/03 11:26:25 bruceb
051: * comment change
052: *
053: * Revision 1.11 2005/04/01 13:57:15 bruceb
054: * minor tweak re groups
055: *
056: * Revision 1.10 2004/10/19 16:15:49 bruceb
057: * minor restructuring
058: *
059: * Revision 1.9 2004/10/18 15:58:15 bruceb
060: * setLocale
061: *
062: * Revision 1.8 2004/09/20 21:36:13 bruceb
063: * tweak to skip invalid lines
064: *
065: * Revision 1.7 2004/09/17 14:56:54 bruceb
066: * parse fixes including wrong year
067: *
068: * Revision 1.6 2004/07/23 08:32:36 bruceb
069: * made cvsId public
070: *
071: * Revision 1.5 2004/06/11 10:19:59 bruceb
072: * fixed bug re filename same as user
073: *
074: * Revision 1.4 2004/05/20 19:47:00 bruceb
075: * blanks in names fix
076: *
077: * Revision 1.3 2004/05/05 20:27:41 bruceb
078: * US locale for date formats
079: *
080: * Revision 1.2 2004/05/01 11:44:21 bruceb
081: * modified for server returning "total 3943" as first line
082: *
083: * Revision 1.1 2004/04/17 23:42:07 bruceb
084: * file parsing part II
085: *
086: * Revision 1.1 2004/04/17 18:37:23 bruceb
087: * new parse functionality
088: *
089: */package com.enterprisedt.net.ftp;
090:
091: import java.text.ParseException;
092: import java.text.SimpleDateFormat;
093: import java.util.Calendar;
094: import java.util.Date;
095: import java.util.Locale;
096:
097: /**
098: * Represents a remote Unix file parser
099: *
100: * @author Bruce Blackshaw
101: * @version $Revision: 1.19 $
102: */
103: public class UnixFileParser extends FTPFileParser {
104:
105: /**
106: * Revision control id
107: */
108: final public static String cvsId = "@(#)$Id: UnixFileParser.java,v 1.19 2007-12-18 07:52:53 bruceb Exp $";
109:
110: /**
111: * Symbolic link symbol
112: */
113: private final static String SYMLINK_ARROW = "->";
114:
115: /**
116: * Indicates symbolic link
117: */
118: private final static char SYMLINK_CHAR = 'l';
119:
120: /**
121: * These chars indicates ordinary files
122: */
123: private final static char[] FILE_CHARS = { '-', 'p' };
124:
125: /**
126: * Indicates directory
127: */
128: private final static char DIRECTORY_CHAR = 'd';
129:
130: /**
131: * Date formatter 1 with no HH:mm
132: */
133: private SimpleDateFormat noHHmmFormatter1;
134:
135: /**
136: * Date formatter 2 with no HH:mm
137: */
138: private SimpleDateFormat noHHmmFormatter2;
139:
140: /**
141: * Date formatter with no HH:mm
142: */
143: private SimpleDateFormat noHHmmFormatter;
144:
145: /**
146: * Date formatter 1 with HH:mm
147: */
148: private SimpleDateFormat hhmmFormatter1;
149:
150: /**
151: * Date formatter 2 with HH:mm
152: */
153: private SimpleDateFormat hhmmFormatter2;
154:
155: /**
156: * Date formatter with HH:mm
157: */
158: private SimpleDateFormat hhmmFormatter;
159:
160: /**
161: * Minimum number of expected fields
162: */
163: private final static int MIN_FIELD_COUNT = 8;
164:
165: /**
166: * Constructor
167: */
168: public UnixFileParser() {
169: setLocale(Locale.getDefault());
170: }
171:
172: /**
173: * Set the locale for date parsing of listings
174: *
175: * @param locale locale to set
176: */
177: public void setLocale(Locale locale) {
178: noHHmmFormatter1 = new SimpleDateFormat("MMM-dd-yyyy", locale);
179: noHHmmFormatter2 = new SimpleDateFormat("dd-MMM-yyyy", locale);
180: noHHmmFormatter = noHHmmFormatter1;
181: hhmmFormatter1 = new SimpleDateFormat("MMM-dd-yyyy-HH:mm",
182: locale);
183: hhmmFormatter2 = new SimpleDateFormat("dd-MMM-yyyy-HH:mm",
184: locale);
185: hhmmFormatter = hhmmFormatter1;
186: }
187:
188: /**
189: * Is this a Unix format listing?
190: *
191: * @param raw raw listing line
192: * @return true if Unix, false otherwise
193: */
194: public static boolean isUnix(String raw) {
195: char ch = raw.charAt(0);
196: if (ch == DIRECTORY_CHAR || ch == SYMLINK_CHAR)
197: return true;
198: for (int i = 0; i < FILE_CHARS.length; i++)
199: if (ch == FILE_CHARS[i])
200: return true;
201: return false;
202: }
203:
204: /**
205: * Parse server supplied string, e.g.:
206: *
207: * lrwxrwxrwx 1 wuftpd wuftpd 14 Jul 22 2002 MIRRORS -> README-MIRRORS
208: * -rw-r--r-- 1 b173771 users 431 Mar 31 20:04 .htaccess
209: *
210: * @param raw raw string to parse
211: */
212: public FTPFile parse(String raw) throws ParseException {
213:
214: // test it is a valid line, e.g. "total 342522" is invalid
215: if (!isUnix(raw))
216: return null;
217:
218: String[] fields = split(raw);
219:
220: if (fields.length < MIN_FIELD_COUNT) {
221: StringBuffer listing = new StringBuffer(
222: "Unexpected number of fields in listing '");
223: listing.append(raw).append("' - expected minimum ").append(
224: MIN_FIELD_COUNT).append(" fields but found ")
225: .append(fields.length).append(" fields");
226: throw new ParseException(listing.toString(), 0);
227: }
228:
229: // field pos
230: int index = 0;
231:
232: // first field is perms
233: char ch = raw.charAt(0);
234: String permissions = fields[index++];
235: ch = permissions.charAt(0);
236: boolean isDir = false;
237: boolean isLink = false;
238: if (ch == DIRECTORY_CHAR)
239: isDir = true;
240: else if (ch == SYMLINK_CHAR)
241: isLink = true;
242:
243: // some servers don't supply the link count
244: int linkCount = 0;
245: if (Character.isDigit(fields[index].charAt(0))) {
246: try {
247: linkCount = Integer.parseInt(fields[index++]);
248: } catch (NumberFormatException ignore) {
249: }
250: }
251:
252: // owner and group
253: String owner = fields[index++];
254: String group = fields[index++];
255:
256: // size
257: long size = 0L;
258: String sizeStr = fields[index];
259: // some listings don't have group - make group -> size in
260: // this case, and use the sizeStr for the start of the date
261: if (!Character.isDigit(sizeStr.charAt(0))
262: && Character.isDigit(group.charAt(0))) {
263: sizeStr = group;
264: group = "";
265: } else {
266: index++;
267: }
268: try {
269: size = Long.parseLong(sizeStr);
270: } catch (NumberFormatException ex) {
271: throw new ParseException(
272: "Failed to parse size: " + sizeStr, 0);
273: }
274:
275: // next 3 are the date time
276:
277: // we expect the month first on Unix.
278: // Connect:Enterprise UNIX has a weird extra field here - we test if the
279: // next field starts with a digit and if so, we skip it
280: if (Character.isDigit(fields[index].charAt(0)))
281: index++;
282:
283: int dateTimePos = index;
284: Date lastModified = null;
285: StringBuffer stamp = new StringBuffer(fields[index++]);
286: stamp.append('-').append(fields[index++]).append('-');
287:
288: String field = fields[index++];
289: if (field.indexOf(':') < 0) {
290: stamp.append(field); // year
291: try {
292: lastModified = noHHmmFormatter.parse(stamp.toString());
293: } catch (ParseException ignore) {
294: noHHmmFormatter = (noHHmmFormatter == noHHmmFormatter1 ? noHHmmFormatter2
295: : noHHmmFormatter1);
296: try {
297: lastModified = noHHmmFormatter.parse(stamp
298: .toString());
299: } catch (ParseException ex) {
300: if (!ignoreDateParseErrors)
301: throw ex;
302: }
303: }
304: } else { // add the year ourselves as not present
305: Calendar cal = Calendar.getInstance();
306: int year = cal.get(Calendar.YEAR);
307: stamp.append(year).append('-').append(field);
308: try {
309: lastModified = hhmmFormatter.parse(stamp.toString());
310: } catch (ParseException ignore) {
311: hhmmFormatter = (hhmmFormatter == hhmmFormatter1 ? hhmmFormatter2
312: : hhmmFormatter1);
313: try {
314: lastModified = hhmmFormatter
315: .parse(stamp.toString());
316:
317: } catch (ParseException ex) {
318: if (!ignoreDateParseErrors)
319: throw ex;
320: }
321: }
322:
323: // can't be in the future - must be the previous year
324: // add 2 days just to allow for different time zones
325: cal.add(Calendar.DATE, 2);
326: if (lastModified != null
327: && lastModified.after(cal.getTime())) {
328: cal.setTime(lastModified);
329: cal.add(Calendar.YEAR, -1);
330: lastModified = cal.getTime();
331: }
332: }
333:
334: // name of file or dir. Extract symlink if possible
335: String name = null;
336: String linkedname = null;
337:
338: // we've got to find the starting point of the name. We
339: // do this by finding the pos of all the date/time fields, then
340: // the name - to ensure we don't get tricked up by a userid the
341: // same as the filename,for example
342: int pos = 0;
343: boolean ok = true;
344: for (int i = dateTimePos; i < dateTimePos + 3; i++) {
345: pos = raw.indexOf(fields[i], pos);
346: if (pos < 0) {
347: ok = false;
348: break;
349: } else { // move on the length of the field
350: pos += fields[i].length();
351: }
352: }
353: if (ok) {
354: String remainder = trimStart(raw.substring(pos));
355: if (!isLink)
356: name = remainder;
357: else { // symlink, try to extract it
358: pos = remainder.indexOf(SYMLINK_ARROW);
359: if (pos <= 0) { // couldn't find symlink, give up & just assign as name
360: name = remainder;
361: } else {
362: int len = SYMLINK_ARROW.length();
363: name = remainder.substring(0, pos).trim();
364: if (pos + len < remainder.length())
365: linkedname = remainder.substring(pos + len);
366: }
367: }
368: } else {
369: throw new ParseException("Failed to retrieve name: " + raw,
370: 0);
371: }
372:
373: FTPFile file = new FTPFile(raw, name, size, isDir, lastModified);
374: file.setGroup(group);
375: file.setOwner(owner);
376: file.setLink(isLink);
377: file.setLinkCount(linkCount);
378: file.setLinkedName(linkedname);
379: file.setPermissions(permissions);
380: return file;
381: }
382: }
|