001: package org.openi.feeds;
002:
003: import java.io.BufferedReader;
004: import java.io.File;
005: import java.io.FileNotFoundException;
006: import java.io.FileReader;
007: import java.io.IOException;
008: import java.io.Reader;
009: import java.util.ArrayList;
010: import java.util.Calendar;
011: import java.util.HashMap;
012: import java.util.List;
013: import java.util.Map;
014:
015: import org.apache.log4j.Logger;
016:
017: import java.util.LinkedHashMap;
018:
019: public class MetaDataReader {
020: private static Logger logger = Logger
021: .getLogger(MetaDataReader.class);
022:
023: public static FeedsMetaData readMetaData(File file, int sampleSize,
024: char delimeter, boolean readFields) throws ParseException {
025: FeedsMetaData metadata = new FeedsMetaData();
026: metadata.setDelimeter("" + delimeter);
027: metadata.setSampleSize(sampleSize);
028: readMetaData(file, metadata, readFields);
029: return metadata;
030: }
031:
032: public static void readMetaData(File file,
033: final FeedsMetaData metaData, boolean readFields)
034: throws ParseException {
035:
036: Reader reader = null;
037: try {
038: reader = new FileReader(file);
039: } catch (FileNotFoundException e) {
040: throw new ParseException("File '" + file.getName()
041: + "' is not found");
042: }
043:
044: metaData.setFileSize(file.length());
045: metaData.setParsedDate(Calendar.getInstance());
046: Calendar lastModified = Calendar.getInstance();
047: lastModified.setTimeInMillis(file.lastModified());
048: metaData.setFileDate(lastModified);
049: metaData.setFileName(file.getName());
050: Map attr = metaData.getAttributes();
051:
052: if (attr == null) {
053: attr = new HashMap();
054: metaData.setAttributes(attr);
055: }
056:
057: try {
058: attr.put("path", file.getCanonicalPath());
059: } catch (IOException e) {
060: logger.error(e);
061: }
062:
063: if (metaData.getFieldCount() == null)
064: metaData.setFieldCount(new HashMap());
065:
066: if (!readFields)
067: return;
068: ParserFactory.createFeedParser().parse(reader,
069: metaData.getDelimeter().charAt(0),
070: metaData.getSampleSize(), new ParserResultHandler() {
071:
072: Map fieldMap = metaData.getFieldCount();
073: Map lineDetails = null;
074: int previousFieldCount = 0;
075: long previousLinePos = 0;
076: long blankCount = 0;
077:
078: public void start() {
079: previousFieldCount = 0;
080: previousLinePos = 0;
081: blankCount = 0;
082: fieldMap.clear();
083: lineDetails = new LinkedHashMap();
084: metaData.getAttributes().put("lineDetails",
085: lineDetails);
086: }
087:
088: public void processFields(int lineIndex,
089: String[] fields) {
090: int count = fields.length;
091: if (count == 0) {
092: blankCount++;
093: if (previousFieldCount == 0) {
094: previousFieldCount = count;
095: previousLinePos = lineIndex;
096: } else if (previousFieldCount != count) {
097: String detail = "" + previousLinePos
098: + " - " + (lineIndex - 1);
099: lineDetails.put(detail, ""
100: + previousFieldCount);
101: previousLinePos = lineIndex;
102: previousFieldCount = count;
103: }
104: return;
105: }
106: if (metaData.isParseHeader() && lineIndex == 1) {
107: StringBuffer header = new StringBuffer("");
108: for (int i = 0; i < fields.length; i++) {
109: header.append(fields[i] + ",");
110: }
111: header.deleteCharAt(header.length() - 1);
112: metaData.setHeader(header.toString());
113: }
114:
115: if (previousFieldCount == 0) {
116: previousFieldCount = count;
117: previousLinePos = lineIndex;
118: } else if (previousFieldCount != count) {
119: String detail = "" + previousLinePos
120: + " - " + (lineIndex - 1);
121: lineDetails.put(detail, ""
122: + previousFieldCount);
123: previousLinePos = lineIndex;
124: previousFieldCount = count;
125: }
126:
127: Integer lineCount = (Integer) fieldMap.get(""
128: + count);
129: if (lineCount == null) {
130: lineCount = new Integer(1);
131: } else {
132: lineCount = new Integer(lineCount
133: .intValue() + 1);
134: }
135:
136: fieldMap.put("" + count, lineCount);
137: }
138:
139: public void end(int totalLines) {
140: metaData
141: .setLineCount((int) (totalLines - blankCount));
142: if (previousFieldCount != 0) {
143: String detail = "" + previousLinePos
144: + " - " + totalLines;
145: lineDetails.put(detail, ""
146: + previousFieldCount);
147: }
148: }
149:
150: });
151:
152: }
153:
154: public static int readLineCount(File file) throws IOException {
155: BufferedReader reader = new BufferedReader(new FileReader(file));
156: int count = 0;
157: while (reader.readLine() != null) {
158: count++;
159: }
160: return count;
161: }
162:
163: public static String[] listFolders(File path) throws Exception {
164: if (!path.exists())
165: throw new FileNotFoundException("file '" + path.getPath()
166: + " ' does not exist");
167: if (!path.isDirectory())
168: throw new IllegalArgumentException("Not a directory -"
169: + path.getPath());
170: return path.list();
171: }
172: }
|