001: /*
002: * gnu/regexp/util/Grep.java
003: * Copyright (C) 1998 Wes Biggs
004: * Copyright (C) 2001 Lee Sau Dan for the use of Reader for handling file I/O
005: * Copyright (C) 2001 Ulf Dittmer for support of grepping into ZIP files
006: *
007: * This program is free software; you can redistribute it and/or modify
008: * it under the terms of the GNU General Public License as published
009: * by the Free Software Foundation; either version 2 of the License, or
010: * (at your option) any later version.
011: *
012: * This program is distributed in the hope that it will be useful,
013: * but WITHOUT ANY WARRANTY; without even the implied warranty of
014: * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
015: * GNU General Public License for more details.
016: *
017: * You should have received a copy of the GNU General Public License
018: * along with this program; if not, write to the Free Software
019: * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
020: */
021:
022: package gnu.regexp.util;
023:
024: import gnu.getopt.Getopt;
025: import gnu.getopt.LongOpt;
026: import gnu.regexp.RE;
027: import gnu.regexp.REException;
028: import gnu.regexp.REMatch;
029: import gnu.regexp.RESyntax;
030: import java.io.BufferedReader;
031: import java.io.File;
032: import java.io.FileInputStream;
033: import java.io.FileNotFoundException;
034: import java.io.InputStream;
035: import java.io.InputStreamReader;
036: import java.io.IOException;
037: import java.io.PrintStream;
038: import java.io.UnsupportedEncodingException;
039: import java.util.Enumeration;
040: import java.util.Vector;
041: import java.util.zip.*;
042:
043: /**
044: * Grep is a pure-Java clone of the GNU grep utility. As such, it is much
045: * slower and not as full-featured, but it has the advantage of being
046: * available on any system with a Java virtual machine.
047: *
048: * @author <A HREF="mailto:wes@cacas.org">Wes Biggs</A>
049: * <A HREF="http://www.csis.hku.hk/~sdlee/">Lee Sau Dan</A>
050: * <A HREF="http://www.capital.net/~dittmer/">Ulf Dittmer</A>
051: * @version 1.03
052: * @use gnu.getopt
053: */
054: public class Grep {
055: private static final int BYTE_OFFSET = 0;
056: private static final int COUNT = 1;
057: private static final int LINE_NUMBER = 2;
058: private static final int QUIET = 3;
059: private static final int SILENT = 4;
060: private static final int NO_FILENAME = 5;
061: private static final int REVERT_MATCH = 6;
062: private static final int FILES_WITH_MATCHES = 7;
063: private static final int LINE_REGEXP = 8;
064: private static final int FILES_WITHOUT_MATCH = 9;
065: private static final int EXPAND_ZIP_FILES = 10;
066:
067: private static final String PROGNAME = "gnu.regexp.util.Grep";
068: private static final String PROGVERSION = "1.03";
069:
070: private Grep() {
071: }
072:
073: /**
074: * Invokes the grep() function below with the command line arguments
075: * and using the RESyntax.RE_SYNTAX_GREP syntax, which attempts to
076: * emulate the traditional UNIX grep syntax.
077: */
078: public static void main(String[] argv) {
079: System.exit(grep(argv, RESyntax.RE_SYNTAX_GREP, System.out));
080: }
081:
082: /**
083: * Runs Grep with the specified arguments. For a list of
084: * supported options, specify "--help".
085: *
086: * This is the meat of the grep routine, but unlike main(), you can
087: * specify your own syntax and PrintStream to use for output.
088: */
089: public static int grep(String[] argv, RESyntax syntax,
090: PrintStream out) {
091: // use gnu.getopt to read arguments
092: int cflags = 0;
093:
094: boolean[] options = new boolean[10];
095:
096: String encoding = null;
097:
098: LongOpt[] longOptions = {
099: new LongOpt("byte-offset", LongOpt.NO_ARGUMENT, null,
100: 'b'),
101: new LongOpt("count", LongOpt.NO_ARGUMENT, null, 'c'),
102: new LongOpt("no-filename", LongOpt.NO_ARGUMENT, null,
103: 'h'),
104: new LongOpt("ignore-case", LongOpt.NO_ARGUMENT, null,
105: 'i'),
106: new LongOpt("files-with-matches", LongOpt.NO_ARGUMENT,
107: null, 'l'),
108: new LongOpt("help", LongOpt.NO_ARGUMENT, null, '!'),
109: new LongOpt("line-number", LongOpt.NO_ARGUMENT, null,
110: 'n'),
111: new LongOpt("quiet", LongOpt.NO_ARGUMENT, null, 'q'),
112: new LongOpt("silent", LongOpt.NO_ARGUMENT, null, 'q'),
113: new LongOpt("no-messages", LongOpt.NO_ARGUMENT, null,
114: 's'),
115: new LongOpt("revert-match", LongOpt.NO_ARGUMENT, null,
116: 'v'),
117: new LongOpt("line-regexp", LongOpt.NO_ARGUMENT, null,
118: 'x'),
119: new LongOpt("extended-regexp", LongOpt.NO_ARGUMENT,
120: null, 'E'),
121: new LongOpt("fixed-strings", LongOpt.NO_ARGUMENT, null,
122: 'F'), // TODO
123: new LongOpt("basic-regexp", LongOpt.NO_ARGUMENT, null,
124: 'G'),
125: new LongOpt("files-without-match", LongOpt.NO_ARGUMENT,
126: null, 'L'),
127: new LongOpt("version", LongOpt.NO_ARGUMENT, null, 'V'),
128: new LongOpt("zip", LongOpt.NO_ARGUMENT, null, 'z'),
129: new LongOpt("encoding", LongOpt.REQUIRED_ARGUMENT,
130: null, 'N') };
131:
132: Getopt g = new Getopt(PROGNAME, argv, "bchilnqsvxyEFGLVzN:",
133: longOptions);
134: int c;
135: String arg;
136: while ((c = g.getopt()) != -1) {
137: switch (c) {
138: case 'b':
139: options[BYTE_OFFSET] = true;
140: break;
141: case 'c':
142: options[COUNT] = true;
143: break;
144: case 'h':
145: options[NO_FILENAME] = true;
146: break;
147: case 'i':
148: case 'y':
149: cflags |= RE.REG_ICASE;
150: break;
151: case 'l':
152: options[FILES_WITH_MATCHES] = true;
153: break;
154: case 'n':
155: options[LINE_NUMBER] = true;
156: break;
157: case 'q':
158: options[QUIET] = true;
159: break;
160: case 's':
161: options[SILENT] = true;
162: break;
163: case 'v':
164: options[REVERT_MATCH] = true;
165: break;
166: case 'x':
167: options[LINE_REGEXP] = true;
168: break;
169: case 'E': // TODO: check compatibility with grep
170: syntax = RESyntax.RE_SYNTAX_EGREP;
171: break;
172: case 'F': // TODO: fixed strings
173: break;
174: case 'G':
175: syntax = RESyntax.RE_SYNTAX_GREP;
176: break;
177: case 'L':
178: options[FILES_WITHOUT_MATCH] = true;
179: break;
180: case 'V':
181: System.err.println(PROGNAME + ' ' + PROGVERSION);
182: return 0;
183: case 'z':
184: options[EXPAND_ZIP_FILES] = true;
185: break;
186: case 'N':
187: encoding = g.getOptarg();
188: try { // try out this encoding now. If not found, fall back to default
189: "".getBytes(encoding);
190: } catch (UnsupportedEncodingException uee) {
191: System.err.println(PROGNAME + ": (Warning)"
192: + " Unsupported Encoding: " + encoding
193: + "; reverting to default");
194: encoding = null;
195: }
196: break;
197: case '!': // help
198: try {
199: BufferedReader br = new BufferedReader(
200: new InputStreamReader(
201: (Grep.class)
202: .getResourceAsStream("GrepUsage.txt"),
203: "UTF8"));
204: String line;
205: while ((line = br.readLine()) != null)
206: out.println(line);
207: } catch (IOException ie) {
208: }
209: return 0;
210: }
211: }
212:
213: InputStream is = null;
214: RE pattern = null;
215: if (g.getOptind() >= argv.length) {
216: System.err.println("Usage: java " + PROGNAME
217: + " [OPTION]... PATTERN [FILE]...");
218: System.err.println("Try `java " + PROGNAME
219: + " --help' for more information.");
220: return 2;
221: }
222: try {
223: pattern = new RE(argv[g.getOptind()], cflags, syntax);
224: } catch (REException e) {
225: System.err.println("Error in expression: " + e);
226: return 2;
227: }
228:
229: boolean notFound = true;
230: if (argv.length >= g.getOptind() + 2) {
231: for (int i = g.getOptind() + 1; i < argv.length; i++) {
232: boolean no_filename = (argv.length == g.getOptind() + 2)
233: || options[NO_FILENAME];
234: if (argv[i].equals("-")) {
235: final String filename = no_filename ? null
236: : "(standard input)";
237: if (processStream(pattern, System.in, encoding,
238: options, filename, null, out))
239: notFound = false;
240: } else {
241: final String filename = no_filename ? null
242: : argv[i];
243: try {
244: File file = new File(argv[i]);
245: if (file.isDirectory()) {
246: System.err.println(PROGNAME + ": "
247: + argv[i] + ": Is a directory");
248: } else if (!file.canRead()) {
249: System.err.println(PROGNAME + ": "
250: + argv[i] + ": Permission denied");
251: } else if (options[EXPAND_ZIP_FILES]
252: && argv[i].endsWith(".zip")) {
253: // iterate over all files within this ZIP file
254: try {
255: ZipFile zf = new ZipFile(file);
256: Enumeration list = zf.entries();
257: while (list.hasMoreElements()) {
258: ZipEntry ze = (ZipEntry) list
259: .nextElement();
260: if (!ze.isDirectory()) {
261: if (processStream(pattern, zf
262: .getInputStream(ze),
263: encoding, options,
264: filename, ze.getName(),
265: out))
266: notFound = false;
267: }
268: }
269: } catch (Exception ex) {
270: System.err.println(PROGNAME + ": "
271: + argv[i]
272: + ": Problem reading ZIP file");
273: return 2;
274: }
275: } else {
276: if (processStream(pattern,
277: new FileInputStream(argv[i]),
278: encoding, options, filename, null,
279: out))
280: notFound = false;
281: }
282: } catch (FileNotFoundException e) {
283: if (!options[SILENT])
284: System.err.println(PROGNAME + ": " + e);
285: }
286: }
287: }
288: } else {
289: if (processStream(pattern, System.in, encoding, options,
290: null, null, out))
291: notFound = false;
292: }
293: return notFound ? 1 : 0;
294: }
295:
296: private static boolean processStream(RE pattern, InputStream is,
297: String encoding, boolean[] options, String filename,
298: String zipName, PrintStream out) {
299: try {
300: final InputStreamReader isr = encoding == null ? new InputStreamReader(
301: is)
302: : new InputStreamReader(is, encoding);
303: final BufferedReader r = new BufferedReader(isr);
304: return processReader(pattern, r, options, filename,
305: zipName, out);
306: } catch (UnsupportedEncodingException uee) {
307: /* since grep() should have checked that the 'encoding' parameter
308: is valid, it should be impossible that this exception would
309: happen. Of, sso, it is a logic error.
310: */
311: throw new Error(PROGNAME + ": programming logic error");
312: }
313: }
314:
315: private static String fileNameString(String fileName, String zipName) {
316: if (zipName == null)
317: return fileName;
318: else
319: return zipName + " in " + fileName;
320: }
321:
322: private static boolean processReader(RE pattern, BufferedReader br,
323: boolean[] options, String filename, String zipName,
324: PrintStream out) {
325:
326: int newlineLen = System.getProperty("line.separator").length();
327: int count = 0;
328: long atByte = 0;
329: int atLine = 1;
330: String line;
331: REMatch match;
332:
333: try {
334: while ((line = br.readLine()) != null) {
335: match = pattern.getMatch(line);
336: if (((options[LINE_REGEXP] && pattern.isMatch(line)) || (!options[LINE_REGEXP] && (match != null)))
337: ^ options[REVERT_MATCH]) {
338: count++;
339: if (!options[COUNT]) {
340: if (options[QUIET]) {
341: return true;
342: }
343: if (options[FILES_WITH_MATCHES]) {
344: if (filename != null)
345: out.println(fileNameString(filename,
346: zipName));
347: return true;
348: }
349: if (options[FILES_WITHOUT_MATCH]) {
350: return false;
351: }
352: if (filename != null) {
353: out
354: .print(fileNameString(filename,
355: zipName));
356: out.print(':');
357: }
358: if (options[LINE_NUMBER]) {
359: out.print(atLine);
360: out.print(':');
361: }
362: if (options[BYTE_OFFSET]) {
363: out.print(atByte + match.getStartIndex());
364: out.print(':');
365: }
366: out.println(line);
367: }
368: } // a match
369: atByte += line.length() + newlineLen; // could be troublesome...
370: atLine++;
371: } // a valid line
372: br.close();
373:
374: if (options[COUNT]) {
375: if (filename != null)
376: out
377: .println(fileNameString(filename, zipName) + ':');
378: out.println(count);
379: }
380: if (options[FILES_WITHOUT_MATCH] && count == 0) {
381: if (filename != null)
382: out.println(fileNameString(filename, zipName));
383: }
384: } catch (IOException e) {
385: System.err.println(PROGNAME + ": " + e);
386: }
387: return ((count > 0) ^ options[REVERT_MATCH]);
388: }
389: }
|