001: /*
002: * Licensed to the Apache Software Foundation (ASF) under one or more
003: * contributor license agreements. See the NOTICE file distributed with
004: * this work for additional information regarding copyright ownership.
005: * The ASF licenses this file to You under the Apache License, Version 2.0
006: * (the "License"); you may not use this file except in compliance with
007: * the License. You may obtain a copy of the License at
008: *
009: * http://www.apache.org/licenses/LICENSE-2.0
010: *
011: * Unless required by applicable law or agreed to in writing, software
012: * distributed under the License is distributed on an "AS IS" BASIS,
013: * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014: * See the License for the specific language governing permissions and
015: * limitations under the License.
016: *
017: */
018:
019: /* $Id: Grep.java 152682 2005-02-08 18:13:39Z gregor $ */
020:
021: package org.apache.lenya.search;
022:
023: import java.io.File;
024: import java.io.FileInputStream;
025: import java.io.FileNotFoundException;
026: import java.io.IOException;
027: import java.nio.CharBuffer;
028: import java.nio.MappedByteBuffer;
029: import java.nio.channels.FileChannel;
030: import java.nio.charset.CharacterCodingException;
031: import java.nio.charset.Charset;
032: import java.nio.charset.CharsetDecoder;
033: import java.util.ArrayList;
034: import java.util.List;
035: import java.util.regex.Matcher;
036: import java.util.regex.Pattern;
037:
038: import org.apache.avalon.framework.logger.ConsoleLogger;
039: import org.apache.avalon.framework.logger.Logger;
040:
041: /**
042: * Utility class to provide a subset of the grep functionality.
043: */
044: public class Grep {
045:
046: private static Charset charset = Charset.forName("UTF-8");
047: private static CharsetDecoder decoder = charset.newDecoder();
048:
049: /**
050: * Check if the given file contains the pattern
051: * @param file the file which is to be searched for the pattern
052: * @param pattern the pattern that is being searched.
053: * @return true if the file contains the string, false otherwise.
054: * @throws IOException
055: */
056: public static boolean containsPattern(File file, Pattern pattern)
057: throws IOException {
058:
059: Logger log = new ConsoleLogger();
060:
061: FileChannel fc = null;
062: // Open the file and then get a channel from the stream
063: FileInputStream fis = null;
064: boolean result = false;
065:
066: try {
067: fis = new FileInputStream(file);
068: fc = fis.getChannel();
069:
070: // Get the file's size and then map it into memory
071: int sz = (int) fc.size();
072: MappedByteBuffer bb = fc.map(FileChannel.MapMode.READ_ONLY,
073: 0, sz);
074:
075: // Decode the file into a char buffer
076: CharBuffer cb = decoder.decode(bb);
077:
078: // Perform the search
079: Matcher pm = pattern.matcher(cb); // Pattern matcher
080:
081: result = pm.find();
082: } catch (FileNotFoundException e) {
083: log.error("File not found: " + e.toString());
084: } catch (CharacterCodingException e) {
085: log.error("Problem with encoding: " + e.toString());
086: } catch (IOException e) {
087: log.error("IO Exception: " + e.toString());
088: } finally {
089: // Close the channel and the stream
090: if (fc != null)
091: fc.close();
092: if (fis != null)
093: fis.close();
094: }
095: return result;
096: }
097:
098: /**
099: * Find all occurences of pattern in a file.
100: * @param file the file to search for occurences of pattern
101: * @param pattern the pattern to search for
102: * @param group which group in the pattern to return
103: * @return an <code>array</code> of occurences of pattern
104: * (i.e. the groupth group of the match)
105: * @throws IOException if the file could not be read.
106: */
107: public static String[] findPattern(File file, Pattern pattern,
108: int group) throws IOException {
109:
110: Logger log = new ConsoleLogger();
111:
112: ArrayList occurences = new ArrayList();
113: FileInputStream fis = null;
114: FileChannel fc = null;
115:
116: try {
117: // Open the file and then get a channel from the stream
118: fis = new FileInputStream(file);
119: fc = fis.getChannel();
120:
121: // Get the file's size and then map it into memory
122: int sz = (int) fc.size();
123: MappedByteBuffer bb = fc.map(FileChannel.MapMode.READ_ONLY,
124: 0, sz);
125:
126: // Decode the file into a char buffer
127: CharBuffer cb = decoder.decode(bb);
128:
129: // Perform the search
130: Matcher pm = pattern.matcher(cb); // Pattern matcher
131:
132: while (pm.find()) {
133: occurences.add(pm.group(group));
134: }
135: } catch (FileNotFoundException e) {
136: log.error("file not found " + e.toString());
137: } catch (CharacterCodingException e) {
138: log.error("encoding problem " + e.toString());
139: } catch (IOException e) {
140: log.error("IO exception" + e.toString());
141: } finally {
142: // Close the channel and the stream
143: if (fc != null)
144: fc.close();
145: if (fis != null)
146: fis.close();
147: }
148: return (String[]) occurences.toArray(new String[occurences
149: .size()]);
150:
151: }
152:
153: /**
154: * Find all files below the given file which contain the given pattern.
155: * @param file the file where to start the search for the pattern.
156: * @param pattern the pattern to search for.
157: * @return an array of files which contain the pattern
158: * @throws IOException if any of the files could not be opened.
159: */
160: private static List find_internal(File file, Pattern pattern)
161: throws IOException {
162: ArrayList fileList = new ArrayList();
163:
164: if (file.isDirectory()) {
165: String[] children = file.list();
166: for (int i = 0; i < children.length; i++) {
167: fileList.addAll(find_internal(new File(file
168: .getAbsolutePath(), children[i]), pattern));
169: }
170: } else if (file.isFile() && containsPattern(file, pattern)) {
171: fileList.add(file);
172: }
173: return fileList;
174: }
175:
176: /**
177: * Find all files below the given file which contain the given search string.
178: *
179: * @param file the where to start the search
180: * @param searchString the string to search for.
181: *
182: * @return an array of files which contain the search string.
183: *
184: * @throws IOException if any of the files could not be opened.
185: */
186: public static File[] find(File file, String searchString)
187: throws IOException {
188: Pattern pattern = Pattern.compile(searchString);
189: List fileList = find_internal(file, pattern);
190: return (File[]) fileList.toArray(new File[fileList.size()]);
191: }
192: }
|