001: /*
002: * Licensed to the Apache Software Foundation (ASF) under one or more
003: * contributor license agreements. See the NOTICE file distributed with
004: * this work for additional information regarding copyright ownership.
005: * The ASF licenses this file to You under the Apache License, Version 2.0
006: * (the "License"); you may not use this file except in compliance with
007: * the License. You may obtain a copy of the License at
008: *
009: * http://www.apache.org/licenses/LICENSE-2.0
010: *
011: * Unless required by applicable law or agreed to in writing, software
012: * distributed under the License is distributed on an "AS IS" BASIS,
013: * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014: * See the License for the specific language governing permissions and
015: * limitations under the License.
016: *
017: */
018:
019: package org.apache.jmeter.protocol.http.util.accesslog;
020:
021: import java.io.BufferedReader;
022: import java.io.File;
023: import java.io.FileReader;
024: import java.io.IOException;
025: import java.io.UnsupportedEncodingException;
026: import java.net.URLDecoder;
027: import java.util.StringTokenizer;
028: import java.util.Vector;
029:
030: import org.apache.jmeter.protocol.http.sampler.HTTPSamplerBase;
031: import org.apache.jmeter.testelement.TestElement;
032: import org.apache.jorphan.logging.LoggingManager;
033: import org.apache.log.Logger;
034:
035: // For JUnit tests, @see TestTCLogParser
036:
037: /**
038: * Description:<br>
039: * <br>
040: * Currently the parser only handles GET/POST requests. It's easy enough to add
041: * support for other request methods by changing checkMethod. The is a complete
042: * rewrite of a tool I wrote for myself earlier. The older algorithm was basic
043: * and did not provide the same level of flexibility I want, so I wrote a new
044: * one using a totally new algorithm. This implementation reads one line at a
045: * time using BufferedReader. When it gets to the end of the file and the
046: * sampler needs to get more requests, the parser will re-initialize the
047: * BufferedReader. The implementation uses StringTokenizer to create tokens.
048: * <p>
049: * The parse algorithm is the following:
050: * <p>
051: * <ol>
052: * <li> cleans the entry by looking for backslash "\"
053: * <li> looks to see if GET or POST is in the line
054: * <li> tokenizes using quotes "
055: * <li> finds the token with the request method
056: * <li> gets the string of the token and tokenizes it using space
057: * <li> finds the first token beginning with slash character
058: * <li> tokenizes the string using question mark "?"
059: * <li> get the path from the first token
060: * <li> returns the second token and checks it for parameters
061: * <li> tokenizes the string using ampersand "&"
062: * <li> parses each token to name/value pairs
063: * </ol>
064: * <p>
065: * Extending this class is fairly simple. Most access logs use the same format
066: * starting from the request method. Therefore, changing the implementation of
067: * cleanURL(string) method should be sufficient to support new log formats.
068: * Tomcat uses common log format, so any webserver that uses the format should
069: * work with this parser. Servers that are known to use non standard formats are
070: * IIS and Netscape.
071: * <p>
072: *
073: */
074:
075: public class TCLogParser implements LogParser {
076: static Logger log = LoggingManager.getLoggerForClass();
077:
078: public static final String GET = "GET";
079:
080: public static final String POST = "POST";
081:
082: /** protected members * */
083: protected String RMETHOD = null;
084:
085: /**
086: * The path to the access log file
087: */
088: protected String URL_PATH = null;
089:
090: protected boolean useFILE = true;
091:
092: protected File SOURCE = null;
093:
094: protected String FILENAME = null;
095:
096: protected BufferedReader READER = null;
097:
098: /**
099: * Handles to supporting classes
100: */
101: protected Filter FILTER = null;
102:
103: /**
104: * by default, we probably should decode the parameter values
105: */
106: protected boolean decode = true;
107:
108: // TODO downcase UPPER case non-final variables
109:
110: /**
111: *
112: */
113: public TCLogParser() {
114: super ();
115: }
116:
117: /**
118: * @param source
119: */
120: public TCLogParser(String source) {
121: setSourceFile(source);
122: }
123:
124: /**
125: * by default decode is set to true. if the parameters shouldn't be
126: * decoded, call the method with false
127: * @param decodeparams
128: */
129: public void setDecodeParameterValues(boolean decodeparams) {
130: this .decode = decodeparams;
131: }
132:
133: /**
134: * decode the parameter values is to true by default
135: * @return if paramter values should be decoded
136: */
137: public boolean decodeParameterValue() {
138: return this .decode;
139: }
140:
141: /**
142: * Calls this method to set whether or not to use the path in the log. We
143: * may want to provide the ability to filter the log file later on. By
144: * default, the parser uses the file in the log.
145: *
146: * @param file
147: */
148: public void setUseParsedFile(boolean file) {
149: this .useFILE = file;
150: }
151:
152: /**
153: * Use the filter to include/exclude files in the access logs. This is
154: * provided as a convienance and reduce the need to spend hours cleaning up
155: * log files.
156: *
157: * @param filter
158: */
159: public void setFilter(Filter filter) {
160: FILTER = filter;
161: }
162:
163: /**
164: * Sets the source file.
165: *
166: * @param source
167: */
168: public void setSourceFile(String source) {
169: this .FILENAME = source;
170: }
171:
172: /**
173: * Creates a new File object.
174: *
175: * @param filename
176: */
177: public File openFile(String filename) {
178: return new File(filename);
179: }
180:
181: /**
182: * parse the entire file.
183: *
184: * @return boolean success/failure
185: */
186: public int parse(TestElement el, int parseCount) {
187: if (this .SOURCE == null) {
188: this .SOURCE = this .openFile(this .FILENAME);
189: }
190: try {
191: if (this .READER == null) {
192: this .READER = new BufferedReader(new FileReader(
193: this .SOURCE));
194: }
195: return parse(this .READER, el, parseCount);
196: } catch (Exception exception) {
197: log.error("Problem creating samples", exception);
198: }
199: return -1;// indicate that an error occured
200: }
201:
202: /**
203: * parse a set number of lines from the access log. Keep in mind the number
204: * of lines parsed will depend the filter and number of lines in the log.
205: * The method returns the actual lines parsed.
206: *
207: * @param count
208: * @return lines parsed
209: */
210: public int parseAndConfigure(int count, TestElement el) {
211: return this .parse(el, count);
212: }
213:
214: /**
215: * The method is responsible for reading each line, and breaking out of the
216: * while loop if a set number of lines is given.
217: *
218: * @param breader
219: */
220: protected int parse(BufferedReader breader, TestElement el,
221: int parseCount) {
222: int actualCount = 0;
223: String line = null;
224: try {
225: // read one line at a time using
226: // BufferedReader
227: line = breader.readLine();
228: while (line != null) {
229: if (line.length() > 0) {
230: actualCount += this .parseLine(line, el);
231: }
232: // we check the count to see if we have exceeded
233: // the number of lines to parse. There's no way
234: // to know where to stop in the file. Therefore
235: // we use break to escape the while loop when
236: // we've reached the count.
237: if (parseCount != -1 && actualCount >= parseCount) {
238: break;
239: }
240: line = breader.readLine();
241: }
242: if (line == null) {
243: breader.close();
244: breader = null;
245: this .READER = null;
246: // this.READER = new BufferedReader(new
247: // FileReader(this.SOURCE));
248: // parse(this.READER,el);
249: }
250: } catch (IOException ioe) {
251: log.error("Error reading log file", ioe);
252: }
253: return actualCount;
254: }
255:
256: /**
257: * parseLine calls the other parse methods to parse the given text.
258: *
259: * @param line
260: */
261: protected int parseLine(String line, TestElement el) {
262: int count = 0;
263: // we clean the line to get
264: // rid of extra stuff
265: String cleanedLine = this .cleanURL(line);
266: log.debug("parsing line: " + line);
267: // now we set request method
268: el.setProperty(HTTPSamplerBase.METHOD, RMETHOD);
269: if (FILTER != null) {
270: log.debug("filter is not null");
271: if (!FILTER.isFiltered(line, el)) {
272: log.debug("line was not filtered");
273: // increment the current count
274: count++;
275: // we filter the line first, before we try
276: // to separate the URL into file and
277: // parameters.
278: line = FILTER.filter(cleanedLine);
279: if (line != null) {
280: createUrl(cleanedLine, el);
281: }
282: } else {
283: log.debug("Line was filtered");
284: }
285: } else {
286: log.debug("filter was null");
287: // increment the current count
288: count++;
289: // in the case when the filter is not set, we
290: // parse all the lines
291: createUrl(cleanedLine, el);
292: }
293: return count;
294: }
295:
296: /**
297: * @param line
298: */
299: private void createUrl(String line, TestElement el) {
300: String paramString = null;
301: // check the URL for "?" symbol
302: paramString = this .stripFile(line, el);
303: if (paramString != null) {
304: this .checkParamFormat(line);
305: // now that we have stripped the file, we can parse the parameters
306: this .convertStringToJMRequest(paramString, el);
307: }
308: }
309:
310: /**
311: * The method cleans the URL using the following algorithm.
312: * <ol>
313: * <li> check for double quotes
314: * <li> check the request method
315: * <li> tokenize using double quotes
316: * <li> find first token containing request method
317: * <li> tokenize string using space
318: * <li> find first token that begins with "/"
319: * </ol>
320: * Example Tomcat log entry:
321: * <p>
322: * 127.0.0.1 - - [08/Jan/2003:07:03:54 -0500] "GET /addrbook/ HTTP/1.1" 200
323: * 1981
324: * <p>
325: *
326: * @param entry
327: * @return cleaned url
328: */
329: public String cleanURL(String entry) {
330: String url = entry;
331: // if the string contains atleast one double
332: // quote and checkMethod is true, go ahead
333: // and tokenize the string.
334: if (entry.indexOf("\"") > -1 && checkMethod(entry)) {
335: StringTokenizer tokens = null;
336: // we tokenize using double quotes. this means
337: // for tomcat we should have 3 tokens if there
338: // isn't any additional information in the logs
339: tokens = this .tokenize(entry, "\"");
340: while (tokens.hasMoreTokens()) {
341: String toke = tokens.nextToken();
342: // if checkMethod on the token is true
343: // we tokenzie it using space and escape
344: // the while loop. Only the first matching
345: // token will be used
346: if (checkMethod(toke)) {
347: StringTokenizer token2 = this .tokenize(toke, " ");
348: while (token2.hasMoreTokens()) {
349: String t = (String) token2.nextElement();
350: if (t.equalsIgnoreCase(GET)) {
351: RMETHOD = GET;
352: } else if (t.equalsIgnoreCase(POST)) {
353: RMETHOD = POST;
354: }
355: // there should only be one token
356: // that starts with slash character
357: if (t.startsWith("/")) {
358: url = t;
359: break;
360: }
361: }
362: break;
363: }
364: }
365: return url;
366: } else {
367: // we return the original string
368: return url;
369: }
370: }
371:
372: /**
373: * The method checks for POST and GET methods currently. The other methods
374: * aren't supported yet.
375: *
376: * @param text
377: * @return if method is supported
378: */
379: public boolean checkMethod(String text) {
380: if (text.indexOf("GET") > -1) {
381: this .RMETHOD = GET;
382: return true;
383: } else if (text.indexOf("POST") > -1) {
384: this .RMETHOD = POST;
385: return true;
386: } else {
387: return false;
388: }
389: }
390:
391: /**
392: * Tokenize the URL into two tokens. If the URL has more than one "?", the
393: * parse may fail. Only the first two tokens are used. The first token is
394: * automatically parsed and set at URL_PATH.
395: *
396: * @param url
397: * @return String parameters
398: */
399: public String stripFile(String url, TestElement el) {
400: if (url.indexOf("?") > -1) {
401: StringTokenizer tokens = this .tokenize(url, "?");
402: this .URL_PATH = tokens.nextToken();
403: el.setProperty(HTTPSamplerBase.PATH, URL_PATH);
404: return tokens.hasMoreTokens() ? tokens.nextToken() : null;
405: } else {
406: el.setProperty(HTTPSamplerBase.PATH, url);
407: return null;
408: }
409: }
410:
411: /**
412: * Checks the string to make sure it has /path/file?name=value format. If
413: * the string doesn't have "?", it will return false.
414: *
415: * @param url
416: * @return boolean
417: */
418: public boolean checkURL(String url) {
419: if (url.indexOf("?") > -1) {
420: return true;
421: } else {
422: return false;
423: }
424: }
425:
426: /**
427: * Checks the string to see if it contains "&" and "=". If it does, return
428: * true, so that it can be parsed.
429: *
430: * @param text
431: * @return boolean
432: */
433: public boolean checkParamFormat(String text) {
434: if (text.indexOf("&") > -1 && text.indexOf("=") > -1) {
435: return true;
436: } else {
437: return false;
438: }
439: }
440:
441: /**
442: * Convert a single line into XML
443: *
444: * @param text
445: */
446: public void convertStringToJMRequest(String text, TestElement el) {
447: ((HTTPSamplerBase) el).parseArguments(text);
448: }
449:
450: /**
451: * Parse the string parameters into NVPair[] array. Once they are parsed, it
452: * is returned. The method uses parseOneParameter(string) to convert each
453: * pair.
454: *
455: * @param stringparams
456: */
457: public NVPair[] convertStringtoNVPair(String stringparams) {
458: Vector vparams = this .parseParameters(stringparams);
459: NVPair[] nvparams = new NVPair[vparams.size()];
460: // convert the Parameters
461: for (int idx = 0; idx < nvparams.length; idx++) {
462: nvparams[idx] = this .parseOneParameter((String) vparams
463: .get(idx));
464: }
465: return nvparams;
466: }
467:
468: /**
469: * Method expects name and value to be separated by an equal sign "=". The
470: * method uses StringTokenizer to make a NVPair object. If there happens to
471: * be more than one "=" sign, the others are ignored. The chance of a string
472: * containing more than one is unlikely and would not conform to HTTP spec.
473: * I should double check the protocol spec to make sure this is accurate.
474: *
475: * @param parameter
476: * to be parsed
477: * @return NVPair
478: */
479: protected NVPair parseOneParameter(String parameter) {
480: String name = null;
481: String value = null;
482: try {
483: StringTokenizer param = this .tokenize(parameter, "=");
484: name = param.nextToken();
485: value = param.nextToken();
486: } catch (Exception e) {
487: // do nothing. it's naive, but since
488: // the utility is meant to parse access
489: // logs the formatting should be correct
490: }
491: if (value == null) {
492: value = "";
493: } else {
494: if (decode) {
495: try {
496: value = URLDecoder.decode(value, "UTF-8");
497: } catch (UnsupportedEncodingException e) {
498: log.warn(e.getMessage());
499: }
500: }
501: }
502: return new NVPair(name.trim(), value.trim());
503: }
504:
505: /**
506: * Method uses StringTokenizer to convert the string into single pairs. The
507: * string should conform to HTTP protocol spec, which means the name/value
508: * pairs are separated by the ampersand symbol "&". Some one could write the
509: * querystrings by hand, but that would be round about and go against the
510: * purpose of this utility.
511: *
512: * @param parameters
513: * @return Vector
514: */
515: protected Vector parseParameters(String parameters) {
516: Vector parsedParams = new Vector();
517: StringTokenizer paramtokens = this .tokenize(parameters, "&");
518: while (paramtokens.hasMoreElements()) {
519: parsedParams.add(paramtokens.nextElement());
520: }
521: return parsedParams;
522: }
523:
524: /**
525: * Parses the line using java.util.StringTokenizer.
526: *
527: * @param line
528: * line to be parsed
529: * @param delim
530: * delimiter
531: * @return StringTokenizer
532: */
533: public StringTokenizer tokenize(String line, String delim) {
534: return new StringTokenizer(line, delim);
535: }
536:
537: public void close() {
538: try {
539: this .READER.close();
540: this .READER = null;
541: this .SOURCE = null;
542: } catch (IOException e) {
543: // do nothing
544: }
545: }
546: }
|