Source Code Cross Referenced for Blacklist.java in » Blogger-System » apache-roller-3.1 » org » apache » roller » util » Java Source Code / Java Documentation | Java Source Code and Java Documentation

Java Source Code / Java Documentation
1. 6.0 JDK Core
2. 6.0 JDK Modules
3. 6.0 JDK Modules com.sun
4. 6.0 JDK Modules com.sun.java
5. 6.0 JDK Modules sun
6. 6.0 JDK Platform
7. Ajax
8. Apache Harmony Java SE
9. Aspect oriented
10. Authentication Authorization
11. Blogger System
12. Build
13. Byte Code
14. Cache
15. Chart
16. Chat
17. Code Analyzer
18. Collaboration
19. Content Management System
20. Database Client
21. Database DBMS
22. Database JDBC Connection Pool
23. Database ORM
24. Development
25. EJB Server geronimo
26. EJB Server GlassFish
27. EJB Server JBoss 4.2.1
28. EJB Server resin 3.1.5
29. ERP CRM Financial
30. ESB
31. Forum
32. GIS
33. Graphic Library
34. Groupware
35. HTML Parser
36. IDE
37. IDE Eclipse
38. IDE Netbeans
39. Installer
40. Internationalization Localization
41. Inversion of Control
42. Issue Tracking
43. J2EE
44. JBoss
45. JMS
46. JMX
47. Library
48. Mail Clients
49. Net
50. Parser
51. PDF
52. Portal
53. Profiler
54. Project Management
55. Report
56. RSS RDF
57. Rule Engine
58. Science
59. Scripting
60. Search Engine
61. Security
62. Servlet Container
63. Source Control
64. Swing Library
65. Template Engine
66. Test Coverage
67. Testing
68. UML
69. Web Crawler
70. Web Framework
71. Web Mail
72. Web Server
73. Web Services
74. Web Services apache cxf 2.0.1
75. Web Services AXIS2
76. Wiki Engine
77. Workflow Engines
78. XML
79. XML UI
Java
Java Tutorial
Java Open Source
Jar File Download
Java Articles
Java Products
Java by API
Photoshop Tutorials
Maya Tutorials
Flash Tutorials
3ds-Max Tutorials
Illustrator Tutorials
GIMP Tutorials
C# / C Sharp
C# / CSharp Tutorial
C# / CSharp Open Source
ASP.Net
ASP.NET Tutorial
JavaScript DHTML
JavaScript Tutorial
JavaScript Reference
HTML / CSS
HTML CSS Reference
C / ANSI-C
C Tutorial
C++
C++ Tutorial
Ruby
PHP
Python
Python Tutorial
Python Open Source
SQL Server / T-SQL
SQL Server / T-SQL Tutorial
Oracle PL / SQL
Oracle PL/SQL Tutorial
PostgreSQL
SQL / MySQL
MySQL Tutorial
VB.Net
VB.Net Tutorial
Flash / Flex / ActionScript
VBA / Excel / Access / Word
XML
XML Tutorial
Microsoft Office PowerPoint 2007 Tutorial
Microsoft Office Excel 2007 Tutorial
Microsoft Office Word 2007 Tutorial
Java Source Code / Java Documentation » Blogger System » apache roller 3.1 » org.apache.roller.util 
Source Cross Referenced  Class Diagram Java Document (Java Doc) 


001:        /*
002:         * Licensed to the Apache Software Foundation (ASF) under one or more
003:         *  contributor license agreements.  The ASF licenses this file to You
004:         * under the Apache License, Version 2.0 (the "License"); you may not
005:         * use this file except in compliance with the License.
006:         * You may obtain a copy of the License at
007:         *
008:         *     http://www.apache.org/licenses/LICENSE-2.0
009:         *
010:         * Unless required by applicable law or agreed to in writing, software
011:         * distributed under the License is distributed on an "AS IS" BASIS,
012:         * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
013:         * See the License for the specific language governing permissions and
014:         * limitations under the License.  For additional information regarding
015:         * copyright in this work, please see the NOTICE file in the top level
016:         * directory of this distribution.
017:         */
018:        /* Created on Nov 11, 2003 */
019:        package org.apache.roller.util;
020:
021:        import org.apache.commons.logging.Log;
022:        import org.apache.commons.logging.LogFactory;
023:        import java.io.BufferedReader;
024:        import java.io.FileInputStream;
025:        import java.io.IOException;
026:        import java.io.InputStream;
027:        import java.io.InputStreamReader;
028:        import java.io.File;
029:        import java.io.FileOutputStream;
030:        import java.net.HttpURLConnection;
031:        import java.net.URL;
032:        import java.text.ParseException;
033:        import java.text.SimpleDateFormat;
034:        import java.util.ArrayList;
035:        import java.util.Date;
036:        import java.util.Iterator;
037:        import java.util.LinkedList;
038:        import java.util.List;
039:        import java.util.StringTokenizer;
040:        import java.util.regex.Matcher;
041:        import java.util.regex.Pattern;
042:        import org.apache.roller.config.RollerConfig;
043:        import org.apache.commons.lang.StringUtils;
044:
045:        /**
046:         * Loads MT-Blacklist style blacklist from disk and allows callers to test
047:         * strings against the blacklist and (optionally) addition blacklists.
048:         * <br />
049:         * First looks for blacklist.txt in uploads directory, than in classpath 
050:         * as /blacklist.txt. Download from web feature disabed.
051:         * <br />
052:         * Blacklist is formatted one entry per line. 
053:         * Any line that begins with # is considered to be a comment. 
054:         * Any line that begins with ( is considered to be a regex expression. 
055:         * <br />
056:         * For more information on the (discontinued) MT-Blacklist service:
057:         * http://www.jayallen.org/projects/mt-blacklist. 
058:         *
059:         * @author Lance Lavandowska
060:         * @author Allen Gilliland
061:         */
062:        public class Blacklist {
063:
064:            private static Log mLogger = LogFactory.getLog(Blacklist.class);
065:
066:            private static Blacklist blacklist;
067:            private static final String blacklistFile = "blacklist.txt";
068:            private static final String lastUpdateStr = "Last update:";
069:
070:            /** We no longer have a blacklist update URL */
071:            private static final String blacklistURL = null;
072:
073:            private Date lastModified = null;
074:            private List blacklistStr = new LinkedList();
075:            private List blacklistRegex = new LinkedList();
076:
077:            // setup our singleton at class loading time
078:            static {
079:                mLogger.info("Initializing MT Blacklist");
080:                blacklist = new Blacklist();
081:                blacklist.loadBlacklistFromFile(null);
082:            }
083:
084:            /** Hide constructor */
085:            private Blacklist() {
086:            }
087:
088:            /** Singleton factory method. */
089:            public static Blacklist getBlacklist() {
090:                return blacklist;
091:            }
092:
093:            /** Updated MT blacklist if necessary. */
094:            public static void checkForUpdate() {
095:                getBlacklist().update();
096:            }
097:
098:            /** Non-Static update method. */
099:            public void update() {
100:                if (this .blacklistURL != null) {
101:                    boolean blacklist_updated = this .downloadBlacklist();
102:                    if (blacklist_updated) {
103:                        this .loadBlacklistFromFile(null);
104:                    }
105:                }
106:            }
107:
108:            /** Download the MT blacklist from the web to our uploads directory. */
109:            private boolean downloadBlacklist() {
110:
111:                boolean blacklist_updated = false;
112:                try {
113:                    mLogger.debug("Attempting to download MT blacklist");
114:
115:                    URL url = new URL(blacklistURL);
116:                    HttpURLConnection connection = (HttpURLConnection) url
117:                            .openConnection();
118:
119:                    // after spending way too much time debugging i've discovered
120:                    // that the blacklist server is selective based on the User-Agent
121:                    // header.  without this header set i always get a 403 response :(
122:                    connection.setRequestProperty("User-Agent", "Mozilla/5.0");
123:
124:                    if (this .lastModified != null) {
125:                        connection.setRequestProperty("If-Modified-Since",
126:                                DateUtil.formatRfc822(this .lastModified));
127:                    }
128:
129:                    int responseCode = connection.getResponseCode();
130:
131:                    mLogger.debug("HttpConnection response = " + responseCode);
132:
133:                    // did the connection return NotModified? If so, no need to parse
134:                    if (responseCode == HttpURLConnection.HTTP_NOT_MODIFIED) {
135:                        mLogger.debug("MT blacklist site says we are current");
136:                        return false;
137:                    }
138:
139:                    // did the connection return a LastModified header?
140:                    long lastModifiedLong = connection.getHeaderFieldDate(
141:                            "Last-Modified", -1);
142:
143:                    // if the file is newer than our current then we need do update it
144:                    if (responseCode == HttpURLConnection.HTTP_OK
145:                            && (this .lastModified == null || this .lastModified
146:                                    .getTime() < lastModifiedLong)) {
147:
148:                        mLogger.debug("my last modified = "
149:                                + this .lastModified.getTime());
150:                        mLogger.debug("MT last modified = " + lastModifiedLong);
151:
152:                        // save the new blacklist
153:                        InputStream instream = connection.getInputStream();
154:
155:                        String uploadDir = RollerConfig
156:                                .getProperty("uploads.dir");
157:                        String path = uploadDir + File.separator
158:                                + blacklistFile;
159:                        FileOutputStream outstream = new FileOutputStream(path);
160:
161:                        mLogger
162:                                .debug("writing updated MT blacklist to "
163:                                        + path);
164:
165:                        // read from url and write to file
166:                        byte[] buf = new byte[4096];
167:                        int length = 0;
168:                        while ((length = instream.read(buf)) > 0)
169:                            outstream.write(buf, 0, length);
170:
171:                        outstream.close();
172:                        instream.close();
173:
174:                        blacklist_updated = true;
175:
176:                        mLogger.debug("MT blacklist download completed.");
177:
178:                    } else {
179:                        mLogger
180:                                .debug("blacklist *NOT* saved, assuming we are current");
181:                    }
182:
183:                } catch (Exception e) {
184:                    mLogger.error("error downloading blacklist", e);
185:                }
186:
187:                return blacklist_updated;
188:            }
189:
190:            /**
191:             * Load the MT blacklist from the file system.
192:             * We look for a previously downloaded version of the blacklist first and
193:             * if it's not found then we load the default blacklist packed with Roller.
194:             * Only public for purposes of unit testing.
195:             */
196:            public void loadBlacklistFromFile(String blacklistFilePath) {
197:
198:                InputStream txtStream = null;
199:                try {
200:                    String path = blacklistFilePath;
201:                    if (path == null) {
202:                        String uploadDir = RollerConfig
203:                                .getProperty("uploads.dir");
204:                        path = uploadDir + File.separator + blacklistFile;
205:                    }
206:                    File blacklistFile = new File(path);
207:
208:                    // check our lastModified date to see if we need to re-read the file
209:                    if (this .lastModified != null
210:                            && this .lastModified.getTime() >= blacklistFile
211:                                    .lastModified()) {
212:                        mLogger
213:                                .debug("Blacklist is current, no need to load again");
214:                        return;
215:                    } else {
216:                        this .lastModified = new Date(blacklistFile
217:                                .lastModified());
218:                    }
219:                    txtStream = new FileInputStream(blacklistFile);
220:                    mLogger.info("Loading blacklist from " + path);
221:
222:                } catch (Exception e) {
223:                    // Roller keeps a copy in the webapp just in case
224:                    txtStream = getClass().getResourceAsStream(
225:                            "/" + blacklistFile);
226:                    mLogger.warn("Couldn't find downloaded blacklist, "
227:                            + "loading from classpath instead");
228:                }
229:
230:                if (txtStream != null) {
231:                    readFromStream(txtStream, false);
232:                } else {
233:                    mLogger
234:                            .error("Couldn't load a blacklist file from anywhere, "
235:                                    + "this means blacklist checking is disabled for now.");
236:                }
237:                mLogger.info("Number of blacklist string rules: "
238:                        + blacklistStr.size());
239:                mLogger.info("Number of blacklist regex rules: "
240:                        + blacklistRegex.size());
241:            }
242:
243:            /**
244:             * Read in the InputStream for rules.
245:             * @param txtStream
246:             */
247:            private String readFromStream(InputStream txtStream,
248:                    boolean saveStream) {
249:                String line;
250:                StringBuffer buf = new StringBuffer();
251:                BufferedReader in = null;
252:                try {
253:                    in = new BufferedReader(new InputStreamReader(txtStream,
254:                            "UTF-8"));
255:                    while ((line = in.readLine()) != null) {
256:                        if (line.startsWith("#")) {
257:                            readComment(line);
258:                        } else {
259:                            readRule(line);
260:                        }
261:
262:                        if (saveStream)
263:                            buf.append(line).append("\n");
264:                    }
265:                } catch (Exception e) {
266:                    mLogger.error(e);
267:                } finally {
268:                    try {
269:                        if (in != null)
270:                            in.close();
271:                    } catch (IOException e1) {
272:                        mLogger.error(e1);
273:                    }
274:                }
275:                return buf.toString();
276:            }
277:
278:            private void readRule(String str) {
279:                if (StringUtils.isEmpty(str))
280:                    return; // bad condition
281:
282:                String rule = str.trim();
283:
284:                if (str.indexOf("#") > 0) // line has a comment
285:                {
286:                    int commentLoc = str.indexOf("#");
287:                    rule = str.substring(0, commentLoc - 1).trim(); // strip comment
288:                }
289:
290:                if (rule.indexOf("(") > -1) // regex rule
291:                {
292:                    // pre-compile patterns since they will be frequently used
293:                    blacklistRegex.add(Pattern.compile(rule));
294:                } else if (StringUtils.isNotEmpty(rule)) {
295:                    blacklistStr.add(rule);
296:                }
297:            }
298:
299:            /** Read comment and try to parse out "Last update" value */
300:            private void readComment(String str) {
301:                int lastUpdatePos = str.indexOf(lastUpdateStr);
302:                if (lastUpdatePos > -1) {
303:                    str = str.substring(lastUpdatePos + lastUpdateStr.length());
304:                    str = str.trim();
305:                    try {
306:                        SimpleDateFormat sdf = new SimpleDateFormat(
307:                                "yyyy/MM/dd HH:mm:ss");
308:                        lastModified = DateUtil.parse(str, sdf);
309:                    } catch (ParseException e) {
310:                        mLogger.debug("ParseException reading " + str);
311:                    }
312:                }
313:            }
314:
315:            /** 
316:             * Does the String argument match any of the rules in the built-in blacklist? 
317:             */
318:            public boolean isBlacklisted(String str) {
319:                return isBlacklisted(str, null, null);
320:            }
321:
322:            /** 
323:             * Does the String argument match any of the rules in the built-in blacklist
324:             * plus additional blacklists provided by caller?
325:             * @param str             String to be checked against blacklist
326:             * @param moreStringRules Additional string rules to consider
327:             * @param moreRegexRules  Additional regex rules to consider 
328:             */
329:            public boolean isBlacklisted(String str, List moreStringRules,
330:                    List moreRegexRules) {
331:                if (str == null || StringUtils.isEmpty(str))
332:                    return false;
333:
334:                // First iterate over blacklist, doing indexOf.
335:                // Then iterate over blacklistRegex and test.
336:                // As soon as there is a hit in either case return true
337:
338:                // test plain String.indexOf
339:                List stringRules = blacklistStr;
340:                if (moreStringRules != null && moreStringRules.size() > 0) {
341:                    stringRules = new ArrayList();
342:                    stringRules.addAll(moreStringRules);
343:                    stringRules.addAll(blacklistStr);
344:                }
345:                if (testStringRules(str, stringRules))
346:                    return true;
347:
348:                // test regex blacklisted
349:                List regexRules = blacklistRegex;
350:                if (moreRegexRules != null && moreRegexRules.size() > 0) {
351:                    regexRules = new ArrayList();
352:                    regexRules.addAll(moreRegexRules);
353:                    regexRules.addAll(blacklistRegex);
354:                }
355:                return testRegExRules(str, regexRules);
356:            }
357:
358:            /** 
359:             * Test string only against rules provided by caller, NOT against built-in blacklist.
360:             * @param str             String to be checked against rules
361:             * @param moreStringRules String rules to consider
362:             * @param moreRegexRules  Regex rules to consider 
363:             */
364:            public static boolean matchesRulesOnly(String str,
365:                    List stringRules, List regexRules) {
366:                if (testStringRules(str, stringRules))
367:                    return true;
368:                return testRegExRules(str, regexRules);
369:            }
370:
371:            /** Test String against the RegularExpression rules. */
372:            private static boolean testRegExRules(String str, List regexRules) {
373:                boolean hit = false;
374:                Pattern testPattern = null;
375:                Iterator iter = regexRules.iterator();
376:                while (iter.hasNext()) {
377:                    testPattern = (Pattern) iter.next();
378:
379:                    // want to see what it is matching on, but only in debug mode
380:                    if (mLogger.isDebugEnabled()) {
381:                        Matcher matcher = testPattern.matcher(str);
382:                        if (matcher.find()) {
383:                            mLogger.debug(matcher.group() + " matched by "
384:                                    + testPattern.pattern());
385:                            return true;
386:                        }
387:                    } else {
388:                        if (testPattern.matcher(str).find()) {
389:                            return true;
390:                        }
391:                    }
392:                }
393:                return hit;
394:            }
395:
396:            /** Test the String against the String rules, using simple indexOf. */
397:            private static boolean testStringRules(String str, List stringRules) {
398:                String test;
399:                Iterator iter = stringRules.iterator();
400:                boolean hit = false;
401:                while (iter.hasNext()) {
402:                    test = (String) iter.next();
403:                    if (str.indexOf(test) > -1) {
404:                        // want to see what it is matching on, but only in debug mode
405:                        if (mLogger.isDebugEnabled()) {
406:                            mLogger.debug("matched:" + test + ":");
407:                        }
408:                        return true;
409:                    }
410:                }
411:                return hit;
412:            }
413:
414:            /** Utility method to populate lists based a blacklist in string form */
415:            public static void populateSpamRules(String blacklist,
416:                    List stringRules, List regexRules, String addendum) {
417:                String weblogWords = blacklist;
418:                weblogWords = (weblogWords == null) ? "" : weblogWords;
419:                String siteWords = (addendum != null) ? addendum : "";
420:                StringTokenizer toker = new StringTokenizer(siteWords
421:                        + weblogWords, "\n");
422:                while (toker.hasMoreTokens()) {
423:                    String token = toker.nextToken().trim();
424:                    if (token.startsWith("#"))
425:                        continue;
426:                    if (token.startsWith("(")) {
427:                        regexRules.add(Pattern.compile(token));
428:                    } else {
429:                        stringRules.add(token);
430:                    }
431:                }
432:            }
433:
434:            /** Return pretty list of String and RegEx rules. */
435:            public String toString() {
436:                StringBuffer buf = new StringBuffer("blacklist ");
437:                buf.append(blacklistStr).append("\n");
438:                buf.append("Regex blacklist ").append(blacklistRegex);
439:                return buf.toString();
440:            }
441:        }
www.java2java.com | Contact Us
Copyright 2009 - 12 Demo Source and Support. All rights reserved.
All other trademarks are property of their respective owners.