Source Code Cross Referenced for HttpDoc.java in » Web-Crawler » JoBo » net » matuschek » http » Java Source Code / Java Documentation | Java Source Code and Java Documentation

Java Source Code / Java Documentation

1.	6.0 JDK Core
2.	6.0 JDK Modules
3.	6.0 JDK Modules com.sun
4.	6.0 JDK Modules com.sun.java
5.	6.0 JDK Modules sun
6.	6.0 JDK Platform
7.	Ajax
8.	Apache Harmony Java SE
9.	Aspect oriented
10.	Authentication Authorization
11.	Blogger System
12.	Build
13.	Byte Code
14.	Cache
15.	Chart
16.	Chat
17.	Code Analyzer
18.	Collaboration
19.	Content Management System
20.	Database Client
21.	Database DBMS
22.	Database JDBC Connection Pool
23.	Database ORM
24.	Development
25.	EJB Server geronimo
26.	EJB Server GlassFish
27.	EJB Server JBoss 4.2.1
28.	EJB Server resin 3.1.5
29.	ERP CRM Financial
30.	ESB
31.	Forum
32.	GIS
33.	Graphic Library
34.	Groupware
35.	HTML Parser
36.	IDE
37.	IDE Eclipse
38.	IDE Netbeans
39.	Installer
40.	Internationalization Localization
41.	Inversion of Control
42.	Issue Tracking
43.	J2EE
44.	JBoss
45.	JMS
46.	JMX
47.	Library
48.	Mail Clients
49.	Net
50.	Parser
51.	PDF
52.	Portal
53.	Profiler
54.	Project Management
55.	Report
56.	RSS RDF
57.	Rule Engine
58.	Science
59.	Scripting
60.	Search Engine
61.	Security
62.	Servlet Container
63.	Source Control
64.	Swing Library
65.	Template Engine
66.	Test Coverage
67.	Testing
68.	UML
69.	Web Crawler
70.	Web Framework
71.	Web Mail
72.	Web Server
73.	Web Services
74.	Web Services apache cxf 2.0.1
75.	Web Services AXIS2
76.	Wiki Engine
77.	Workflow Engines
78.	XML
79.	XML UI

Java

Java Tutorial

Illustrator Tutorials

GIMP Tutorials

C# / C Sharp

C# / CSharp Tutorial

C# / CSharp Open Source

SQL Server / T-SQL Tutorial

Oracle PL / SQL

Oracle PL/SQL Tutorial

Flash / Flex / ActionScript

VBA / Excel / Access / Word

XML

XML Tutorial

Microsoft Office PowerPoint 2007 Tutorial

Microsoft Office Excel 2007 Tutorial

Microsoft Office Word 2007 Tutorial

Java Source Code / Java Documentation » Web Crawler » JoBo » net.matuschek.http

Source Cross Referenced Class Diagram Java Document (Java Doc)

001:        package net.matuschek.http;
002:
003:        import java.net.URL;
004:        import java.util.Date;
005:        import java.util.Iterator;
006:        import java.util.List;
007:        import java.util.StringTokenizer;
008:        import java.util.Vector;
009:        import net.matuschek.util.MD5;
010:
011:        /*********************************************
012:         Copyright (c) 2001 by Daniel Matuschek
013:         *******************************************/
014:
015:        /**
016:         * A HTTP document. It consists of the contents and HTTP headers.
017:         *
018:         * @author Daniel Matuschek (daniel@matuschek.net)
019:         * @author ptah
020:         * @version $Id: HttpDoc.java,v 1.11 2004/08/09 17:36:49 matuschd Exp $
021:         */
022:        public class HttpDoc {
023:            /** The content */
024:            private byte[] content;
025:
026:            /**
027:             * The HTTP header lines
028:             *
029:             * @link aggregation
030:             * @associates <{HttpHeader}>
031:             */
032:            private Vector<HttpHeader> httpHeader;
033:
034:            /** place to store links of the document if necessary */
035:            private List links;
036:
037:            private int httpReturnCode = 0;
038:            private URL url;
039:
040:            /** flag that indicates if this document is retrieved from cache */
041:            private boolean cached = false;
042:
043:            private final static int HTTP_REDIRECTSTART = 300;
044:            private final static int HTTP_REDIRECTEND = 399;
045:
046:            /**
047:             * Default constructor, initializes a new HttpDoc with
048:             * empty headers and no content
049:             */
050:            public HttpDoc() {
051:                httpHeader = new Vector<HttpHeader>();
052:            }
053:
054:            /**
055:             * Gets the content of the document
056:             *
057:             * @return an array of bytes containing the document content. This
058:             * may represent text or binary data
059:             */
060:            public byte[] getContent() {
061:                return content;
062:            }
063:
064:            /**
065:             * Set the content of the document
066:             * 
067:             * @param content
068:             */
069:            public void setContent(byte[] content) {
070:                this .content = content;
071:                // existing MD5 keys become invalid
072:                removeHeader(HttpHeader.CONTENT_MD5);
073:            }
074:
075:            public void setHttpCode(String httpCode) {
076:                StringTokenizer st = new StringTokenizer(httpCode, " ");
077:                // an HTTP answer must have at least 2 fields
078:                if (st.countTokens() < 2) {
079:                    return;
080:                }
081:
082:                st.nextToken();
083:                String codeStr = st.nextToken();
084:
085:                try {
086:                    httpReturnCode = Integer.parseInt(codeStr);
087:                } catch (NumberFormatException e) {
088:                    // something is wrong !!!
089:                }
090:            }
091:
092:            public void setHttpCode(int code) {
093:                httpReturnCode = code;
094:            }
095:
096:            /**
097:             * Get the Http Return-Code
098:             *
099:             * @return Http Return-Code
100:             */
101:            public int getHttpCode() {
102:                return httpReturnCode;
103:            }
104:
105:            /**
106:             * Add another HTTP header
107:             *
108:             * @param header an HttpHeader object to add to the lis
109:             * of headers
110:             */
111:            public void addHeader(HttpHeader header) {
112:                httpHeader.add(header);
113:            }
114:
115:            /**
116:             * Get all HTTP header lines
117:             *
118:             * @return a Vector of HttpHeader objects
119:             */
120:            public Vector getHttpHeader() {
121:                return httpHeader;
122:            }
123:
124:            /**
125:             * Get the HTTP header with the given name
126:             * @param headerName
127:             *
128:             * @return a HttpHeader with the given name or null if not found
129:             */
130:            public HttpHeader getHttpHeader(String headerName) {
131:                for (Iterator iter = httpHeader.iterator(); iter.hasNext();) {
132:                    HttpHeader header = (HttpHeader) iter.next();
133:                    if (header.getName().equals(headerName)) {
134:                        return header;
135:                    }
136:                }
137:                return null;
138:            }
139:
140:            /**
141:             * Get the header value with the given name
142:             * @param headerName
143:             *
144:             * @return a HttpHeader.value with the given name or null if not found
145:             */
146:            public String getHeaderValue(String headerName) {
147:                HttpHeader header = getHeader(headerName);
148:                return header != null ? header.getValue() : null;
149:            }
150:
151:            /**
152:             * Set a HTTP header value with the given name (creates one if not found)
153:             * @param headerName
154:             * @param headerValue
155:             *
156:             * @return a HttpHeader.value with the given name or null if not found
157:             */
158:            public void setHeaderValue(String headerName, String headerValue) {
159:                HttpHeader header = getHeader(headerName);
160:                if (header == null) {
161:                    header = new HttpHeader(headerName, headerValue);
162:                    addHeader(header);
163:                } else {
164:                    header.setValue(headerValue);
165:                }
166:            }
167:
168:            /**
169:             * Get the content of the Location header. This header will
170:             * be used for REDIRECTs.
171:             * 
172:             * @return the value of the HTTP Location header.
173:             */
174:            public String getLocation() {
175:                HttpHeader location = getHeader(HttpHeader.LOCATION);
176:                if (location == null) {
177:                    return "";
178:                } else {
179:                    return location.getValue();
180:                }
181:            }
182:
183:            /**
184:             * Was it a redirect ?
185:             *
186:             * @return true if this document is a HTTP REDIRECT
187:             */
188:            public boolean isRedirect() {
189:                if ((httpReturnCode >= HTTP_REDIRECTSTART)
190:                        && (httpReturnCode <= HTTP_REDIRECTEND)) {
191:                    return true;
192:                } else {
193:                    return false;
194:                }
195:            }
196:
197:            /**
198:             * Was it a "normal" document ?
199:             */
200:            public boolean isOk() {
201:                return (httpReturnCode == HttpConstants.HTTP_OK);
202:            }
203:
204:            /**
205:             * Was it not modified ?
206:             */
207:            public boolean isNotModified() {
208:                return (getHttpCode() == HttpConstants.HTTP_NOTMODIFIED);
209:            }
210:
211:            /**
212:             * Was it not found ?
213:             */
214:            public boolean isNotFound() {
215:                return (httpReturnCode == HttpConstants.HTTP_NOTFOUND);
216:            }
217:
218:            /**
219:             * did we get "Authorization required"
220:             */
221:            public boolean isUnauthorized() {
222:                return (httpReturnCode == HttpConstants.HTTP_UNAUTHORIZED);
223:            }
224:
225:            /**
226:             * Gets the HttpHeader with the given name
227:             * 
228:             * @param headerName
229:             */
230:            public HttpHeader getHeader(String name) {
231:                for (int i = 0; i < httpHeader.size(); i++) {
232:                    HttpHeader h = (HttpHeader) httpHeader.elementAt(i);
233:                    if (name.equalsIgnoreCase(h.getName())) {
234:                        return h;
235:                    }
236:                }
237:                return null;
238:            }
239:
240:            /**
241:             * Removes the HttpHeader with the given name
242:             * 
243:             * @param headerName
244:             */
245:            public HttpHeader removeHeader(String name) {
246:                HttpHeader header = getHeader(name);
247:                if (header != null) {
248:                    httpHeader.remove(header);
249:                }
250:                return header;
251:            }
252:
253:            /**
254:             * Get all the HTTP headers. This function is useful if you
255:             * don't know what headers exists and you want to have ALL
256:             * headers
257:             * 
258:             * @return a Vector containing HttpHeader objects
259:             */
260:            public Vector getHttpHeaders() {
261:                return httpHeader;
262:            }
263:
264:            /**
265:             * is the content-type text/html ?
266:             * 
267:             * @return true if the HTTP Content-Type header has the
268:             * value text/html
269:             */
270:            public boolean isHTML() {
271:                HttpHeader ct = getHeader(HttpHeader.CONTENT_TYPE);
272:                if (ct == null) {
273:                    return false;
274:                } else {
275:                    if (ct.getValue().toLowerCase().startsWith("text/html")) {
276:                        return true;
277:                    }
278:                }
279:                return false;
280:            }
281:
282:            /**
283:             * is this a Javascript document ?
284:             *
285:             * @return true if the Content-Type is text/x-javascript
286:             */
287:            public boolean isJavaScript() {
288:                HttpHeader ct = getHeader(HttpHeader.CONTENT_TYPE);
289:                if (ct == null) {
290:                    return false;
291:                } else {
292:                    if (ct.getValue().equalsIgnoreCase("text/x-javascript")) {
293:                        return true;
294:                    }
295:                }
296:                return false;
297:            }
298:
299:            /**
300:             * Convert this object to a String.
301:             *
302:             * @return a String representation of this HttpDoc. Format
303:             * may change, therefore this should be used only for
304:             * logging or debugging
305:             */
306:            public String toString() {
307:                StringBuffer res = new StringBuffer();
308:
309:                res.append(url.toString() + "\n\n");
310:
311:                for (int i = 0; i < httpHeader.size(); i++) {
312:                    HttpHeader h = (HttpHeader) httpHeader.elementAt(i);
313:                    res.append(h.toString());
314:                    res.append("\n");
315:                }
316:                res.append("\n");
317:                if (content != null) {
318:                    res.append(new String(content));
319:                }
320:
321:                return res.toString();
322:            }
323:
324:            /**
325:             * Get the full URL where this document was retrieved from
326:             *
327:             * @return an URL object containing the location where this
328:             * document was retrieved from
329:             */
330:            public URL getURL() {
331:                return url;
332:            }
333:
334:            /**
335:             * Set the location where this  document was retrieved from
336:             *
337:             * @param url the original location of this document
338:             */
339:            public void setURL(URL url) {
340:                this .url = url;
341:            }
342:
343:            /**
344:             * Gets lastModified date as milliseconds.
345:             *
346:             * @return lastModified as milliseconds or -1 if not specified
347:             */
348:            public long getLastModifiedAsMilliSeconds() {
349:                String value = getHeaderValue(HttpHeader.LAST_MODIFIED);
350:                return value != null ? HTTPDateTool.parseDate(value) : -1;
351:            }
352:
353:            /**
354:             * Gets date as milliseconds.
355:             *
356:             * @return date as milliseconds or -1 if not specified
357:             */
358:            public long getDateAsMilliSeconds() {
359:                String value = getHeaderValue(HttpHeader.DATE);
360:                return value != null ? HTTPDateTool.parseDate(value) : -1;
361:            }
362:
363:            /**
364:             * Sets lastModified date in milliseconds.
365:             *
366:             * @param lastModified in milliseconds
367:             */
368:            public void setLastModified(long d) {
369:                String dateString = HTTPDateTool.rfc1123Format.format(new Date(
370:                        d));
371:                setHeaderValue(HttpHeader.LAST_MODIFIED, dateString);
372:            }
373:
374:            /**
375:             * Sets date in milliseconds.
376:             *
377:             * @param lastModified in milliseconds
378:             */
379:            public void setDate(long d) {
380:                String dateString = HTTPDateTool.rfc1123Format.format(new Date(
381:                        d));
382:                setHeaderValue(HttpHeader.DATE, dateString);
383:            }
384:
385:            /**
386:             * Calculates MD5 key for given content
387:             *
388:             * @param content
389:             * @return MD5 key for content
390:             */
391:            protected static String getContentMD5(byte[] content) {
392:                if ((content == null) || (content.length == 0)) {
393:                    return "00000000000000000000000000000000";
394:                }
395:                MD5 md5 = new MD5();
396:                md5.Update(content);
397:                return md5.asHex();
398:            }
399:
400:            /**
401:             * Gets MD5 key of document content.
402:             * A calculated key is stored as a header and reused 
403:             * in successive calls of this method.
404:             * 
405:             * @return MD5 key
406:             */
407:            public String getContentMD5() {
408:                HttpHeader md5Header = getHeader(HttpHeader.CONTENT_MD5);
409:                String md5;
410:                if (md5Header != null) {
411:                    md5 = md5Header.getValue();
412:                } else {
413:                    md5 = getContentMD5(getContent());
414:                    md5Header = new HttpHeader(HttpHeader.CONTENT_MD5, md5);
415:                    addHeader(md5Header);
416:                }
417:                return md5;
418:            }
419:
420:            /**
421:             * Set flag that indicates if this document is retrieved from cache
422:             * @param cached
423:             */
424:            public void setCached(boolean cached) {
425:                this .cached = cached;
426:            }
427:
428:            /**
429:             * Was this document retrieved from cache?
430:             * @return cached
431:             */
432:            public boolean isCached() {
433:                return cached;
434:            }
435:
436:            /**
437:             * Store calculated links of a HttpDoc.
438:             * @param links
439:             */
440:            public void setLinks(List links) {
441:                this .links = links;
442:            }
443:
444:            /**
445:             * Gets List of links (if set previously).
446:             * @return List
447:             */
448:            public List getLinks() {
449:                return links;
450:            }
451:
452:        }

www.java2java.com | Contact Us

All other trademarks are property of their respective owners.