01: /* SubElement
02: *
03: * $Id: SubElement.java 4465 2006-08-08 18:25:42Z stack-sf $
04: *
05: * Created on July 26, 2006.
06: *
07: * Copyright (C) 2006 Internet Archive.
08: *
09: * This file is part of the Heritrix web crawler (crawler.archive.org).
10: *
11: * Heritrix is free software; you can redistribute it and/or modify
12: * it under the terms of the GNU Lesser Public License as published by
13: * the Free Software Foundation; either version 2.1 of the License, or
14: * any later version.
15: *
16: * Heritrix is distributed in the hope that it will be useful,
17: * but WITHOUT ANY WARRANTY; without even the implied warranty of
18: * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19: * GNU Lesser Public License for more details.
20: *
21: * You should have received a copy of the GNU Lesser Public License
22: * along with Heritrix; if not, write to the Free Software
23: * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
24: */
25: package org.archive.util.anvl;
26:
27: /**
28: * Abstract ANVL 'data element' sub-part.
29: * Subclass to make a Comment, a Label, or a Value.
30: * @author stack
31: */
32: abstract class SubElement {
33: private final String e;
34:
35: protected SubElement() {
36: this (null);
37: }
38:
39: public SubElement(final String s) {
40: this .e = baseCheck(s);
41: }
42:
43: protected String baseCheck(final String s) {
44: // Check for null.
45: if (s == null || s.length() <= 0) {
46: throw new IllegalArgumentException("Can't be null or empty");
47: }
48: // Check for CRLF.
49: for (int i = 0; i < s.length(); i++) {
50: checkCharacter(s.charAt(i), s, i);
51: }
52: return s;
53: }
54:
55: protected void checkCharacter(final char c, final String srcStr,
56: final int index) {
57: checkControlCharacter(c, srcStr, index);
58: checkCRLF(c, srcStr, index);
59: }
60:
61: protected void checkControlCharacter(final char c,
62: final String srcStr, final int index) {
63: if (Character.isISOControl(c) && !Character.isWhitespace(c)
64: || !Character.isValidCodePoint(c)) {
65: throw new IllegalArgumentException(
66: srcStr
67: + " contains a control character(s) or invalid code point: 0x"
68: + Integer.toHexString(c));
69: }
70: }
71:
72: protected void checkCRLF(final char c, final String srcStr,
73: final int index) {
74: if (ANVLRecord.isCROrLF(c)) {
75: throw new IllegalArgumentException(
76: srcStr
77: + " contains disallowed CRLF control character(s): 0x"
78: + Integer.toHexString(c));
79: }
80: }
81:
82: @Override
83: public String toString() {
84: return e;
85: }
86: }
|