Source Code Cross Referenced for Attributes.java in » HTML-Parser » jericho-html » au » id » jericho » lib » html » Java Source Code / Java Documentation | Java Source Code and Java Documentation

Java Source Code / Java Documentation
1.	6.0 JDK Core
2.	6.0 JDK Modules
3.	6.0 JDK Modules com.sun
4.	6.0 JDK Modules com.sun.java
5.	6.0 JDK Modules sun
6.	6.0 JDK Platform
7.	Ajax
8.	Apache Harmony Java SE
9.	Aspect oriented
10.	Authentication Authorization
11.	Blogger System
12.	Build
13.	Byte Code
14.	Cache
15.	Chart
16.	Chat
17.	Code Analyzer
18.	Collaboration
19.	Content Management System
20.	Database Client
21.	Database DBMS
22.	Database JDBC Connection Pool
23.	Database ORM
24.	Development
25.	EJB Server geronimo
26.	EJB Server GlassFish
27.	EJB Server JBoss 4.2.1
28.	EJB Server resin 3.1.5
29.	ERP CRM Financial
30.	ESB
31.	Forum
32.	GIS
33.	Graphic Library
34.	Groupware
35.	HTML Parser
36.	IDE
37.	IDE Eclipse
38.	IDE Netbeans
39.	Installer
40.	Internationalization Localization
41.	Inversion of Control
42.	Issue Tracking
43.	J2EE
44.	JBoss
45.	JMS
46.	JMX
47.	Library
48.	Mail Clients
49.	Net
50.	Parser
51.	PDF
52.	Portal
53.	Profiler
54.	Project Management
55.	Report
56.	RSS RDF
57.	Rule Engine
58.	Science
59.	Scripting
60.	Search Engine
61.	Security
62.	Servlet Container
63.	Source Control
64.	Swing Library
65.	Template Engine
66.	Test Coverage
67.	Testing
68.	UML
69.	Web Crawler
70.	Web Framework
71.	Web Mail
72.	Web Server
73.	Web Services
74.	Web Services apache cxf 2.0.1
75.	Web Services AXIS2
76.	Wiki Engine
77.	Workflow Engines
78.	XML
79.	XML UI
Java
Java Tutorial
Illustrator Tutorials
GIMP Tutorials
C# / C Sharp
C# / CSharp Tutorial
C# / CSharp Open Source
SQL Server / T-SQL Tutorial
Oracle PL / SQL
Oracle PL/SQL Tutorial
Flash / Flex / ActionScript
VBA / Excel / Access / Word
XML
XML Tutorial
Microsoft Office PowerPoint 2007 Tutorial
Microsoft Office Excel 2007 Tutorial
Microsoft Office Word 2007 Tutorial
Java Source Code / Java Documentation » HTML Parser » jericho html » au.id.jericho.lib.html
Source Cross Referenced Class Diagram Java Document (Java Doc)
001:        // Jericho HTML Parser - Java based library for analysing and manipulating HTML
002:        // Version 2.5
003:        // Copyright (C) 2007 Martin Jericho
004:        // http://jerichohtml.sourceforge.net/
005:        //
006:        // This library is free software; you can redistribute it and/or
007:        // modify it under the terms of either one of the following licences:
008:        //
009:        // 1. The Eclipse Public License (EPL) version 1.0,
010:        // included in this distribution in the file licence-epl-1.0.html
011:        // or available at http://www.eclipse.org/legal/epl-v10.html
012:        //
013:        // 2. The GNU Lesser General Public License (LGPL) version 2.1 or later,
014:        // included in this distribution in the file licence-lgpl-2.1.txt
015:        // or available at http://www.gnu.org/licenses/lgpl.txt
016:        //
017:        // This library is distributed on an "AS IS" basis,
018:        // WITHOUT WARRANTY OF ANY KIND, either express or implied.
019:        // See the individual licence texts for more details.
020:
021:        package au.id.jericho.lib.html;
022:
023:        import au.id.jericho.lib.html.nodoc.*;
024:        import java.util.*;
025:        import java.io.*;
026:
027:        /**
028:         * Represents the list of {@link Attribute} objects present within a particular {@link StartTag}.
029:         * <p>
030:         * This segment starts at the end of the start tag's {@linkplain StartTag#getName() name}
031:         * and ends at the end of the last attribute.
032:         * <p>
033:         * The attributes in this list are a representation of those found in the source document and are not modifiable.
034:         * The {@link OutputDocument#replace(Attributes, Map)} and {@link OutputDocument#replace(Attributes, boolean convertNamesToLowerCase)} methods
035:         * provide the means to add, delete or modify attributes and their values in an {@link OutputDocument}.
036:         * <p>
037:         * As of version 2.4, any {@linkplain TagType#isServerTag() server tags} encountered inside the attributes area of a non-server tag
038:         * no longer interfere with the parsing of the attributes.
039:         * <p>
040:         * If too many syntax errors are encountered while parsing a start tag's attributes, the parser rejects the entire start tag
041:         * and generates a {@linkplain Source#getLogger() log} entry.
042:         * The threshold for the number of errors allowed can be set using the {@link #setDefaultMaxErrorCount(int)} static method.
043:         * <p>
044:         * Obtained using the {@link StartTag#getAttributes()} method, or explicitly using the {@link Source#parseAttributes(int pos, int maxEnd)} method.
045:         * <p>
046:         * It is common for instances of this class to contain no attributes.
047:         * <p>
048:         * See also the XML 1.0 specification for <a target="_blank" href="http://www.w3.org/TR/REC-xml#dt-attr">attributes</a>.
049:         * <p>
050:         * Note that before version 2.0 the segment ended just before the tag's
051:         * {@linkplain StartTagType#getClosingDelimiter() closing delimiter} instead of at the end of the last attribute.
052:         *
053:         * @see StartTag
054:         * @see Attribute
055:         */
056:        public final class Attributes extends SequentialListSegment {
057:            private final LinkedList attributeList; // never null; holds the Attribute objects in the order the parser added them
058:
059:            // parsing states (the values taken by the 'state' variable of the private construct method):
060:            private static final int AFTER_TAG_NAME = 0; // start state when parsing begins directly after the tag name; only whitespace is normally accepted here
061:            private static final int BETWEEN_ATTRIBUTES = 1; // skipping whitespace while waiting for the first character of an attribute name
062:            private static final int IN_NAME = 2; // reading the characters of an attribute name
063:            private static final int AFTER_NAME = 3; // this only happens if an attribute name is followed by whitespace
064:            private static final int START_VALUE = 4; // an '=' has been read; waiting for the first character of the value
065:            private static final int IN_VALUE = 5; // reading an attribute value, quoted or unquoted
066:            private static final int AFTER_VALUE_FINAL_QUOTE = 6; // NOTE(review): not referenced by the parsing code visible in this part of the file
067:
068:            private static int defaultMaxErrorCount = 2; // defines maximum number of minor errors that can be encountered in attributes before entire start tag is rejected (rejection happens once the count exceeds this value).
069:
070:            /** Creates the segment covering begin..end of the source, backed by the supplied attribute list (expected to be non-null). */
071:            private Attributes(final Source source, final int begin, final int end, final LinkedList attributeList) {
072:                super(source, begin, end);
073:                this.attributeList = attributeList;
074:            }
075:
076:            /** called from StartTagType.parseAttributes(Source, int startTagBegin, String tagName) */
077:            static Attributes construct(final Source source,
078:                    final int startTagBegin, final StartTagType startTagType,
079:                    final String tagName) {
080:                return construct(source, "StartTag", AFTER_TAG_NAME,
081:                        startTagBegin, -1, -1, startTagType, tagName,
082:                        defaultMaxErrorCount);
083:            }
084:
085:            /** called from StartTag.parseAttributes(int maxErrorCount) */
086:            static Attributes construct(final Source source,
087:                    final int startTagBegin, final int attributesBegin,
088:                    final int maxEnd, final StartTagType startTagType,
089:                    final String tagName, final int maxErrorCount) {
090:                return construct(source, "Attributes for StartTag",
091:                        BETWEEN_ATTRIBUTES, startTagBegin, attributesBegin,
092:                        maxEnd, startTagType, tagName, maxErrorCount);
093:            }
094:
095:            /** called from Source.parseAttributes(int pos, int maxEnd, int maxErrorCount) */
096:            static Attributes construct(final Source source, final int begin,
097:                    final int maxEnd, final int maxErrorCount) {
098:                return construct(source, "Attributes", BETWEEN_ATTRIBUTES,
099:                        begin, -1, maxEnd, StartTagType.NORMAL, null,
100:                        maxErrorCount);
101:            }
102:
103:            /**
104:             * Any &lt; character found within the start tag is treated as though it is part of the attribute
105:             * list, which is consistent with the way IE treats it.
106:             * @param logBegin  the position of the beginning of the object being searched (for logging)
107:             * @param attributesBegin  the position of the beginning of the attribute list, or -1 if it should be calculated automatically from logBegin.
108:             * @param maxEnd  the position at which the attributes must end if a terminating character is not found, or -1 if no maximum.
109:             * @param tagName  the name of the enclosing StartTag, or null if constucting attributes directly.
110:             */
111:            private static Attributes construct(final Source source,
112:                    final String logType, int state, final int logBegin,
113:                    int attributesBegin, final int maxEnd,
114:                    final StartTagType startTagType, final String tagName,
115:                    final int maxErrorCount) {
116:                boolean isClosingSlashIgnored = false;
117:                if (tagName != null) {
118:                    // 'logBegin' parameter is the start of the associated start tag
119:                    if (attributesBegin == -1)
120:                        attributesBegin = logBegin + 1 + tagName.length();
121:                    if (startTagType == StartTagType.NORMAL
122:                            && HTMLElements.isClosingSlashIgnored(tagName))
123:                        isClosingSlashIgnored = true;
124:                } else {
125:                    attributesBegin = logBegin;
126:                }
127:                int attributesEnd = attributesBegin;
128:                final LinkedList attributeList = new LinkedList();
129:                final ParseText parseText = source.getParseText();
130:                int i = attributesBegin;
131:                char quote = ' ';
132:                Segment nameSegment = null;
133:                String key = null;
134:                int currentBegin = -1;
135:                boolean isTerminatingCharacter = false;
136:                int errorCount = 0;
137:                try {
138:                    while (!isTerminatingCharacter) {
139:                        if (i == maxEnd
140:                                || startTagType.atEndOfAttributes(source, i,
141:                                        isClosingSlashIgnored))
142:                            isTerminatingCharacter = true;
143:                        final char ch = parseText.charAt(i);
144:                        // First check if there is a server tag in this position:
145:                        if (ch == '<') {
146:                            final Tag interlopingTag = source.getTagAt(i);
147:                            if (interlopingTag != null
148:                                    && interlopingTag.getTagType()
149:                                            .isServerTag()) {
150:                                // There is a server tag in this position. Skip over it:
151:                                i = interlopingTag.end;
152:                                if (state == START_VALUE)
153:                                    state = IN_VALUE;
154:                                continue;
155:                            }
156:                        }
157:                        // There is no server tag in this position. Now we can parse the attributes:
158:                        switch (state) {
159:                        case IN_VALUE:
160:                            if (isTerminatingCharacter || ch == quote
161:                                    || (quote == ' ' && isWhiteSpace(ch))) {
162:                                Segment valueSegment;
163:                                Segment valueSegmentIncludingQuotes;
164:                                if (quote == ' ') {
165:                                    valueSegment = valueSegmentIncludingQuotes = new Segment(
166:                                            source, currentBegin, i);
167:                                } else {
168:                                    if (isTerminatingCharacter) {
169:                                        if (i == maxEnd) {
170:                                            if (source.logger.isInfoEnabled())
171:                                                log(
172:                                                        source,
173:                                                        logType,
174:                                                        tagName,
175:                                                        logBegin,
176:                                                        "terminated in the middle of a quoted attribute value",
177:                                                        i);
178:                                            if (reachedMaxErrorCount(
179:                                                    ++errorCount, source,
180:                                                    logType, tagName, logBegin,
181:                                                    maxErrorCount))
182:                                                return null;
183:                                            valueSegment = new Segment(source,
184:                                                    currentBegin, i);
185:                                            valueSegmentIncludingQuotes = new Segment(
186:                                                    source, currentBegin - 1, i); // this is missing the end quote
187:                                        } else {
188:                                            // don't want to terminate, only encountered a terminating character in the middle of a quoted value
189:                                            isTerminatingCharacter = false;
190:                                            break;
191:                                        }
192:                                    } else {
193:                                        valueSegment = new Segment(source,
194:                                                currentBegin, i);
195:                                        valueSegmentIncludingQuotes = new Segment(
196:                                                source, currentBegin - 1, i + 1);
197:                                    }
198:                                }
199:                                attributeList.add(new Attribute(source, key,
200:                                        nameSegment, valueSegment,
201:                                        valueSegmentIncludingQuotes));
202:                                attributesEnd = valueSegmentIncludingQuotes
203:                                        .getEnd();
204:                                state = BETWEEN_ATTRIBUTES;
205:                            } else if (ch == '<' && quote == ' ') {
206:                                if (source.logger.isInfoEnabled())
207:                                    log(
208:                                            source,
209:                                            logType,
210:                                            tagName,
211:                                            logBegin,
212:                                            "rejected because of '<' character in unquoted attribute value",
213:                                            i);
214:                                return null;
215:                            }
216:                            break;
217:                        case IN_NAME:
218:                            if (isTerminatingCharacter || ch == '='
219:                                    || isWhiteSpace(ch)) {
220:                                nameSegment = new Segment(source, currentBegin,
221:                                        i);
222:                                key = nameSegment.toString().toLowerCase();
223:                                if (isTerminatingCharacter) {
224:                                    attributeList.add(new Attribute(source,
225:                                            key, nameSegment)); // attribute with no value
226:                                    attributesEnd = i;
227:                                } else {
228:                                    state = (ch == '=' ? START_VALUE
229:                                            : AFTER_NAME);
230:                                }
231:                            } else if (!Tag.isXMLNameChar(ch)) {
232:                                // invalid character detected in attribute name.
233:                                if (ch == '<') {
234:                                    if (source.logger.isInfoEnabled())
235:                                        log(
236:                                                source,
237:                                                logType,
238:                                                tagName,
239:                                                logBegin,
240:                                                "rejected because of '<' character in attribute name",
241:                                                i);
242:                                    return null;
243:                                }
244:                                if (isInvalidEmptyElementTag(startTagType,
245:                                        source, i, logType, tagName, logBegin))
246:                                    break;
247:                                if (source.logger.isInfoEnabled())
248:                                    log(
249:                                            source,
250:                                            logType,
251:                                            tagName,
252:                                            logBegin,
253:                                            "contains attribute name with invalid character",
254:                                            i);
255:                                if (reachedMaxErrorCount(++errorCount, source,
256:                                        logType, tagName, logBegin,
257:                                        maxErrorCount))
258:                                    return null;
259:                            }
260:                            break;
261:                        case AFTER_NAME:
262:                            // attribute name has been followed by whitespace, but may still be followed by an '=' character.
263:                            if (isTerminatingCharacter
264:                                    || !(ch == '=' || isWhiteSpace(ch))) {
265:                                attributeList.add(new Attribute(source, key,
266:                                        nameSegment)); // attribute with no value
267:                                attributesEnd = nameSegment.getEnd();
268:                                if (isTerminatingCharacter)
269:                                    break;
270:                                // The current character is the first character of an attribute name
271:                                state = BETWEEN_ATTRIBUTES;
272:                                i--; // want to reparse the same character again, so decrement i.  Note we could instead just fall into the next case statement without a break, but such code is always discouraged.
273:                            } else if (ch == '=') {
274:                                state = START_VALUE;
275:                            } else if (ch == '<') {
276:                                if (source.logger.isInfoEnabled())
277:                                    log(
278:                                            source,
279:                                            logType,
280:                                            tagName,
281:                                            logBegin,
282:                                            "rejected because of '<' character after attribute name",
283:                                            i);
284:                                return null;
285:                            }
286:                            break;
287:                        case BETWEEN_ATTRIBUTES:
288:                            if (!isTerminatingCharacter) {
289:                                // the quote variable is used here to make sure whitespace has come after the last quoted attribute value
290:                                if (isWhiteSpace(ch)) {
291:                                    quote = ' ';
292:                                } else {
293:                                    if (quote != ' ') {
294:                                        if (source.logger.isInfoEnabled())
295:                                            log(
296:                                                    source,
297:                                                    logType,
298:                                                    tagName,
299:                                                    logBegin,
300:                                                    "has missing whitespace after quoted attribute value",
301:                                                    i);
302:                                        // log this as an error but don't count it
303:                                    }
304:                                    if (!Tag.isXMLNameStartChar(ch)) {
305:                                        // invalid character detected as first character of attribute name.
306:                                        if (ch == '<') {
307:                                            if (source.logger.isInfoEnabled())
308:                                                log(
309:                                                        source,
310:                                                        logType,
311:                                                        tagName,
312:                                                        logBegin,
313:                                                        "rejected because of '<' character",
314:                                                        i);
315:                                            return null;
316:                                        }
317:                                        if (isInvalidEmptyElementTag(
318:                                                startTagType, source, i,
319:                                                logType, tagName, logBegin))
320:                                            break;
321:                                        if (startTagType == StartTagType.NORMAL
322:                                                && startTagType
323:                                                        .atEndOfAttributes(
324:                                                                source, i,
325:                                                                false)) {
326:                                            // This checks whether we've found the characters "/>" but it wasn't recognised as the closing delimiter because isClosingSlashIgnored is true.
327:                                            if (source.logger.isInfoEnabled())
328:                                                log(
329:                                                        source,
330:                                                        logType,
331:                                                        tagName,
332:                                                        logBegin,
333:                                                        "contains a '/' character before the closing '>', which is ignored because tags of this name cannot be empty-element tags");
334:                                            break;
335:                                        }
336:                                        if (source.logger.isInfoEnabled())
337:                                            log(
338:                                                    source,
339:                                                    logType,
340:                                                    tagName,
341:                                                    logBegin,
342:                                                    "contains attribute name with invalid first character",
343:                                                    i);
344:                                        if (reachedMaxErrorCount(++errorCount,
345:                                                source, logType, tagName,
346:                                                logBegin, maxErrorCount))
347:                                            return null;
348:                                    }
349:                                    state = IN_NAME;
350:                                    currentBegin = i;
351:                                }
352:                            }
353:                            break;
354:                        case START_VALUE:
355:                            currentBegin = i;
356:                            if (isTerminatingCharacter) {
357:                                if (source.logger.isInfoEnabled())
358:                                    log(
359:                                            source,
360:                                            logType,
361:                                            tagName,
362:                                            logBegin,
363:                                            "has missing attribute value after '=' sign",
364:                                            i);
365:                                // log this as an error but don't count it
366:                                final Segment valueSegment = new Segment(
367:                                        source, i, i);
368:                                attributeList
369:                                        .add(new Attribute(source, key,
370:                                                nameSegment, valueSegment,
371:                                                valueSegment));
372:                                attributesEnd = i;
373:                                state = BETWEEN_ATTRIBUTES;
374:                                break;
375:                            }
376:                            if (ch == '\'' || ch == '"') {
377:                                quote = ch;
378:                                currentBegin++;
379:                            } else if (isWhiteSpace(ch)) {
380:                                break; // just ignore whitespace after the '=' sign as nearly all browsers do.
381:                            } else if (ch == '<') {
382:                                if (source.logger.isInfoEnabled())
383:                                    log(
384:                                            source,
385:                                            logType,
386:                                            tagName,
387:                                            logBegin,
388:                                            "rejected because of '<' character at the start of an attribute value",
389:                                            i);
390:                                return null;
391:                            } else {
392:                                quote = ' ';
393:                            }
394:                            state = IN_VALUE;
395:                            break;
396:                        case AFTER_TAG_NAME:
397:                            if (!isTerminatingCharacter) {
398:                                if (!isWhiteSpace(ch)) {
399:                                    if (isInvalidEmptyElementTag(startTagType,
400:                                            source, i, logType, tagName,
401:                                            logBegin))
402:                                        break;
403:                                    if (source.logger.isInfoEnabled())
404:                                        log(
405:                                                source,
406:                                                logType,
407:                                                tagName,
408:                                                logBegin,
409:                                                "rejected because the name contains an invalid character",
410:                                                i);
411:                                    return null;
412:                                }
413:                                state = BETWEEN_ATTRIBUTES;
414:                            }
415:                            break;
416:                        }
417:                        i++;
418:                    }
419:                    return new Attributes(source, attributesBegin,
420:                            attributesEnd, attributeList);
421:                } catch (IndexOutOfBoundsException ex) {
422:                    if (source.logger.isInfoEnabled())
423:                        log(source, logType, tagName, logBegin,
424:                                "rejected because it has no closing '>' character");
425:                    return null;
426:                }
427:            }
428:
429:            private static boolean reachedMaxErrorCount(final int errorCount,
430:                    final Source source, final String logType,
431:                    final String tagName, final int logBegin,
432:                    final int maxErrorCount) {
433:                if (errorCount <= maxErrorCount)
434:                    return false;
435:                if (source.logger.isInfoEnabled())
436:                    log(source, logType, tagName, logBegin,
437:                            "rejected because it contains too many errors");
438:                return true;
439:            }
440:
441:            private static boolean isInvalidEmptyElementTag(
442:                    final StartTagType startTagType, final Source source,
443:                    final int i, final String logType, final String tagName,
444:                    final int logBegin) {
445:                // This checks whether we've found the characters "/>" but it wasn't recognised as the closing delimiter because isClosingSlashIgnored is true.
446:                if (startTagType != StartTagType.NORMAL
447:                        || !startTagType.atEndOfAttributes(source, i, false))
448:                    return false;
449:                if (source.logger.isInfoEnabled())
450:                    log(
451:                            source,
452:                            logType,
453:                            tagName,
454:                            logBegin,
455:                            "contains a '/' character before the closing '>', which is ignored because tags of this name cannot be empty-element tags");
456:                return true;
457:            }
458:
459:            /**
460:             * Returns the {@link Attribute} with the specified name (case insensitive).
461:             * <p>
462:             * If more than one attribute exists with the specified name (which is illegal HTML),
463:             * the first is returned.  The attributes are examined front to back in a single pass.
464:             *
465:             * @param name  the name of the attribute to get; a <code>null</code> name matches nothing.
466:             * @return the attribute with the specified name, or <code>null</code> if no attribute with the specified name exists.
467:             * @see #getValue(String name)
468:             */
469:            public Attribute get(final String name) {
470:                // Walk the backing list once with an iterator: indexed access on a LinkedList re-traverses the list on every call.
471:                // NOTE(review): assumes size()/get(int), which are not visible here, are plain views of attributeList -- confirm.
472:                for (final Iterator iterator = attributeList.iterator(); iterator.hasNext();) {
473:                    final Attribute attribute = (Attribute) iterator.next();
474:                    if (attribute.getKey().equalsIgnoreCase(name))
475:                        return attribute;
476:                }
477:                return null;
478:            }
479:
480:            /**
481:             * Returns the {@linkplain CharacterReference#decode(CharSequence) decoded} value of the attribute with the specified name (case insensitive).
482:             * <p>
483:             * Returns <code>null</code> if no attribute with the specified name exists or
484:             * the attribute {@linkplain Attribute#hasValue() has no value}.
485:             * <p>
486:             * This is equivalent to {@link #get(String) get(name)}<code>.</code>{@link Attribute#getValue() getValue()},
487:             * except that it returns <code>null</code> if no attribute with the specified name exists instead of throwing a
488:             * <code>NullPointerException</code>.
489:             *
490:             * @param name  the name of the attribute to get.
491:             * @return the {@linkplain CharacterReference#decode(CharSequence) decoded} value of the attribute with the specified name, or <code>null</code> if the attribute does not exist or {@linkplain Attribute#hasValue() has no value}.
492:             * @see Attribute#getValue()
493:             */
494:            public String getValue(final String name) {
495:                final Attribute attribute = get(name);
496:                return attribute == null ? null : attribute.getValue();
497:            }
498:
499:            /**
500:             * Returns the raw (not {@linkplain CharacterReference#decode(CharSequence) decoded}) value of the attribute, or null if the attribute {@linkplain Attribute#hasValue() has no value}.
501:             * <p>
502:             * This is an internal convenience method.
503:             *
504:             * @return the raw (not {@linkplain CharacterReference#decode(CharSequence) decoded}) value of the attribute, or null if the attribute {@linkplain Attribute#hasValue() has no value}.
505:             */
506:            String getRawValue(final String name) {
507:                final Attribute attribute = get(name);
508:                return attribute == null || !attribute.hasValue() ? null
509:                        : attribute.getValueSegment().toString();
510:            }
511:
512:            /**
513:             * Returns the number of attributes.
514:             * <p>
515:             * This is equivalent to calling the <code>size()</code> method specified in the <code>List</code> interface.
516:             *
517:             * @return the number of attributes.
518:             */
519:            public int getCount() {
520:                return attributeList.size();
521:            }
522:
523:            /**
524:             * Returns an iterator over the {@link Attribute} objects in this list in order of appearance.
525:             * @return an iterator over the {@link Attribute} objects in this list in order of appearance.
526:             */
527:            public Iterator iterator() {
528:                return listIterator();
529:            }
530:
531:            /**
532:             * Returns a list iterator of the {@link Attribute} objects in this list in order of appearance,
533:             * starting at the specified position in the list.
534:             * <p>
535:             * The specified index indicates the first item that would be returned by an initial call to the <code>next()</code> method.
536:             * An initial call to the <code>previous()</code> method would return the item with the specified index minus one.
537:             * <p>
538:             * IMPLEMENTATION NOTE: For efficiency reasons this method does not return an immutable list iterator.
539:             * Calling any of the <code>add(Object)</code>, <code>remove()</code> or <code>set(Object)</code> methods on the returned
540:             * <code>ListIterator</code> does not throw an exception but could result in unexpected behaviour.
541:             *
542:             * @param index  the index of the first item to be returned from the list iterator (by a call to the <code>next()</code> method).
543:             * @return a list iterator of the items in this list (in proper sequence), starting at the specified position in the list.
544:             * @throws IndexOutOfBoundsException if the specified index is out of range (<code>index &lt; 0 || index &gt; size()</code>).
545:             */
546:            public ListIterator listIterator(final int index) {
547:                return attributeList.listIterator(index);
548:            }
549:
550:            /**
551:             * Populates the specified <code>Map</code> with the name/value pairs from these attributes.
552:             * <p>
553:             * Both names and values are stored as <code>String</code> objects.
554:             * <p>
555:             * The entries are added in order of apprearance in the source document.
556:             * <p>
557:             * An attribute with {@linkplain Attribute#hasValue() no value} is represented by a map entry with a <code>null</code> value.
558:             * <p>
559:             * Attribute values are automatically {@linkplain CharacterReference#decode(CharSequence) decoded}
560:             * before storage in the map.
561:             *
562:             * @param attributesMap  the map to populate, must not be <code>null</code>.
563:             * @param convertNamesToLowerCase  specifies whether all attribute names are converted to lower case in the map.
564:             * @return the same map specified as the argument to the <code>attributesMap</code> parameter, populated with the name/value pairs from these attributes.
565:             * @see #generateHTML(Map attributesMap)
566:             */
567:            public Map populateMap(final Map attributesMap,
568:                    final boolean convertNamesToLowerCase) {
569:                for (final Iterator i = listIterator(0); i.hasNext();) {
570:                    final Attribute attribute = (Attribute) i.next();
571:                    attributesMap.put(convertNamesToLowerCase ? attribute
572:                            .getKey() : attribute.getName(), attribute
573:                            .getValue());
574:                }
575:                return attributesMap;
576:            }
577:
578:            /**
579:             * Returns a string representation of this object useful for debugging purposes.
580:             * @return a string representation of this object useful for debugging purposes.
581:             */
582:            public String getDebugInfo() {
583:                final StringBuffer sb = new StringBuffer();
584:                sb.append("Attributes ").append(super .getDebugInfo()).append(
585:                        ": ");
586:                if (isEmpty()) {
587:                    sb.append("EMPTY");
588:                } else {
589:                    sb.append(Config.NewLine);
590:                    for (final Iterator i = listIterator(0); i.hasNext();) {
591:                        Attribute attribute = (Attribute) i.next();
592:                        sb.append("  ").append(attribute.getDebugInfo());
593:                    }
594:                }
595:                return sb.toString();
596:            }
597:
598:            /**
599:             * Returns the default maximum error count allowed when parsing attributes.
600:             * <p>
601:             * The system default value is 2.
602:             * <p>
603:             * When searching for start tags, the parser can find the end of the start tag only by
604:             * {@linkplain StartTagType#parseAttributes(Source,int,String) parsing}
605:             * the attributes, as it is valid HTML for attribute values to contain '&gt;' characters
606:             * (see the <a target="_blank" href="http://www.w3.org/TR/html401/charset.html#h-5.3.2">HTML 4.01 specification section 5.3.2</a>).
607:             * <p>
608:             * If the source text being parsed does not follow the syntax of an attribute list at all, the parser assumes
609:             * that the text which was originally identified as the beginning of of a start tag is in fact some other text,
610:             * such as an invalid '&lt;' character in the middle of some text, or part of a script element.
611:             * In this case the entire start tag is rejected.
612:             * <p>
613:             * On the other hand, it is quite common for attributes to contain minor syntactical errors,
614:             * such as an invalid character in an attribute name, or a couple of special characters in
615:             * {@linkplain TagType#isServerTag() server tags} that otherwise contain only attributes.
616:             * For this reason the parser allows a certain number of minor errors to occur while parsing an
617:             * attribute list before the entire start tag or attribute list is rejected.
618:             * This property indicates the number of minor errors allowed.
619:             * <p>
620:             * Major syntactical errors cause the start tag or attribute list to be rejected immediately, regardless
621:             * of the maximum error count setting.
622:             * <p>
623:             * Some errors are considered too minor to count at all (ignorable), such as missing whitespace between the end
624:             * of a quoted attribute value and the start of the next attribute name.
625:             * <p>
626:             * The classification of particular syntax errors in attribute lists into major, minor, and ignorable is
627:             * not part of the specification and may change in future versions.
628:             * <p>
629:             * Errors are {@linkplain Source#getLogger() logged} as they occur.
630:             * <p>
631:             * The value of this property is set using the {@link #setDefaultMaxErrorCount(int)} method.
632:             *
633:             * @return the default maximum error count allowed when parsing attributes.
634:             * @see Source#parseAttributes(int pos, int maxEnd, int maxErrorCount)
635:             */
636:            public static int getDefaultMaxErrorCount() {
637:                return defaultMaxErrorCount;
638:            }
639:
640:            /**
641:             * Sets the default maximum error count allowed when parsing attributes.
642:             * <p>
643:             * See the {@link #getDefaultMaxErrorCount()} method for a full description of this property.
644:             *
645:             * @param value  the default maximum error count allowed when parsing attributes.
646:             */
647:            public static void setDefaultMaxErrorCount(final int value) {
648:                defaultMaxErrorCount = value;
649:            }
650:
651:            /**
652:             * Returns the contents of the specified {@linkplain #populateMap(Map,boolean) attributes map} as HTML attribute name/value pairs.
653:             * <p>
654:             * Each attribute (including the first) is preceded by a single space, and all values are
655:             * {@linkplain CharacterReference#encode(CharSequence) encoded} and enclosed in double quotes.
656:             * <p>
657:             * The map keys must be of type <code>String</code> and values must be objects that implement the <code>CharSequence</code> interface.
658:             * <p>
659:             * A <code>null</code> value represents an attribute with no value.
660:             *
661:             * @param attributesMap  a map containing attribute name/value pairs.
662:             * @return the contents of the specified {@linkplain #populateMap(Map,boolean) attributes map} as HTML attribute name/value pairs.
663:             * @see StartTag#generateHTML(String tagName, Map attributesMap, boolean emptyElementTag)
664:             */
665:            public static String generateHTML(final Map attributesMap) {
666:                final StringWriter stringWriter = new StringWriter();
667:                try {
668:                    appendHTML(stringWriter, attributesMap);
669:                } catch (IOException ex) {
670:                } // IOException never occurs in StringWriter
671:                return stringWriter.toString();
672:            }
673:
674:            /**
675:             * Outputs the contents of the specified {@linkplain #populateMap(Map,boolean) attributes map} as HTML attribute name/value pairs to the specified <code>Writer</code>.
676:             * <p>
677:             * Each attribute is preceded by a single space, and all values are
678:             * {@linkplain CharacterReference#encode(CharSequence) encoded} and enclosed in double quotes.
679:             *
680:             * @param out  the <code>Writer</code> to which the output is to be sent.
681:             * @param attributesMap  a map containing attribute name/value pairs.
682:             * @throws IOException if an I/O exception occurs.
683:             * @see #populateMap(Map attributesMap, boolean convertNamesToLowerCase)
684:             */
685:            static void appendHTML(final Writer writer, final Map attributesMap)
686:                    throws IOException {
687:                for (final Iterator i = attributesMap.entrySet().iterator(); i
688:                        .hasNext();) {
689:                    final Map.Entry entry = (Map.Entry) i.next();
690:                    Attribute.appendHTML(writer, (String) entry.getKey(),
691:                            (CharSequence) entry.getValue());
692:                }
693:            }
694:
695:            StringBuffer appendTidy(final StringBuffer sb, Tag nextTag) {
696:                for (final Iterator i = listIterator(0); i.hasNext();)
697:                    nextTag = ((Attribute) i.next()).appendTidy(sb, nextTag);
698:                return sb;
699:            }
700:
701:            Map getMap(final boolean convertNamesToLowerCase) {
702:                return populateMap(new LinkedHashMap(getCount() * 2, 1.0F),
703:                        convertNamesToLowerCase);
704:            }
705:
706:            private static void log(final Source source, final String part1,
707:                    final CharSequence part2, final int begin,
708:                    final String part3, final int pos) {
709:                source.logger.info(source.getRowColumnVector(pos).appendTo(
710:                        source.getRowColumnVector(begin).appendTo(
711:                                new StringBuffer(200).append(part1).append(' ')
712:                                        .append(part2).append(" at ")).append(
713:                                ' ').append(part3).append(" at position "))
714:                        .toString());
715:            }
716:
717:            private static void log(final Source source, final String part1,
718:                    final CharSequence part2, final int begin,
719:                    final String part3) {
720:                source.logger.info(source.getRowColumnVector(begin).appendTo(
721:                        new StringBuffer(200).append(part1).append(' ').append(
722:                                part2).append(" at ")).append(' ')
723:                        .append(part3).toString());
724:            }
725:        }
www.java2java.com | Contact Us
All other trademarks are property of their respective owners.