001: /*
002: * Copyright 2001 Sun Microsystems, Inc. All rights reserved.
003: * PROPRIETARY/CONFIDENTIAL. Use of this product is subject to license terms.
004: */
005: package com.sun.portal.rewriter.util.xml;
006:
007: import com.sun.portal.rewriter.util.Constants;
008:
009: /**
010: * TagParser parses an HTML or XML tag and returns a Tag containing the
011: * name of Tag and all of the attributes as key/value pairs within the
012: * Tag class.
013: * Don't sent start and end < and > chars
014: */
015: public class TagParser {
016: public static Tag parse(final String aTagText) {
017: return parse(aTagText, null);
018: }//parse()
019:
020: public static Tag parse(String aTagText, final Tag aTag) {
021: if (aTagText == null) {
022: aTagText = "";
023: }
024:
025: final Tag lTag;
026: if (aTag == null) {
027: lTag = new Tag(); //stores the name of lTag and attributes
028: } else {
029: lTag = aTag;
030: }
031:
032: final String lTagText = aTagText.trim();
033:
034: //check if the tag has attributes
035: if (lTagText.indexOf(Constants.EQUALS) == -1) {
036: lTag.setName(lTagText);
037: return lTag;
038: }
039:
040: //the first word describes the name of the lTag
041: String mayBeTagName = lTagText;
042: int spaceIndex = lTagText.indexOf(Constants.SPACE);
043: if (spaceIndex != -1) {
044: mayBeTagName = lTagText.substring(0, spaceIndex);
045: }
046:
047: //get the attributes from this tag, following the name
048: String attributes = lTagText.substring(mayBeTagName.length())
049: .trim();
050:
051: if ((mayBeTagName.indexOf(Constants.EQUALS) == -1)
052: && ((attributes.length() > 0) && (attributes.charAt(0) != '='))) {
053: lTag.setName(mayBeTagName);
054: } else {
055: lTag.setName(lTag.EMPTY_TAG_NAME);
056: attributes = lTagText;
057: }
058:
059: return parseAttributes(attributes, lTag);
060: }//parse()
061:
062: public static Tag parseAttributes(final String aTagContent,
063: final Tag aTag) {
064: final String lTagContent = aTagContent;
065: final int lStmtLength = lTagContent.length();
066:
067: boolean inSQuote = false; //in Single Quotes
068: boolean inDQuote = false; //in Dobule Quotes
069: boolean inESQuote = false; //in EndOf Single Quotes
070: boolean inEDQuote = false; //in End Of Single Quotes
071: boolean endOfString = false;
072: boolean lookForString = false;
073: int start = 0;
074: int mStartLink = 0, mEndLink = 0;
075: int equalindex = -1, holder = 0;
076:
077: String mAttributeName = "";
078: String mAttributeValue = "";
079:
080: boolean isValueSet = false;
081:
082: char c;
083: for (int i = 0; i < lStmtLength; i++) {
084: c = lTagContent.charAt(i);
085: if (Character.isWhitespace(c) || endOfString) {
086: if (!inSQuote && !inDQuote) {
087: lookForString = false;
088: endOfString = false;
089:
090: if (Character.isWhitespace(c)) {
091: while ((i + 1) < lStmtLength
092: && Character.isWhitespace(lTagContent
093: .charAt(i + 1))) {
094: ++i;
095: }//while loop
096:
097: if ((i + 1) >= lStmtLength) {
098: start = i + 1;
099: } else if (lTagContent.charAt(i + 1) != '=') {
100: start = i + 1;
101: } else {
102: continue;
103: }
104: } else {
105: start = i;
106: }
107:
108: isValueSet = true;
109: }
110: } else if (c == '\\') {
111: if (i + 1 < lStmtLength) {
112: char c2 = lTagContent.charAt(i + 1);
113: if (c2 == '\'') {
114: if ((i == equalindex + 1) || inESQuote) {
115: inESQuote = !inESQuote;
116: if (inESQuote) {
117: mStartLink = i + 2;
118: } else {
119: mEndLink = i;
120: endOfString = true;
121: }
122: i++;
123: }
124: } else if (c2 == '\"') {
125: if ((i == equalindex + 1) || inEDQuote) {
126: inEDQuote = !inEDQuote;
127: if (inEDQuote) {
128: mStartLink = i + 2;
129: } else {
130: mEndLink = i;
131: endOfString = true;
132: }
133: i++;
134: }
135: }
136: }
137: } else if (c == '\'') {
138: if (i == equalindex + 1 || inSQuote) {
139: if (!inDQuote) {
140: inSQuote = !inSQuote;
141: if (inSQuote) {
142: mStartLink = i + 1;
143: } else {
144: mEndLink = i;
145: endOfString = true;
146: }
147: }
148: }
149: } else if (c == '\"') {
150: if ((i == equalindex + 1) || inDQuote) {
151: if (!inSQuote) {
152: inDQuote = !inDQuote;
153: if (inDQuote) {
154: mStartLink = i + 1;
155: } else {
156: mEndLink = i;
157: endOfString = true;
158: }
159: }
160: }
161: } else if (c == '=') {
162: if (!lookForString) {
163: if (!inDQuote && !inSQuote) {
164: equalindex = i;
165: lookForString = true;
166: mStartLink = i + 1;
167:
168: holder = i;
169: while (mStartLink < lStmtLength
170: && Character.isWhitespace(lTagContent
171: .charAt(mStartLink))) {
172: ++equalindex;
173: ++mStartLink;
174: ++holder;
175: }
176:
177: mAttributeName = lTagContent
178: .substring(start, i).trim();
179: i = holder;
180: }
181: }
182: }//if/else
183:
184: if (isValueSet || i + 1 == lStmtLength) {
185: isValueSet = false;
186:
187: if (mStartLink > mEndLink) {
188: mEndLink = i;
189:
190: if (i + 1 == lStmtLength) {
191: mEndLink++;
192: }
193: }
194:
195: mAttributeValue = aTagContent.substring(mStartLink,
196: mEndLink);
197: if (mAttributeName != null)
198: aTag.put(mAttributeName.trim(), mAttributeValue
199: .trim()); //put the attribute in the hash table
200:
201: mEndLink = i;
202: mAttributeName = mAttributeValue = null;
203: }
204: }//for loop
205:
206: return aTag;
207: }//parseAttributes()
208:
209: public static void main(String[] args) {
210: final String[] lData = { "base href=\"10\" u",
211:
212: /*
213: "<base href \n = \n \"/raha.html\" target = \"_blank\">",
214:
215: " <applet name=\"applet\" \n" +
216: " code=\"Chalo.class\" \n" +
217: " width=\"400\" \n" +
218: " height=\"200\" \n" +
219: " codebase=http://www.sun.com/applets/somedir/../rajesh/./mahesh/raju/.\n" +
220: " archive=\"abc/def/Chalo.jar\" \n",
221:
222: "applet name=\"applet\" \n" +
223: " code=\"Hello1.class\" \n" +
224: " width=\"400\" \n" +
225: " height=\"200\" \n" +
226: " codebase=\"/applets/somedir/\" \n" +
227: " archive=\"abc/def/Hello1.jar\"\n",
228:
229: "raju = \"http.com\" href = \"100\"",
230: "raju = href = \"100\"",
231: "raju = href = \" \"",
232: " a raju = href = \" \"",
233: " raju=111 href=\" \" dummy = ",
234: " a raju=111 href=\" \" dummy ",
235: " a raju=111 href=\" \"",
236: " a raju=111 href=\" \" dummy = ",
237: " a raju= href=\" \"",
238: "<a href=\"/mgyhp.html\" onClick = \" var \"ab\" = 10 +45 -122 3339 style.behavior = ' var ab = 10 m+13 ; url(#default#homepage)';setHomePage('http://www.google.com/');\">",
239: "<td id=2 bgcolor=#efefef align=center width=120 nowrap onClick=\"return c('/grphp?hl=en');\" style=cursor:hand;>",*/
240: };
241:
242: for (int i = 0; i < 1; i++) {
243: Tag t = TagParser.parse(lData[i], null);
244: System.out.println(t);
245: }//for loop
246: }//main()
247: }//class TagParser
|