001: /*
002: * ContentParser.java February 2001
003: *
004: * Copyright (C) 2001, Niall Gallagher <niallg@users.sf.net>
005: *
006: * This library is free software; you can redistribute it and/or
007: * modify it under the terms of the GNU Lesser General Public
008: * License as published by the Free Software Foundation.
009: *
010: * This library is distributed in the hope that it will be useful,
011: * but WITHOUT ANY WARRANTY; without even the implied warranty of
012: * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
013: * GNU Lesser General Public License for more details.
014: *
015: * You should have received a copy of the GNU Lesser General
016: * Public License along with this library; if not, write to the
017: * Free Software Foundation, Inc., 59 Temple Place, Suite 330,
018: * Boston, MA 02111-1307 USA
019: */
020:
021: package simple.util.parse;
022:
023: import simple.util.net.ContentType;
024:
025: /**
026: * <code>ContentParser</code> provides access to the MIME type parts,
027: * that is the type subtype and an optional <code>charset</code>
028: * parameter. The <code>charset</code> parameter is one of many
029: * parameters that can be assiciated with a MIME type. This however
030: * only provides access to the <code>charset</code> value.
031: * <p>
032: * The <code>getCharset</code> will return <code>val</code> if the
033: * MIME type represented is type/subtype; charset=val. The type and
034: * subtype are set to the <code>String</code> value <code>null</code>
035: * if the <code>setPrimary</code> or <code>setSecondary</code> are given
036: * a <code>null</code><code>String</code>. If the <code>String</code>
037: * that is being parsed does not contain a type or subtype then the
038: * <code>toString</code> will return the value null/null otherwise
039: * it will recreate the MIME type.
040: *
041: * @author Niall Gallagher
042: */
043: public class ContentParser extends Parser implements ContentType {
044:
045: /**
046: * Used to store the characters consumed for the subtype.
047: */
048: private ParseBuffer subtype;
049:
050: /**
051: * Used to store the characters for the <code>charset</code>.
052: */
053: private ParseBuffer charset;
054:
055: /**
056: * Used to store the characters consumed for the type.
057: */
058: private ParseBuffer type;
059:
060: /**
061: * The default constructor will create a <code>ContentParser</code>
062: * that contains no charset, type or subtype. This can be used to
063: * extract the type, subtype and the optional <code>charset</code>
064: * parameter by using the parser's <code>parse(String)</code>
065: * method.
066: */
067: public ContentParser() {
068: this .subtype = new ParseBuffer();
069: this .charset = new ParseBuffer();
070: this .type = new ParseBuffer();
071: }
072:
073: /**
074: * This is primarily a convineance constructor. This will parse
075: * the <code>String</code> given to extract the MIME type. This
076: * could be achived by calling the default no-arg constructor
077: * and then using the instance to invoke the <code>parse</code>
078: * method on that <code>String</code>.
079: *
080: * @param header <code>String</code> containing a MIME type value
081: */
082: public ContentParser(String header) {
083: this ();
084: parse(header);
085: }
086:
087: /**
088: * Sets the type to whatever value is in the <code>String</code>
089: * object. If the <code>String</code> object is <code>null</code>
090: * the this object's <code>toString</code> method will contain
091: * the value <code>null</code>.
092: * <p>
093: * If type is <code>null</code> then the <code>toString</code>
094: * method will be null/subtype;param=value. If the type is
095: * non-null this will contain the value of the <code>String</code>.
096: *
097: * @param primary the type to add to the MIME type
098: */
099: public void setPrimary(String primary) {
100: type.clear();
101: type.append(primary == null ? "" : primary);
102: }
103:
104: /**
105: * This is used to retrive the type of this MIME type. The type
106: * part within the MIME type defines the generic type. For example
107: * <code>type/subtype;param1=value1</code>. This will return the
108: * value of the type part. If there is no type part then this will
109: * return <code>null</code> otherwise the type <code>String</code>.
110: *
111: * @return the type part of the MIME type
112: */
113: public String getPrimary() {
114: if (type.length() == 0) {
115: return null;
116: }
117: return type.toString();
118: }
119:
120: /**
121: * Sets the subtype to whatever value is in the <code>String</code>
122: * object. If the <code>String</code> object is <code>null</code>
123: * the this object's <code>toString</code> method will contain the
124: * value <code>null</code>.
125: * <p>
126: * If subtype is <code>null</code> then the <code>toString</code>
127: * method will be <code>type/null;param=value</code>. If the type
128: * is non-null this will contain the value of the <code>String</code>.
129: *
130: * @param type the type to add to the MIME type
131: */
132: public void setSecondary(String type) {
133: subtype.clear();
134: subtype.append(type == null ? "" : type);
135: }
136:
137: /**
138: * This is used to retrive the subtype of this MIME type. The subtype
139: * part within the MIME type defines the specific type. For example
140: * <code>type/subtype;param1=value1</code>. This will return the value
141: * of the subtype part. If there is no subtype part then this will
142: * return <code>null</code> otherwise the type <code>String</code>.
143: *
144: * @return the subtype part of the MIME type
145: */
146: public String getSecondary() {
147: if (subtype.length() == 0) {
148: return null;
149: }
150: return subtype.toString();
151: }
152:
153: /**
154: * This will set the <code>charset</code> to whatever value is in the
155: * <code>String</code> object. If the <code>String</code> object is
156: * <code>null</code> the this object's <code>toString</code> method
157: * will not contain the <code>charset</code>.
158: * <p>
159: * If <code>charset</code> is null then the <code>toString</code>
160: * method will be type/subtype. If the <code>charset</code> value
161: * is non-null this will contain the <code>charset</code> parameter
162: * with that value.
163: *
164: * @param enc the <code>charset</code> value to add to the MIME type
165: */
166: public void setCharset(String enc) {
167: charset.clear();
168: charset.append(enc == null ? "" : enc);
169: }
170:
171: /**
172: * This is used to retrive the <code>charset</code> of this MIME type.
173: * The <code>charset</code> part within the MIME type is an optional
174: * parameter. For example <code>type/subtype;charset=value</code>. This
175: * will return the value of the <code>charset</code> value. If there is
176: * no <code>charset</code> param then this will return <code>null</code>
177: * otherwise the type <code>String</code>.
178: *
179: * @return the <code>charset</code> value for the MIME type
180: */
181: public String getCharset() {
182: if (charset.length() == 0) {
183: return null;
184: }
185: return charset.toString();
186: }
187:
188: /**
189: * This is used to remove all whitespace characters from the
190: * <code>String</code> excluding the whitespace within literals.
191: * The definition of a literal can be found in RFC 2616.
192: * <p>
193: * The definition of a literal for RFC 2616 is anything between 2
194: * quotes but excuding quotes that are prefixed with the backward
195: * slash character.
196: */
197: private void pack() {
198: int len = count;
199: int seek = 0;
200: int pos = 0;
201: char old = buf[0];
202:
203: while (seek < len) {
204: char ch = buf[seek++];
205: if (ch == '"' && old != '\\') { /* qd-text*/
206: buf[pos++] = ch;
207: while (seek < len) {
208: old = buf[seek - 1];
209: ch = buf[seek++];
210: buf[pos++] = ch;
211: if (ch == '"' && old != '\\') { /*qd-text*/
212: break;
213: }
214: }
215: } else if (!space(ch)) {
216: old = buf[seek - 1];
217: buf[pos++] = old;
218: }
219: }
220: count = pos;
221: }
222:
223: /**
224: * This will initialize the parser when it is ready to parse
225: * a new <code>String</code>. This will reset the parser to a
226: * ready state. The init method is invoked by the parser when
227: * the <code>Parser.parse</code> method is invoked.
228: */
229: protected void init() {
230: pack();
231: type.clear();
232: subtype.clear();
233: charset.clear();
234: off = 0;
235: }
236:
237: /**
238: * Reads and parses the MIME type from the given <code>String</code>
239: * object. This uses the syntax defined by RFC 2616 for the media-type
240: * syntax. This parser is only concerned with one parameter, the
241: * <code>charset</code> parameter. The syntax for thhe media type is
242: * <pre>
243: * media-type = token "/" token *( ";" parameter )
244: * parameter = token | literal
245: * </pre>
246: */
247: protected void parse() {
248: type();
249: off++;
250: subtype();
251: parameters();
252: }
253:
254: /**
255: * This reads the type from the MIME type. This will fill the
256: * type <code>ParseBuffer</code>. This will read all chars
257: * upto but not including the first instance of a '/'. The type
258: * of a media-type as defined by RFC 2616 is
259: * <code>type/subtype;param=val;param2=val</code>.
260: */
261: private void type() {
262: while (off < count) {
263: if (buf[off] == '/') {
264: break;
265: }
266: type.append(buf[off]);
267: off++;
268: }
269: }
270:
271: /**
272: * This reads the subtype from the MIME type. This will fill the
273: * subtype <code>ParseBuffer</code>. This will read all chars
274: * upto but not including the first instance of a ';'. The subtype
275: * of a media-type as defined by RFC 2616 is
276: * <code>type/subtype;param=val;param2=val</code>.
277: */
278: private void subtype() {
279: while (off < count) {
280: if (buf[off] == ';') {
281: break;
282: }
283: subtype.append(buf[off]);
284: off++;
285: }
286: }
287:
288: /**
289: * This will read the parameters from the MIME type. This will search
290: * for the <code>charset</code> parameter within the set of parameters
291: * which are given to the type. The <code>charset</code> param is the
292: * only parameter that this parser will tokenize.
293: * <p>
294: * This will remove any parameters that preceed the charset parameter.
295: * Once the <code>charset</code> is retrived the MIME type is considered
296: * to be parsed.
297: */
298: private void parameters() {
299: while (skip(";")) {
300: if (skip("charset=")) {
301: charset();
302: break;
303: } else {
304: parameter();
305: }
306: }
307: }
308:
309: /**
310: * This is a parameter as defined by RFC 2616. The parameter is added to a
311: * MIME type e.g. <code>type/subtype;param=val</code> etc. The parameter
312: * name and value are not stored. This is used to simply update the read
313: * offset past the parameter. The reason for reading the parameters is to
314: * search for the <code>charset</code> parameter which will indicate the
315: * encoding.
316: */
317: private void parameter() {
318: name();
319: off++; /* = */
320: value();
321: }
322:
323: /**
324: * This will simply read all characters from the buffer before the first '='
325: * character. This represents a parameter name (see RFC 2616 for token). The
326: * parameter name is not buffered it is simply read from the buffer. This will
327: * not cause an <code>IndexOutOfBoundsException</code> as each offset
328: * is checked before it is acccessed.
329: */
330: private void name() {
331: while (off < count) {
332: if (buf[off] == '=') {
333: break;
334: }
335: off++;
336: }
337: }
338:
339: /**
340: * This is used to read a parameters value from the buf. This will read all
341: * <code>char</code>'s upto but excluding the first terminal <code>char</code>
342: * encountered from the off within the buf, or if the value is a literal
343: * it will read a literal from the buffer (literal is any data between
344: * quotes except if the quote is prefixed with a backward slash character).
345: */
346: private void value() {
347: if (buf[off] == '"') {
348: for (off++; off < count;) {
349: if (buf[off++] == '"') {
350: if (buf[off - 2] != '\\') {
351: break;
352: }
353: }
354: }
355: } else {
356: while (off < count) {
357: if (buf[off] == ';')
358: break;
359: off++;
360: }
361: }
362: }
363:
364: /**
365: * This is used to read the value from the <code>charset</code> param.
366: * This will fill the <code>charset</code> <code>ParseBuffer</code> and with
367: * the <code>charset</code> value. This will read a literal or a token as
368: * the <code>charset</code> value. If the <code>charset</code> is a literal
369: * then the quotes will be read as part of the charset.
370: */
371: private void charset() {
372: if (buf[off] == '"') {
373: charset.append('"');
374: for (off++; off < count;) {
375: charset.append(buf[off]);
376: if (buf[off++] == '"')
377: if (buf[off - 2] != '\\') {
378: break;
379: }
380: }
381: } else {
382: while (off < count) {
383: if (buf[off] == ';')
384: break;
385: charset.append(buf[off]);
386: off++;
387: }
388: }
389: }
390:
391: /**
392: * This will return the <code>String</code> value of the MIME type. This
393: * will return the MIME type with the type, subtype and if there is a
394: * <code>charset</code> value specified then a <code>charset</code> parameter.
395: * <p>
396: * The <code>charset</code> parameter is an optional parameter to the MIME
397: * type. An example a MIME type is <code>type/subtype; charset=value</code>.
398: * If the type or subtype is <code>null</code> then the MIME type will be
399: * wither null/subtype, type/null or if both are <code>null</code> null/null.
400: *
401: * @return the <code>String</code> representation of the MIME type
402: */
403: public String toString() {
404: return "" + type + "/" + subtype
405: + (charset.length() > 0 ? "; charset=" + charset : "");
406: }
407: }
|