001: package com.etymon.pj.object;
002:
003: import java.io.*;
004: import com.etymon.pj.exception.*;
005:
006: /**
007: A representation of the PDF string type.
008: @author Nassib Nassar
009: */
010: public class PjString extends PjObject {
011:
012: /**
013: Creates a string object.
014: @param s the string value to initialize this object to.
015: */
016: public PjString(String s) {
017: _s = s;
018: }
019:
020: /**
021: Returns the string value of this object.
022: @return the string value of this object.
023: */
024: public String getString() {
025: return _s;
026: }
027:
028: /**
029: Writes this string to a stream in PDF format.
030: @param os the stream to write to.
031: @return the number of bytes written.
032: @exception IOException if an I/O error occurs.
033: */
034: public long writePdf(OutputStream os) throws IOException {
035: long z = 0;
036: int length = _s.length();
037: char c;
038: z = z + write(os, "(");
039: for (int x = 0; x < length; x++) {
040: c = _s.charAt(x);
041: switch (c) {
042: case '\n':
043: z = z + write(os, "\\n");
044: break;
045: case '\r':
046: z = z + write(os, "\\r");
047: break;
048: case '\t':
049: z = z + write(os, "\\t");
050: break;
051: case '\b':
052: z = z + write(os, "\\b");
053: break;
054: case '\f':
055: z = z + write(os, "\\f");
056: break;
057: case '\\':
058: z = z + write(os, "\\\\");
059: break;
060: case '(':
061: z = z + write(os, "\\(");
062: break;
063: case ')':
064: z = z + write(os, "\\)");
065: break;
066: default:
067: z = z + write(os, c);
068: }
069: }
070: z = z + write(os, ")");
071: return z;
072: }
073:
074: /**
075: Returns a deep copy of this object.
076: @return a deep copy of this object.
077: */
078: public Object clone() {
079: return this ;
080: }
081:
082: /**
083: Returns a string representation of this array in PDF format.
084: @return the string representation.
085: public String toString() {
086: int length = _s.length();
087: char c;
088: StringBuffer sb = new StringBuffer("(");
089: for (int x = 0; x < length; x++) {
090: c = _s.charAt(x);
091: switch (c) {
092: case '\n':
093: sb.append("\\n");
094: break;
095: case '\r':
096: sb.append("\\r");
097: break;
098: case '\t':
099: sb.append("\\t");
100: break;
101: case '\b':
102: sb.append("\\b");
103: break;
104: case '\f':
105: sb.append("\\f");
106: break;
107: case '\\':
108: sb.append("\\\\");
109: break;
110: case '(':
111: sb.append("\\(");
112: break;
113: case ')':
114: sb.append("\\)");
115: break;
116: default:
117: if (Character.isISOControl(c)) {
118: sb.append('\\');
119: sb.append(Integer.toOctalString((int)c));
120: } else {
121: sb.append(c);
122: }
123: }
124: }
125: sb.append(')');
126: return sb.toString();
127: }
128: */
129:
130: /**
131: Converts a PDF-encoded string to a java String, which may
132: be then be used to initialize a PjString object.
133: @param pdfString the PDF-encoded string to be decoded.
134: @return the sequence of characters decoded from pdfString,
135: represented as a java String.
136: @exception PdfFormatException if pdfString is invalid PDF.
137: */
138: public static String decodePdf(String pdfString)
139: throws PdfFormatException {
140: int length = pdfString.length();
141: if (length == 0) {
142: throw new PdfFormatException("'(' or '<' expected.", 0);
143: }
144: switch (pdfString.charAt(0)) {
145: case '(':
146: if (pdfString.charAt(length - 1) != ')') {
147: throw new PdfFormatException("')' expected.", length);
148: }
149: return decodeEscapedString(pdfString);
150: case '<':
151: if (pdfString.charAt(length - 1) != '>') {
152: throw new PdfFormatException("'>' expected.", length);
153: }
154: return decodeHexString(pdfString);
155: default:
156: throw new PdfFormatException("'(' or '<' expected.", 0);
157: }
158: }
159:
160: /**
161: Decodes a PDF string enclosed in parentheses. This method
162: ignores the first and last characters of pdfString because
163: they are assumed to be matching parentheses.
164: @param pdfString the PDF-encoded string to be decoded.
165: @return the sequence of characters decoded from pdfString,
166: represented as a java String.
167: @exception PdfFormatException if invalid PDF encoding is
168: encountered in pdfString.
169: */
170: private static String decodeEscapedString(String pdfString)
171: throws PdfFormatException {
172: int length = pdfString.length();
173: StringBuffer decodedString = new StringBuffer(length);
174: StringBuffer escapeString = new StringBuffer(4);
175: boolean escape = false;
176: char ch;
177: int x = 1;
178: while (x < (length - 1)) {
179: ch = pdfString.charAt(x);
180: if (ch == '\\') {
181: if (escape == false) {
182: // this is the beginning of an escape string
183: escape = true;
184: escapeString.setLength(0);
185: } else {
186: // we're already escaped, so this must be the 2nd backslash in a row
187: decodedString.append('\\');
188: escape = false;
189: }
190: } else {
191: if (escape == false) {
192: // it's a normal character
193: decodedString.append(ch);
194: } else {
195: // this is part of an escaped sequence
196: if (escapeString.length() == 0) {
197: // it's the beginning of the sequence!
198: switch (ch) {
199: case 'n':
200: decodedString.append('\n');
201: escape = false;
202: break;
203: case 'r':
204: decodedString.append('\r');
205: escape = false;
206: break;
207: case 't':
208: decodedString.append('\t');
209: escape = false;
210: break;
211: case 'b':
212: decodedString.append('\b');
213: escape = false;
214: break;
215: case 'f':
216: decodedString.append('\f');
217: escape = false;
218: break;
219: case '\\':
220: decodedString.append('\\');
221: escape = false;
222: break;
223: case '(':
224: decodedString.append('(');
225: escape = false;
226: break;
227: case ')':
228: decodedString.append(')');
229: escape = false;
230: break;
231: case '0':
232: case '1':
233: case '2':
234: case '3':
235: case '4':
236: case '5':
237: case '6':
238: case '7':
239: case '8':
240: case '9':
241: escapeString.append(ch);
242: break;
243: default:
244: // here we should throw a new
245: // PdfFormatException("Invalid escape character.", x);
246: // unfortunately, I ran this on a PDF file created using
247: // Acrobat PDFWriter 2.0 for Windows, and that file had
248: // solitary '\' characters in strings (in a file path in
249: // a /Creator field), which is incorrect,
250: // unless I am missing something.
251: // so we may need to be more forgiving;
252: // for now, if we reach this point, let's just treat the
253: // token as a backslash and exit escape mode.
254: decodedString.append('\\');
255: escape = false;
256: // roll back counter to reprocess this character
257: x--;
258: }
259: } else {
260: // it's just another character in the sequence;
261: // so either it's an octal digit, or else we're
262: // back to non-escape mode
263: switch (ch) {
264: case '0':
265: case '1':
266: case '2':
267: case '3':
268: case '4':
269: case '5':
270: case '6':
271: case '7':
272: case '8':
273: case '9':
274: // octal digit
275: /*
276: if (escapeString.length() < 3) {
277: escapeString.append(ch);
278: break;
279: }
280: */
281: int len = escapeString.length();
282: if (len < 3) {
283: escapeString.append(ch);
284: }
285: if (len < 2) {
286: break;
287: }
288: // otherwise we continue into the default section...
289: // but first push the counter forward, so it doesn't
290: // roll back to the current character.
291: x++;
292: default:
293: // end of escape; we need to decode the octal token and move on
294: decodedString.append((char) Integer
295: .parseInt(escapeString.toString(),
296: 8));
297: escape = false;
298: // roll back counter to reprocess this character
299: x--;
300: }
301: }
302: }
303: }
304: x++;
305: }
306: return decodedString.toString();
307: }
308:
309: /**
310: Decodes a PDF hexadecimal string enclosed in angle
311: brackets. This method ignores the first and last
312: characters of pdfString because they are assumed to be
313: matching angle brackets.
314: @param pdfString the PDF-encoded string to be decoded.
315: @return the sequence of characters decoded from pdfString,
316: represented as a java String.
317: @exception PdfFormatException if invalid PDF encoding is
318: encountered in pdfString.
319: */
320: private static String decodeHexString(String pdfString)
321: throws PdfFormatException {
322: int length = pdfString.length();
323: StringBuffer decodedString = new StringBuffer(length);
324: StringBuffer hexString = new StringBuffer(4);
325: char ch;
326: int x = 1;
327: while (x < (length - 1)) {
328: ch = pdfString.charAt(x);
329: // first make sure it is a hex digit
330: switch (Character.toUpperCase(ch)) {
331: case '0':
332: case '1':
333: case '2':
334: case '3':
335: case '4':
336: case '5':
337: case '6':
338: case '7':
339: case '8':
340: case '9':
341: case 'A':
342: case 'B':
343: case 'C':
344: case 'D':
345: case 'E':
346: case 'F':
347: // good, it is a valid hex character
348: // we accumulate pairs in hexString
349: hexString.append(ch);
350: // if this is the last character, then pad out hexString with a zero if needed
351: if ((x == (length - 2)) && (hexString.length() == 1)) {
352: hexString.append('0');
353: }
354: // now, if we have a pair of digits, evaluate it and clear hexString
355: if (hexString.length() == 2) {
356: decodedString.append((char) Integer.parseInt(
357: hexString.toString(), 16));
358: hexString.setLength(0);
359: }
360: break;
361: case ' ':
362: case '\t':
363: case '\r':
364: case '\n':
365: case '\f':
366: // ignore whitespace
367: break;
368: default:
369: throw new PdfFormatException(
370: "Hexadecimal digit expected.", x);
371: }
372: x++;
373: }
374: return decodedString.toString();
375: }
376:
377: /**
378: Compares two PjString objects for equality.
379: @param obj the reference object to compare to.
380: @return true if this object is the same as obj, false
381: otherwise. */
382: public boolean equals(Object obj) {
383: if (obj == null) {
384: return false;
385: }
386: if (obj instanceof PjString) {
387: return _s.equals(((PjString) obj)._s);
388: } else {
389: return false;
390: }
391: }
392:
393: private String _s;
394:
395: }
|