001: /*
002: * Copyright (c) 1998-2008 Caucho Technology -- all rights reserved
003: *
004: * This file is part of Resin(R) Open Source
005: *
006: * Each copy or derived work must preserve the copyright notice and this
007: * notice unmodified.
008: *
009: * Resin Open Source is free software; you can redistribute it and/or modify
010: * it under the terms of the GNU General Public License as published by
011: * the Free Software Foundation; either version 2 of the License, or
012: * (at your option) any later version.
013: *
014: * Resin Open Source is distributed in the hope that it will be useful,
015: * but WITHOUT ANY WARRANTY; without even the implied warranty of
016: * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE, or any warranty
017: * of NON-INFRINGEMENT. See the GNU General Public License for more
018: * details.
019: *
020: * You should have received a copy of the GNU General Public License
021: * along with Resin Open Source; if not, write to the
022: *
023: * Free Software Foundation, Inc.
024: * 59 Temple Place, Suite 330
025: * Boston, MA 02111-1307 USA
026: *
027: * @author Scott Ferguson
028: */
029:
030: package com.caucho.quercus.env;
031:
032: import com.caucho.quercus.Quercus;
033: import com.caucho.quercus.QuercusModuleException;
034: import com.caucho.quercus.QuercusRuntimeException;
035: import com.caucho.vfs.*;
036:
037: import java.io.*;
038: import java.util.IdentityHashMap;
039:
040: /**
041: * Represents a PHP string value.
042: */
043: public class UnicodeBuilderValue extends StringBuilderValue {
044: public static final UnicodeBuilderValue EMPTY = new UnicodeBuilderValue(
045: "");
046:
047: private final static UnicodeBuilderValue[] CHAR_STRINGS;
048:
/**
 * Creates a builder through the superclass no-argument constructor.
 */
public UnicodeBuilderValue() {
  super();
}
051:
/**
 * Creates a builder, handing {@code capacity} unchanged to the superclass
 * constructor.
 *
 * @param capacity value forwarded to the superclass
 */
public UnicodeBuilderValue(int capacity) {
  super(capacity);
}
055:
/**
 * Creates a builder from a Java string.
 *
 * @param value the text forwarded to the superclass constructor
 */
public UnicodeBuilderValue(String value) {
  super(value);
}
059:
/**
 * Creates a builder from a Java string.
 *
 * @param value the text forwarded to the superclass constructor
 * @param minLength currently not used by this constructor; only
 *        {@code value} is passed on
 */
public UnicodeBuilderValue(String value, int minLength) {
  super(value);
}
063:
/**
 * Creates a builder from a region of a character array.
 *
 * @param buffer source characters
 * @param offset index of the first character, forwarded to the superclass
 * @param length number of characters, forwarded to the superclass
 */
public UnicodeBuilderValue(char[] buffer, int offset, int length) {
  super(buffer, offset, length);
}
067:
/**
 * Private variant of the (buffer, offset, length) constructor.
 *
 * @param buffer source characters
 * @param offset index of the first character
 * @param length number of characters
 * @param copy not read by this constructor; it only gives the overload
 *        a distinct signature
 */
private UnicodeBuilderValue(char[] buffer, int offset, int length, String copy) {
  super(buffer, offset, length);
}
072:
/**
 * Creates a builder covering an entire character array.
 *
 * @param buffer source characters; the region {@code 0 .. buffer.length}
 *        is forwarded to the superclass
 */
public UnicodeBuilderValue(char[] buffer) {
  super(buffer, 0, buffer.length);
}
076:
/**
 * Creates a builder from the first {@code length} characters of an array.
 *
 * @param buffer source characters
 * @param length number of characters, counted from index 0
 */
public UnicodeBuilderValue(char[] buffer, int length) {
  super(buffer, 0, length);
}
080:
/**
 * Creates a builder from a region of a character array.
 *
 * @param buffer source characters
 * @param offset index of the first character
 * @param length number of characters
 * @param isExact not read by this constructor; the remaining arguments
 *        are forwarded to the superclass unchanged
 */
public UnicodeBuilderValue(char[] buffer, int offset, int length, boolean isExact) {
  super(buffer, offset, length);
}
085:
/**
 * Creates a builder from an array of boxed characters.
 *
 * @param buffer boxed characters forwarded to the superclass constructor
 */
public UnicodeBuilderValue(Character[] buffer) {
  super(buffer);
}
089:
/**
 * Creates a builder from a single character.
 *
 * @param ch the character forwarded to the superclass constructor
 */
public UnicodeBuilderValue(char ch) {
  super(ch);
}
093:
/**
 * Creates a builder from a character array and a value; both arguments
 * are forwarded to the matching superclass constructor.
 *
 * @param s character array argument
 * @param v1 value argument
 */
public UnicodeBuilderValue(char[] s, Value v1) {
  super(s, v1);
}
097:
/**
 * Creates a builder from a value by delegating to the superclass.
 *
 * @param v1 value forwarded to the superclass constructor
 */
public UnicodeBuilderValue(Value v1) {
  super(v1);
}
101:
102: /**
103: * Creates the string.
104: */
105: public static StringValue create(char value) {
106: if (value < CHAR_STRINGS.length)
107: return CHAR_STRINGS[value];
108: else
109: return new UnicodeBuilderValue(value);
110: }
111:
112: /**
113: * Creates a PHP string from a Java String.
114: * If the value is null then NullValue is returned.
115: */
116: public static Value create(String value) {
117: if (value == null)
118: return NullValue.NULL;
119: else if (value.length() == 0)
120: return UnicodeBuilderValue.EMPTY;
121: else
122: return new UnicodeBuilderValue(value);
123: }
124:
125: /*
126: * Decodes the Unicode str from charset.
127: *
128: * @param str should be a Unicode string
129: * @param charset to decode string from
130: */
131: @Override
132: public StringValue create(Env env, StringValue str, String charset) {
133: return str;
134: }
135:
136: /*
137: * Creates an empty string builder of the same type.
138: */
139: public StringValue createEmptyStringBuilder() {
140: return new UnicodeBuilderValue();
141: }
142:
143: /*
144: * Returns the empty string of same type.
145: */
146: public StringValue getEmptyString() {
147: return EMPTY;
148: }
149:
150: /**
151: * Decodes from charset and returns UnicodeValue.
152: *
153: * @param env
154: * @param charset
155: */
156: public StringValue convertToUnicode(Env env, String charset) {
157: return this ;
158: }
159:
160: /**
161: * Returns true for UnicodeValue
162: */
163: @Override
164: public boolean isUnicode() {
165: return true;
166: }
167:
168: /**
169: * Returns the value.
170: */
171: public String getValue() {
172: return toString();
173: }
174:
175: /**
176: * Returns the type.
177: */
178: public String getType() {
179: return "string";
180: }
181:
182: /**
183: * Returns the ValueType.
184: */
185: @Override
186: public ValueType getValueType() {
187: return getValueType(_buffer, 0, _length);
188: }
189:
190: /**
191: * Interns the string.
192: */
193: public StringValue intern(Quercus quercus) {
194: return quercus.intern(toString());
195: }
196:
197: /**
198: * Converts to a string builder
199: */
200: @Override
201: public StringValue toStringBuilder() {
202: return new UnicodeBuilderValue(_buffer, 0, _length);
203: }
204:
205: /**
206: * Converts to a UnicodeValue.
207: */
208: @Override
209: public StringValue toUnicodeValue() {
210: return this ;
211: }
212:
213: /**
214: * Converts to a UnicodeValue.
215: */
216: @Override
217: public StringValue toUnicodeValue(Env env) {
218: return this ;
219: }
220:
221: /**
222: * Converts to a UnicodeValue in desired charset.
223: */
224: @Override
225: public StringValue toUnicodeValue(Env env, String charset) {
226: return this ;
227: }
228:
229: /**
230: * Append a buffer to the value.
231: */
232: /*
233: public final StringValue append(byte []buf, int offset, int length)
234: {
235: if (_buffer.length < _length + length)
236: ensureCapacity(_length + length);
237:
238: Env env = Env.getInstance();
239: String charset = (env != null
240: ? env.getRuntimeEncoding().toString()
241: : null);
242:
243: // ...
244:
245: char []charBuffer = _buffer;
246: int charLength = _length;
247:
248: for (int i = 0; i < length; i++)
249: charBuffer[charLength + i] = (char) buf[offset + i];
250:
251: _length += length;
252:
253: return this;
254: }
255: */
256:
257: /*
258: * Appends a Unicode string to the value.
259: *
260: * @param str should be a Unicode string
261: * @param charset to decode string from
262: */
263: @Override
264: public StringValue append(Env env, StringValue unicodeStr,
265: String charset) {
266: return append(unicodeStr);
267: }
268:
269: /**
270: * Append to a string builder.
271: */
272: @Override
273: public StringValue appendTo(UnicodeBuilderValue sb) {
274: sb.append(_buffer, 0, _length);
275:
276: return sb;
277: }
278:
279: /**
280: * Converts to a BinaryValue.
281: */
282: @Override
283: public StringValue toBinaryValue() {
284: return toBinaryValue(Env.getInstance());
285: }
286:
287: /**
288: * Converts to a BinaryValue.
289: */
290: @Override
291: public StringValue toBinaryValue(Env env) {
292: return toBinaryValue(env, env.getRuntimeEncoding());
293: }
294:
295: /**
296: * Converts to a BinaryValue in desired charset.
297: *
298: * @param env
299: * @param charset
300: */
301: public StringValue toBinaryValue(Env env, String charset) {
302: try {
303: BinaryBuilderValue result = new BinaryBuilderValue();
304: BinaryBuilderStream stream = new BinaryBuilderStream(result);
305:
306: // XXX: can use EncodingWriter directly(?)
307: WriteStream out = new WriteStream(stream);
308: out.setEncoding(charset);
309:
310: out.print(_buffer, 0, _length);
311:
312: out.close();
313:
314: return result;
315: } catch (IOException e) {
316: throw new QuercusModuleException(e.getMessage());
317: }
318: }
319:
320: /**
321: * Returns the character at an index
322: */
323: @Override
324: public Value charValueAt(long index) {
325: int len = _length;
326:
327: if (index < 0 || len <= index)
328: return UnsetUnicodeValue.UNSET;
329: else {
330: int ch = _buffer[(int) index];
331:
332: if (ch < CHAR_STRINGS.length)
333: return CHAR_STRINGS[ch];
334: else
335: return new UnicodeBuilderValue((char) ch);
336: }
337: }
338:
339: /**
340: * sets the character at an index
341: */
342: @Override
343: public Value setCharValueAt(long index, String value) {
344: if (_isCopy)
345: copyOnWrite();
346:
347: int len = _length;
348:
349: if (index < 0 || len <= index)
350: return this ;
351: else {
352: UnicodeBuilderValue sb = new UnicodeBuilderValue(_buffer,
353: 0, (int) index);
354: sb.append(value);
355: sb.append(_buffer, (int) (index + 1),
356: (int) (len - index - 1));
357:
358: return sb;
359: }
360: }
361:
362: /**
363: * Returns a subsequence
364: */
365: @Override
366: public CharSequence subSequence(int start, int end) {
367: if (end <= start)
368: return EMPTY;
369:
370: char[] newBuffer = new char[end - start];
371:
372: System.arraycopy(_buffer, start, newBuffer, 0, end - start);
373:
374: return new UnicodeBuilderValue(newBuffer, 0, end - start);
375: }
376:
377: //
378: // java.lang.String
379: //
380:
381: /**
382: * Convert to lower case.
383: */
384: @Override
385: public StringValue toLowerCase() {
386: int length = _length;
387:
388: UnicodeBuilderValue string = new UnicodeBuilderValue(length);
389:
390: char[] srcBuffer = _buffer;
391: char[] dstBuffer = string._buffer;
392:
393: for (int i = 0; i < length; i++) {
394: char ch = srcBuffer[i];
395:
396: if ('A' <= ch && ch <= 'Z')
397: dstBuffer[i] = (char) (ch + 'a' - 'A');
398: else if (ch < 0x80)
399: dstBuffer[i] = ch;
400: else if (Character.isUpperCase(ch))
401: dstBuffer[i] = Character.toLowerCase(ch);
402: else
403: dstBuffer[i] = ch;
404: }
405:
406: string._length = length;
407:
408: return string;
409: }
410:
411: /**
412: * Convert to lower case.
413: */
414: @Override
415: public StringValue toUpperCase() {
416: int length = _length;
417:
418: UnicodeBuilderValue string = new UnicodeBuilderValue(_length);
419:
420: char[] srcBuffer = _buffer;
421: char[] dstBuffer = string._buffer;
422:
423: for (int i = 0; i < length; i++) {
424: char ch = srcBuffer[i];
425:
426: if ('a' <= ch && ch <= 'z')
427: dstBuffer[i] = (char) (ch + 'A' - 'a');
428: else if (ch < 0x80)
429: dstBuffer[i] = ch;
430: else if (Character.isLowerCase(ch))
431: dstBuffer[i] = Character.toUpperCase(ch);
432: else
433: dstBuffer[i] = ch;
434: }
435:
436: string._length = length;
437:
438: return string;
439: }
440:
441: /**
442: * Returns a character array
443: */
444: @Override
445: public char[] toCharArray() {
446: char[] dest = new char[_length];
447:
448: System.arraycopy(_buffer, 0, dest, 0, _length);
449:
450: return dest;
451: }
452:
453: public char[] getRawCharArray() {
454: return _buffer;
455: }
456:
457: /**
458: * Returns the buffer backing this StringBuilderValue.
459: */
460: public char[] getBuffer() {
461: return _buffer;
462: }
463:
464: /**
465: * Prints the value.
466: * @param env
467: */
468: public void print(Env env) {
469: env.print(_buffer, 0, _length);
470: }
471:
472: /**
473: * Serializes the value.
474: */
475: public void serialize(StringBuilder sb) {
476: sb.append("s:");
477: sb.append(_length);
478: sb.append(":\"");
479: sb.append(_buffer, 0, _length);
480: sb.append("\";");
481: }
482:
483: //
484: // append code
485: //
486:
487: /**
488: * Creates a string builder of the same type.
489: */
490: @Override
491: public StringValue createStringBuilder() {
492: return new UnicodeBuilderValue();
493: }
494:
495: /**
496: * Creates a string builder of the same type.
497: */
498: @Override
499: public StringValue createStringBuilder(int length) {
500: return new UnicodeBuilderValue(length);
501: }
502:
503: /**
504: * Converts to a string builder
505: */
506: @Override
507: public StringValue toStringBuilder(Env env) {
508: return new UnicodeBuilderValue(_buffer, 0, _length);
509: }
510:
511: @Override
512: public String toDebugString() {
513: StringBuilder sb = new StringBuilder();
514:
515: int length = length();
516:
517: sb.append("unicode(");
518: sb.append(length);
519: sb.append(") \"");
520:
521: int appendLength = length > 256 ? 256 : length;
522:
523: for (int i = 0; i < appendLength; i++)
524: sb.append(charAt(i));
525:
526: if (length > 256)
527: sb.append(" ...");
528:
529: sb.append('"');
530:
531: return sb.toString();
532: }
533:
534: @Override
535: public void varDumpImpl(Env env, WriteStream out, int depth,
536: IdentityHashMap<Value, String> valueSet) throws IOException {
537: int length = length();
538:
539: if (length < 0)
540: length = 0;
541:
542: out.print("unicode(");
543: out.print(length);
544: out.print(") \"");
545:
546: for (int i = 0; i < length; i++)
547: out.print(charAt(i));
548:
549: out.print("\"");
550: }
551:
552: //
553: // java.lang.Object methods
554: //
555:
556: //
557: // Java generator code
558: //
559:
560: /**
561: * Generates code to recreate the expression.
562: *
563: * @param out the writer to the Java source code.
564: */
565: public void generate(PrintWriter out) throws IOException {
566: out.print("new UnicodeBuilderValue(\"");
567: printJavaString(out, this );
568: out.print("\")");
569: }
570:
571: private void copyOnWrite() {
572: if (_isCopy) {
573: _isCopy = false;
574: char[] buffer = new char[_buffer.length];
575: System.arraycopy(_buffer, 0, buffer, 0, _length);
576: _buffer = buffer;
577: }
578: }
579:
580: //
581: // static helper functions
582: //
583:
584: public static int getNumericType(char[] buffer, int offset, int len) {
585: if (len == 0)
586: return IS_STRING;
587:
588: int i = offset;
589: int ch = 0;
590: boolean hasPoint = false;
591:
592: if (i < len && ((ch = buffer[i]) == '+' || ch == '-')) {
593: i++;
594: }
595:
596: if (len <= i)
597: return IS_STRING;
598:
599: ch = buffer[i];
600:
601: if (ch == '.') {
602: for (i++; i < len && '0' <= (ch = buffer[i]) && ch <= '9'; i++) {
603: return IS_DOUBLE;
604: }
605:
606: return IS_STRING;
607: } else if (!('0' <= ch && ch <= '9'))
608: return IS_STRING;
609:
610: for (; i < len && '0' <= (ch = buffer[i]) && ch <= '9'; i++) {
611: }
612:
613: if (len <= i)
614: return IS_LONG;
615: else if (ch == '.' || ch == 'e' || ch == 'E') {
616: for (i++; i < len
617: && ('0' <= (ch = buffer[i]) && ch <= '9'
618: || ch == '+' || ch == '-' || ch == 'e' || ch == 'E'); i++) {
619: }
620:
621: if (i < len)
622: return IS_STRING;
623: else
624: return IS_DOUBLE;
625: } else
626: return IS_STRING;
627: }
628:
629: public static ValueType getValueType(char[] buffer, int offset,
630: int len) {
631: if (len == 0) {
632: // php/0307
633: return ValueType.LONG_ADD;
634: }
635:
636: int i = offset;
637: int ch = 0;
638: boolean hasPoint = false;
639:
640: if (i < len && ((ch = buffer[i]) == '+' || ch == '-')) {
641: i++;
642: }
643:
644: if (len <= i)
645: return ValueType.STRING;
646:
647: ch = buffer[i];
648:
649: if (ch == '.') {
650: for (i++; i < len && '0' <= (ch = buffer[i]) && ch <= '9'; i++) {
651: return ValueType.DOUBLE_CMP;
652: }
653:
654: return ValueType.STRING;
655: } else if (!('0' <= ch && ch <= '9'))
656: return ValueType.STRING;
657:
658: for (; i < len && '0' <= (ch = buffer[i]) && ch <= '9'; i++) {
659: }
660:
661: if (len <= i)
662: return ValueType.LONG_EQ;
663: else if (ch == '.' || ch == 'e' || ch == 'E') {
664: for (i++; i < len
665: && ('0' <= (ch = buffer[i]) && ch <= '9'
666: || ch == '+' || ch == '-' || ch == 'e' || ch == 'E'); i++) {
667: }
668:
669: if (i < len)
670: return ValueType.STRING;
671: else
672: return ValueType.DOUBLE_CMP;
673: } else
674: return ValueType.STRING;
675: }
676:
677: /**
678: * Converts to a long.
679: */
680: public static long toLong(char[] buffer, int offset, int len) {
681: if (len == 0)
682: return 0;
683:
684: long value = 0;
685: long sign = 1;
686:
687: int i = 0;
688: int end = offset + len;
689:
690: if (buffer[offset] == '-') {
691: sign = -1;
692: offset++;
693: } else if (buffer[offset] == '+') {
694: sign = +1;
695: offset++;
696: }
697:
698: while (offset < end) {
699: int ch = buffer[offset++];
700:
701: if ('0' <= ch && ch <= '9')
702: value = 10 * value + ch - '0';
703: else
704: return sign * value;
705: }
706:
707: return sign * value;
708: }
709:
710: public static double toDouble(char[] buffer, int offset, int len) {
711: int i = offset;
712: int ch = 0;
713:
714: if (i < len && ((ch = buffer[i]) == '+' || ch == '-')) {
715: i++;
716: }
717:
718: for (; i < len && '0' <= (ch = buffer[i]) && ch <= '9'; i++) {
719: }
720:
721: if (ch == '.') {
722: for (i++; i < len && '0' <= (ch = buffer[i]) && ch <= '9'; i++) {
723: }
724:
725: if (i == 1)
726: return 0;
727: }
728:
729: if (ch == 'e' || ch == 'E') {
730: int e = i++;
731:
732: if (i < len && (ch = buffer[i]) == '+' || ch == '-') {
733: i++;
734: }
735:
736: for (; i < len && '0' <= (ch = buffer[i]) && ch <= '9'; i++) {
737: }
738:
739: if (i == e + 1)
740: i = e;
741: }
742:
743: if (i == 0)
744: return 0;
745:
746: try {
747: return Double.parseDouble(new String(buffer, 0, i));
748: } catch (NumberFormatException e) {
749: return 0;
750: }
751: }
752:
// Allocates the CHAR_STRINGS table and fills it with one single-character
// string for each character code from 0 to 255.
static {
  CHAR_STRINGS = new UnicodeBuilderValue[256];

  for (char code = 0; code < CHAR_STRINGS.length; code++) {
    CHAR_STRINGS[code] = new UnicodeBuilderValue(code);
  }
}
759: }
|