001: /*
002: * (c) Copyright 2003, 2004, 2005, 2006, 2007, 2008 Hewlett-Packard Development Company, LP
003: * [See end of file]
004: */
005:
006: package com.hp.hpl.jena.util.tuple;
007:
008: import java.io.*;
009: import java.util.*;
010:
011: import org.apache.commons.logging.Log;
012: import org.apache.commons.logging.LogFactory;
013:
014: /**
015: * @author Andy Seaborne
016: * @version $Id: TupleSet.java,v 1.9 2008/01/02 12:10:59 andy_seaborne Exp $
017: */
018:
019: public class TupleSet implements Iterator {
020: BufferedReader in;
021: public String line = null;
022: public int lineNumber = 0;
023:
024: static final char COMMENTCHAR = '#';
025: List current = null;
026: boolean finished = false;
027:
028: protected static Log logger = LogFactory.getLog(TupleSet.class);
029:
030: /** Creates new TupleSet */
031: public TupleSet(Reader r) {
032: if (!(r instanceof BufferedReader))
033: in = new BufferedReader(r);
034: else
035: in = (BufferedReader) r;
036: }
037:
038: public boolean hasNext() {
039: if (finished)
040: return false;
041:
042: if (current == null)
043: current = tuple();
044: return current != null;
045: }
046:
047: public Object next() {
048: if (hasNext()) {
049: List x = current;
050: current = null;
051: return x;
052: } else
053: return null;
054: }
055:
056: public void remove() {
057: throw new java.lang.UnsupportedOperationException(
058: "TupleSet.remove");
059: }
060:
061: private List tuple() {
062:
063: try {
064: lineNumber++;
065: line = in.readLine();
066: } catch (IOException e) {
067: }
068:
069: if (line == null) {
070: finished = true;
071: return null;
072: }
073:
074: //System.out.println("Line: "+line) ;
075: List tuple = new ArrayList();
076: int i = 0;
077: int j = 0;
078: boolean errorFound = false;
079:
080: tupleLoop: for (;;) {
081: // Move to beginning of next item.
082: i = skipwhitespace(line, j);
083:
084: if (i < 0)
085: break;
086:
087: int iStart = -2; // Points to the beginning of the item as found
088: int jStart = -2; // Points to the item without quotes
089: int iFinish = -2; // Points after the end of the item as found
090: int jFinish = -2; // Points after the end of the item without quotes
091: int dtStart = -2; // Points to start of datatype (after < quote)
092: int dtFinish = -2; // Points to end of datatype
093: int type = TupleItem.UNKNOWN;
094:
095: switch (line.charAt(i)) {
096: case COMMENTCHAR:
097: break tupleLoop;
098: case '<':
099: type = TupleItem.URI;
100: iStart = i;
101: jStart = i + 1;
102: int newPosn = parseURI(i, line);
103: if (newPosn < 0) {
104: errorFound = true;
105: break tupleLoop;
106: }
107: j = newPosn;
108:
109: iFinish = j + 1;
110: jFinish = j;
111: break;
112: case '"':
113: type = TupleItem.STRING;
114: iStart = i;
115: jStart = i + 1;
116: boolean inEscape = false;
117: for (j = i + 1; j < line.length(); j++) {
118: char ch = line.charAt(j);
119: if (inEscape) {
120: // ToDo: escape
121: inEscape = false;
122: continue;
123: }
124: // Not an escape
125: if (ch == '"')
126: break;
127:
128: if (ch == '\\')
129: inEscape = true;
130: if (ch == '\n' || ch == '\r') {
131: errorFound = true;
132: break tupleLoop;
133:
134: }
135: }
136:
137: // Malformed
138: if (j == line.length()) {
139: errorFound = true;
140: break tupleLoop;
141: }
142:
143: iFinish = j + 1;
144: jFinish = j;
145: // RDF literals may be followed by their type.
146:
147: if (j < line.length() - 3 && line.charAt(j + 1) == '^'
148: && line.charAt(j + 2) == '^'
149: && line.charAt(j + 3) == '<') {
150: dtFinish = parseURI(j + 3, line);
151: dtStart = j + 4;
152: if (dtFinish < 0) {
153: errorFound = true;
154: break tupleLoop;
155: }
156: j = dtFinish + 1;
157: //String dt = line.substring(dtStart, dtFinish) ;
158: //System.out.println("I see a datatype:"+dt) ;
159: }
160:
161: break;
162: case '_':
163: type = TupleItem.ANON;
164: iStart = i;
165: for (j = i + 1; j < line.length(); j++) {
166: char ch = line.charAt(j);
167: if (ch == ' ' || ch == '\t' || ch == '.')
168: break;
169: if (!Character.isLetterOrDigit(ch) && !(ch == '_')
170: && !(ch == ':')) {
171: errorFound = true;
172: break tupleLoop;
173: }
174: }
175: iFinish = j;
176: jStart = iStart;
177: jFinish = iFinish;
178: break;
179: case '.':
180: case '\n':
181: case '\r':
182: return tuple;
183: default:
184: type = TupleItem.UNQUOTED;
185: iStart = i;
186: jStart = i;
187: for (j = i + 1; j < line.length(); j++) {
188: char ch = line.charAt(j);
189: if (ch == ' ' || ch == '\t' || ch == '.')
190: break;
191:
192: //if ( ! Character.isLetterOrDigit(line.charAt(i)) )
193: //{
194: // errorFound = true ;
195: // break tupleLoop;
196: //}
197: }
198: // Malformed
199: if (j == line.length() + 1) {
200: errorFound = true;
201: break tupleLoop;
202: }
203: iFinish = j;
204: jFinish = j;
205: break;
206: }
207: String item = line.substring(jStart, jFinish);
208: String literal = line.substring(iStart, iFinish);
209: String dt = null;
210: if (dtStart > 0)
211: dt = line.substring(dtStart, dtFinish);
212:
213: tuple.add(new TupleItem(item, literal, type, dt));
214: j++;
215: // End of item.
216: }
217: //End of this line.
218: if (errorFound) {
219: logger.error("Error in TupleSet.tuple: " + line);
220:
221: String s = "";
222: int k = 0;
223: for (; k < i; k++)
224: s = s + " ";
225: s = s + "^";
226: for (; k < j - 1; k++)
227: s = s + " ";
228: s = s + "^";
229: logger.error(s);
230: return null;
231: }
232:
233: if (tuple.size() == 0) {
234: // Nothing found : loop by tail recursion
235: return tuple();
236: }
237: return tuple;
238: }
239:
240: private int skipwhitespace(String s, int i) {
241: for (; i < s.length(); i++) {
242: char ch = s.charAt(i);
243: // Horizonal whitespace
244: if (ch != ' ' && ch != '\t')
245: return i;
246: }
247: return -1;
248: }
249:
250: private int parseURI(int i, String line) {
251: int j;
252: for (j = i + 1; j < line.length(); j++) {
253: char ch = line.charAt(j);
254: if (ch == '>')
255: break;
256: if (ch == '\n' || ch == '\r')
257: return -1;
258: }
259: // Malformed
260: if (j == line.length())
261: return -2;
262: return j;
263: }
264: }
265:
266: /*
267: * (c) Copyright 2003, 2004, 2005, 2006, 2007, 2008 Hewlett-Packard Development Company, LP
268: * All rights reserved.
269: *
270: * Redistribution and use in source and binary forms, with or without
271: * modification, are permitted provided that the following conditions
272: * are met:
273: * 1. Redistributions of source code must retain the above copyright
274: * notice, this list of conditions and the following disclaimer.
275: * 2. Redistributions in binary form must reproduce the above copyright
276: * notice, this list of conditions and the following disclaimer in the
277: * documentation and/or other materials provided with the distribution.
278: * 3. The name of the author may not be used to endorse or promote products
279: * derived from this software without specific prior written permission.
280: *
281: * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
282: * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
283: * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
284: * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
285: * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
286: * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
287: * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
288: * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
289: * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
290: * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
291: */
|