001: package gnu.xquery.util;
002:
003: import gnu.lists.*;
004: import gnu.mapping.*;
005: import gnu.kawa.functions.NumberCompare;
006: import gnu.kawa.xml.KNode;
007: import gnu.kawa.xml.UntypedAtomic;
008:
009: /** Helper class used in conjunction with {@link OrderedMap}.
010: * It has the tuples from the {@code for} and {@code let}-clauses,
011: * as filtered by the {@code where}-clause.
012: *
013: * The tuples are sorted using a linked-list version of merge sort.
014: *
015: * The sequence of n tuples for m variables is represented using
016: * an array of length n where each element is an array of length m.
017: * A possible future optimization would be to instead use m
018: * different arrays of of length n. The advantage is that each
019: * of the M arrays could have the "correct" type for each variable,
020: * and so we avoid casts or boxing/unboxing.
021: */
022:
023: public class OrderedTuples extends FilterConsumer {
024: /** The number of tuples. */
025: int n;
026:
027: /** The sequence of tuples, in input (unsorted) order. */
028: Object[] tuples; // Actually: Object[][] tuples.
029:
030: /** The compator functions.
031: * If there are k comparator, the array's length is 3*k.
032: * comps[3*i] is the i'th comparison function
033: * (represented as a procedure on a tuple);
034: * comps[3*i+1] is the i'th set of flags encoded as a string;
035: * and comps[3*i+2] is the i'th collator
036: * (either null or a NamedCollator).
037: */
038: Object[] comps;
039:
040: /* The index of the first tuple, after sorting. */
041: int first;
042: /** Used to chain the tuples after sorting.
043: * I.e. if the i'th tuple (is sort order) is tuples[k],
044: * then the (i+1)'the sorted tuple is tuples[next[k]].
045: * The end of the list is indicated by -1.
046: */
047: int[] next;
048:
049: /** The return clause, encoded as a procedure on a tuple. */
050: Procedure body;
051:
052: public void writeObject(Object v) {
053: if (n >= tuples.length) {
054: Object[] tmp = new Object[2 * n];
055: System.arraycopy(tuples, 0, tmp, 0, n);
056: tuples = tmp;
057: }
058: tuples[n++] = v;
059: }
060:
061: OrderedTuples() {
062: super (null);
063: tuples = new Object[10];
064: }
065:
066: public static OrderedTuples make$V(Procedure body, Object[] comps) {
067: OrderedTuples tuples = new OrderedTuples();
068: tuples.comps = comps;
069: tuples.body = body;
070: return tuples;
071: }
072:
073: public void run$X(CallContext ctx) throws Throwable {
074: first = listsort(0);
075: emit(ctx);
076: }
077:
078: void emit(CallContext ctx) throws Throwable {
079: for (int p = first; p >= 0; p = next[p])
080: emit(p, ctx);
081: }
082:
083: void emit(int index, CallContext ctx) throws Throwable {
084: Object[] args = (Object[]) tuples[index];
085: body.checkN(args, ctx);
086: ctx.runUntilDone();
087: }
088:
089: // The following sort routine is derived from Simon Tatham's listsort.c.
090: // However we use array indexes instead of pointers, and the next
091: // element instead of a next field.
092: // I.e. p->next is mapped to next[p].
093: // Instead of NULL we use -1.
094:
095: /*
096: * Demonstration code for sorting a linked list.
097: *
098: * The algorithm used is Mergesort, because that works really well
099: * on linked lists, without requiring the O(N) extra space it needs
100: * when you do it on arrays.
101: */
102:
103: /*
104: * This file is copyright 2001 Simon Tatham.
105: *
106: * Permission is hereby granted, free of charge, to any person
107: * obtaining a copy of this software and associated documentation
108: * files (the "Software"), to deal in the Software without
109: * restriction, including without limitation the rights to use,
110: * copy, modify, merge, publish, distribute, sublicense, and/or
111: * sell copies of the Software, and to permit persons to whom the
112: * Software is furnished to do so, subject to the following
113: * conditions:
114: *
115: * The above copyright notice and this permission notice shall be
116: * included in all copies or substantial portions of the Software.
117: *
118: * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
119: * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
120: * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
121: * NONINFRINGEMENT. IN NO EVENT SHALL SIMON TATHAM BE LIABLE FOR
122: * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF
123: * CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
124: * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
125: * SOFTWARE.
126: */
127:
128: int cmp(int a, int b) throws Throwable {
129: for (int i = 0; i < comps.length; i += 3) {
130: Procedure comparator = (Procedure) comps[i];
131: String flags = (String) comps[i + 1];
132: NamedCollator collator = (NamedCollator) comps[i + 2];
133: if (collator == null)
134: collator = NamedCollator.codepointCollation;
135: Object val1 = comparator.applyN((Object[]) tuples[a]);
136: Object val2 = comparator.applyN((Object[]) tuples[b]);
137: val1 = KNode.atomicValue(val1);
138: val2 = KNode.atomicValue(val2);
139: if (val1 instanceof UntypedAtomic)
140: val1 = val1.toString();
141: if (val2 instanceof UntypedAtomic)
142: val2 = val2.toString();
143: boolean empty1 = SequenceUtils.isEmptySequence(val1);
144: boolean empty2 = SequenceUtils.isEmptySequence(val2);
145: if (empty1 && empty2)
146: continue;
147: int c;
148: if (empty1 || empty2) {
149: char emptyOrder = flags.charAt(1);
150: c = empty1 == (emptyOrder == 'L') ? -1 : 1;
151: } else {
152: boolean isNaN1 = val1 instanceof Number
153: && Double.isNaN(((Number) val1).doubleValue());
154: boolean isNaN2 = val2 instanceof Number
155: && Double.isNaN(((Number) val2).doubleValue());
156: if (isNaN1 && isNaN2)
157: continue;
158: if (isNaN1 || isNaN2) {
159: char emptyOrder = flags.charAt(1);
160: c = isNaN1 == (emptyOrder == 'L') ? -1 : 1;
161: } else if (val1 instanceof Number
162: && val2 instanceof Number)
163: c = NumberCompare.compare(val1, val2, false);
164: else
165: c = collator.compare(val1.toString(), val2
166: .toString());
167: }
168: if (c == 0)
169: continue;
170: return flags.charAt(0) == 'A' ? c : -c;
171: }
172: return 0;
173: }
174:
175: /*
176: * This is the actual sort function. Notice that it returns the new
177: * head of the list. (It has to, because the head will not
178: * generally be the same element after the sort.) So unlike sorting
179: * an array, where you can do
180: *
181: * sort(myarray);
182: *
183: * you now have to do
184: *
185: * list = listsort(mylist);
186: */
187: int listsort(int list) throws Throwable {// indexes
188: int p, q, e, tail, oldhead;
189: int insize, nmerges, psize, qsize, i;
190:
191: /*
192: * Silly special case: if `list' was passed in as NULL, return
193: * NULL immediately.
194: */
195: if (n == 0)
196: return -1;
197:
198: next = new int[n];
199:
200: for (i = 1;; i++) {
201: if (i == n) {
202: next[i - 1] = -1;
203: break;
204: } else
205: next[i - 1] = i;
206: }
207:
208: insize = 1;
209:
210: for (;;) {
211: p = list;
212: list = -1;
213: tail = -1;
214:
215: nmerges = 0; /* count number of merges we do in this pass */
216:
217: while (p >= 0) {
218: nmerges++; /* there exists a merge to be done */
219: /* step `insize' places along from p */
220: q = p;
221: psize = 0;
222: for (i = 0; i < insize; i++) {
223: psize++;
224: q = next[q];
225: if (q < 0)
226: break;
227: }
228: /* if q hasn't fallen off end, we have two lists to merge */
229: qsize = insize;
230:
231: /* now we have two lists; merge them */
232: while (psize > 0 || (qsize > 0 && q >= 0)) {
233:
234: /* decide whether next element of merge comes from p or q */
235: if (psize == 0) {
236: /* p is empty; e must come from q. */
237: e = q;
238: q = next[q];
239: qsize--;
240: } else if (qsize == 0 || q < 0) {
241: /* q is empty; e must come from p. */
242: e = p;
243: p = next[p];
244: psize--;
245: } else if (cmp(p, q) <= 0) {
246: /* First element of p is lower (or same);
247: * e must come from p. */
248: e = p;
249: p = next[p];
250: psize--;
251: } else {
252: /* First element of q is lower; e must come from q. */
253: e = q;
254: q = next[q];
255: qsize--;
256: }
257:
258: /* add the next element to the merged list */
259: if (tail >= 0)
260: next[tail] = e;
261: else
262: list = e;
263: tail = e;
264: }
265:
266: /* now p has stepped `insize' places along, and q has too */
267: p = q;
268: }
269: next[tail] = -1;
270:
271: /* If we have done only one merge, we're finished. */
272: if (nmerges <= 1) /* allow for nmerges==0, the empty list case */
273: return list;
274:
275: /* Otherwise repeat, merging lists twice the size */
276: insize *= 2;
277: }
278: }
279: }
|