001: package net.sf.saxon.sort;
002:
003: import net.sf.saxon.expr.Expression;
004: import net.sf.saxon.expr.LastPositionFinder;
005: import net.sf.saxon.expr.XPathContext;
006: import net.sf.saxon.om.Item;
007: import net.sf.saxon.om.ListIterator;
008: import net.sf.saxon.om.SequenceIterator;
009: import net.sf.saxon.om.LookaheadIterator;
010: import net.sf.saxon.trace.Location;
011: import net.sf.saxon.trans.XPathException;
012: import net.sf.saxon.value.AtomicValue;
013:
014: import java.util.ArrayList;
015: import java.util.Comparator;
016: import java.util.HashMap;
017: import java.util.List;
018:
019: /**
020: * A GroupByIterator iterates over a sequence of groups defined by
021: * xsl:for-each-group group-by="x". The groups are returned in
022: * order of first appearance. Note that an item can appear in several groups;
023: * indeed, an item may be the leading item of more than one group, which means
024: * that knowing the leading item is not enough to know the current group.
025: *
026: * <p>The GroupByIterator acts as a SequenceIterator, where successive calls of
027: * next() return the leading item of each group in turn. The current item of
028: * the iterator is therefore the leading item of the current group. To get access
029: * to all the members of the current group, the method iterateCurrentGroup() is used;
030: * this underpins the current-group() function in XSLT. The grouping key for the
031: * current group is available via the getCurrentGroupingKey() method.</p>
032: */
033:
034: public class GroupByIterator implements GroupIterator,
035: LastPositionFinder, LookaheadIterator {
036:
037: // The implementation of group-by is not pipelined. All the items in the population
038: // are read at the start, their grouping keys are calculated, and the groups are formed
039: // in memory as a hash table indexed by the grouping key. This hash table is then
040: // flattened into three parallel lists: a list of groups (each group being represented
041: // as a list of items in population order), a list of grouping keys, and a list of
042: // the initial items of the groups.
043:
044: private SequenceIterator population;
045: private Expression keyExpression;
046: private Comparator collator;
047: private XPathContext keyContext;
048: private int position = 0;
049:
050: // Main data structure holds one entry for each group. The entry is also an ArrayList,
051: // which contains the Items that are members of the group, in population order.
052: // The groups are arranged in order of first appearance within the population.
053: private ArrayList groups = new ArrayList(40);
054:
055: // This parallel structure identifies the grouping key for each group. The list
056: // corresponds one-to-one with the list of groups.
057: private ArrayList groupKeys = new ArrayList(40);
058:
059: // For convenience (so that we can use a standard ArrayListIterator) we define
060: // another parallel array holding the initial items of each group.
061: private ArrayList initialItems = new ArrayList(40);
062:
063: // An AtomicSortComparer is used to do the comparisons
064: private AtomicSortComparer comparer;
065:
066: /**
067: * Create a GroupByIterator
068: * @param population iterator over the population to be grouped
069: * @param keyExpression the expression used to calculate the grouping key
070: * @param keyContext dynamic context for calculating the grouping key
071: * @param collator Collation to be used for comparing grouping keys
072: * @throws XPathException
073: */
074:
075: public GroupByIterator(SequenceIterator population,
076: Expression keyExpression, XPathContext keyContext,
077: Comparator collator) throws XPathException {
078: this .population = population;
079: this .keyExpression = keyExpression;
080: this .keyContext = keyContext;
081: this .collator = collator;
082: this .comparer = new AtomicSortComparer(collator, keyContext);
083: buildIndexedGroups();
084: }
085:
086: /**
087: * Build the grouping table forming groups of items with equal keys.
088: * This form of grouping allows a member of the population to be present in zero
089: * or more groups, one for each value of the grouping key.
090: */
091:
092: private void buildIndexedGroups() throws XPathException {
093: HashMap index = new HashMap(40);
094: XPathContext c2 = keyContext.newMinorContext();
095: c2.setCurrentIterator(population);
096: c2.setOriginatingConstructType(Location.GROUPING_KEY);
097: while (true) {
098: Item item = population.next();
099: if (item == null) {
100: break;
101: }
102: SequenceIterator keys = keyExpression.iterate(c2);
103: boolean firstKey = true;
104: while (true) {
105: AtomicValue key = (AtomicValue) keys.next();
106: if (key == null) {
107: break;
108: }
109: AtomicSortComparer.ComparisonKey comparisonKey = comparer
110: .getComparisonKey(key);
111: ArrayList g = (ArrayList) index.get(comparisonKey);
112: if (g == null) {
113: ArrayList newGroup = new ArrayList(20);
114: newGroup.add(item);
115: groups.add(newGroup);
116: groupKeys.add(key);
117: initialItems.add(item);
118: index.put(comparisonKey, newGroup);
119: } else {
120: if (firstKey) {
121: g.add(item);
122: } else {
123: // if this is not the first key value for this item, we
124: // check whether the item is already in this group before
125: // adding it again. If it is in this group, then we know
126: // it will be at the end.
127: if (g.get(g.size() - 1) != item) {
128: g.add(item);
129: }
130: }
131: }
132: firstKey = false;
133: }
134: }
135: }
136:
137: /**
138: * Get the value of the grouping key for the current group
139: * @return the grouping key
140: */
141:
142: public AtomicValue getCurrentGroupingKey() {
143: return (AtomicValue) groupKeys.get(position - 1);
144: }
145:
146: /**
147: * Get an iterator over the items in the current group
148: * @return the iterator
149: */
150:
151: public SequenceIterator iterateCurrentGroup() {
152: return new ListIterator((ArrayList) groups.get(position - 1));
153: }
154:
155: /**
156: * Get the contents of the current group as a java List
157: * @return the contents of the current group
158: */
159:
160: public List getCurrentGroup() {
161: return (ArrayList) groups.get(position - 1);
162: }
163:
164: public boolean hasNext() {
165: return position < groups.size();
166: }
167:
168: public Item next() throws XPathException {
169: if (position < groups.size()) {
170: position++;
171: return current();
172: } else {
173: position = -1;
174: return null;
175: }
176: }
177:
178: public Item current() {
179: if (position < 1) {
180: return null;
181: }
182: // return the initial item of the current group
183: return (Item) ((ArrayList) groups.get(position - 1)).get(0);
184: }
185:
186: public int position() {
187: return position;
188: }
189:
190: public SequenceIterator getAnother() throws XPathException {
191: XPathContext c2 = keyContext.newMinorContext();
192: c2.setOriginatingConstructType(Location.GROUPING_KEY);
193: return new GroupByIterator(population.getAnother(),
194: keyExpression, c2, collator);
195: }
196:
197: /**
198: * Get properties of this iterator, as a bit-significant integer.
199: *
200: * @return the properties of this iterator. This will be some combination of
201: * properties such as {@link GROUNDED}, {@link LAST_POSITION_FINDER},
202: * and {@link LOOKAHEAD}. It is always
203: * acceptable to return the value zero, indicating that there are no known special properties.
204: * It is acceptable for the properties of the iterator to change depending on its state.
205: */
206:
207: public int getProperties() {
208: return LAST_POSITION_FINDER | LOOKAHEAD;
209: }
210:
211: /**
212: * Get the last position (that is, the number of groups)
213: */
214:
215: public int getLastPosition() throws XPathException {
216: return groups.size();
217: }
218:
219: }
220:
221: //
222: // The contents of this file are subject to the Mozilla Public License Version 1.0 (the "License");
223: // you may not use this file except in compliance with the License. You may obtain a copy of the
224: // License at http://www.mozilla.org/MPL/
225: //
226: // Software distributed under the License is distributed on an "AS IS" basis,
227: // WITHOUT WARRANTY OF ANY KIND, either express or implied.
228: // See the License for the specific language governing rights and limitations under the License.
229: //
230: // The Original Code is: all this file.
231: //
232: // The Initial Developer of the Original Code is Michael H. Kay
233: //
234: // Portions created by (your name) are Copyright (C) (your legal entity). All Rights Reserved.
235: //
236: // Contributor(s): none
237: //
|