001: /*
002:
003: Derby - Class org.apache.derby.impl.sql.compile.IntersectNode
004:
005: Licensed to the Apache Software Foundation (ASF) under one or more
006: contributor license agreements. See the NOTICE file distributed with
007: this work for additional information regarding copyright ownership.
008: The ASF licenses this file to you under the Apache License, Version 2.0
009: (the "License"); you may not use this file except in compliance with
010: the License. You may obtain a copy of the License at
011:
012: http://www.apache.org/licenses/LICENSE-2.0
013:
014: Unless required by applicable law or agreed to in writing, software
015: distributed under the License is distributed on an "AS IS" BASIS,
016: WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
017: See the License for the specific language governing permissions and
018: limitations under the License.
019:
020: */
021:
022: package org.apache.derby.impl.sql.compile;
023:
024: import org.apache.derby.iapi.reference.ClassName;
025:
026: import org.apache.derby.iapi.services.sanity.SanityManager;
027: import org.apache.derby.iapi.services.classfile.VMOpcode;
028: import org.apache.derby.iapi.services.compiler.MethodBuilder;
029: import org.apache.derby.iapi.services.context.ContextManager;
030:
031: import org.apache.derby.iapi.error.StandardException;
032:
033: import org.apache.derby.iapi.sql.compile.NodeFactory;
034: import org.apache.derby.iapi.sql.compile.Optimizable;
035: import org.apache.derby.iapi.sql.compile.OptimizablePredicate;
036: import org.apache.derby.iapi.sql.compile.OptimizablePredicateList;
037: import org.apache.derby.iapi.sql.compile.Optimizer;
038: import org.apache.derby.iapi.sql.compile.CostEstimate;
039: import org.apache.derby.iapi.sql.compile.RowOrdering;
040: import org.apache.derby.iapi.sql.compile.C_NodeTypes;
041:
042: import org.apache.derby.iapi.sql.dictionary.ConglomerateDescriptor;
043:
044: import org.apache.derby.iapi.reference.SQLState;
045:
046: import org.apache.derby.iapi.types.DataTypeDescriptor;
047:
048: import org.apache.derby.iapi.util.JBitSet;
049: import org.apache.derby.iapi.util.ReuseFactory;
050:
051: import java.sql.Types;
052:
053: import java.util.BitSet;
054:
055: /**
056: * A IntersectOrExceptNode represents an INTERSECT or EXCEPT DML statement.
057: *
058: * @author Jack Klebanoff
059: */
060:
061: public class IntersectOrExceptNode extends SetOperatorNode {
062: /* Currently we implement INTERSECT and EXCEPT by rewriting
063: * t1 (INTERSECT|EXCEPT) [ALL] t2
064: * as (roughly)
065: * setOpResultSet( opType, all, (select * from t1 order by 1,2,...n), (select * from t2 ORDER BY 1,2,...,n))
066: * where n is the number of columns in t1 (which must be the same as the number of columns in t2),
067: * and opType is INTERSECT, or EXCEPT.
068: *
069: * The setOpResultSet result set simultaneously scans through its two ordered inputs and
070: * performs the intersect or except.
071: *
072: * There are other query plans that may be more efficient, depending on the sizes. One plan is
073: * to make a hash table from one of the input tables and then look up each row of the other input
074: * table in the hash table. However, we have not yet implemented spilling to disk in the
075: * BackingStoreHashtable class: currently the whole hash table is in RAM. If we were to use it
076: * we would blow up on large input tables.
077: */
078:
079: private int opType;
080: public static final int INTERSECT_OP = 1;
081: public static final int EXCEPT_OP = 2;
082:
083: /* Only optimize it once */
084: /* Only call addNewNodes() once */
085: private boolean addNewNodesCalled;
086:
087: private int[] intermediateOrderByColumns; // The input result sets will be ordered on these columns. 0 indexed
088: private int[] intermediateOrderByDirection; // ascending = 1, descending = -1
089:
090: /**
091: * Initializer for a SetOperatorNode.
092: *
093: * @param leftResult The ResultSetNode on the left side of this union
094: * @param rightResult The ResultSetNode on the right side of this union
095: * @param all Whether or not this is an ALL.
096: * @param tableProperties Properties list associated with the table
097: *
098: * @exception StandardException Thrown on error
099: */
100:
101: public void init(Object opType, Object leftResult,
102: Object rightResult, Object all, Object tableProperties)
103: throws StandardException {
104: super .init(leftResult, rightResult, all, tableProperties);
105: this .opType = ((Integer) opType).intValue();
106: }
107:
108: private int getOpType() {
109: return opType;
110: }
111:
112: /**
113: * Push order by lists down to the children so that we can implement the intersect/except
114: * by scan of the two sorted inputs.
115: *
116: * @param numTables Number of tables in the DML Statement
117: * @param gbl The group by list, if any
118: * @param fromList The from list, if any
119: *
120: * @return The preprocessed ResultSetNode that can be optimized
121: *
122: * @exception StandardException Thrown on error
123: */
124:
125: public ResultSetNode preprocess(int numTables, GroupByList gbl,
126: FromList fromList) throws StandardException {
127: // RESOLVE: We are in a quandary as to when and how we should generate order by lists. SelectNode processing
128: // requires order by lists at the start of preprocess. That is why we are doing it here. However we can
129: // pick any column ordering. Depending on the child expressions the optimizer may be able to avoid a
130: // sort if we pick the right column ordering. For instance if one of the child expressions is
131: // "select <key columns>, <other expressions> from T" where there is a unique index on <key columns>
132: // then we can just generate an order by on the key columns and the optimizer should use the unique index
133: // to produce the sorted result set. However the ResultSetNode class does not make it easy to
134: // find the structure of the query expression. Furthermore we most want to avoid a sort on the larger
135: // input, but the size estimate is not available at preprocess time.
136:
137: intermediateOrderByColumns = new int[getResultColumns().size()];
138: intermediateOrderByDirection = new int[intermediateOrderByColumns.length];
139: /* If there is an order by on the result of the intersect then use that because we know that doing so
140: * will avoid a sort. If the output of the intersect/except is small relative to its inputs then in some
141: * cases it would be better to sort the inputs on a different sequence of columns, but it is hard to analyze
142: * the input query expressions to see if a sort can be avoided.
143: */
144: if (orderByList != null) {
145: BitSet colsOrdered = new BitSet(
146: intermediateOrderByColumns.length);
147: int orderByListSize = orderByList.size();
148: int intermediateOrderByIdx = 0;
149: for (int i = 0; i < orderByListSize; i++) {
150: if (colsOrdered.get(i))
151: continue;
152: OrderByColumn orderByColumn = orderByList
153: .getOrderByColumn(i);
154: intermediateOrderByDirection[intermediateOrderByIdx] = orderByColumn
155: .isAscending() ? 1 : -1;
156: int columnIdx = orderByColumn.getResultColumn()
157: .getColumnPosition() - 1;
158: intermediateOrderByColumns[intermediateOrderByIdx] = columnIdx;
159: colsOrdered.set(columnIdx);
160: intermediateOrderByIdx++;
161: }
162: for (int i = 0; i < intermediateOrderByColumns.length; i++) {
163: if (!colsOrdered.get(i)) {
164: intermediateOrderByDirection[intermediateOrderByIdx] = 1;
165: intermediateOrderByColumns[intermediateOrderByIdx] = i;
166: intermediateOrderByIdx++;
167: }
168: }
169: orderByList = null; // It will be pushed down.
170: } else // The output of the intersect/except does not have to be ordered
171: {
172: // Pick an intermediate ordering that minimizes the cost.
173: // RESOLVE: how do you do that?
174: for (int i = 0; i < intermediateOrderByColumns.length; i++) {
175: intermediateOrderByDirection[i] = 1;
176: intermediateOrderByColumns[i] = i;
177: }
178: }
179: pushOrderingDown(leftResultSet);
180: pushOrderingDown(rightResultSet);
181:
182: return super .preprocess(numTables, gbl, fromList);
183: } // end of preprocess
184:
185: private void pushOrderingDown(ResultSetNode rsn)
186: throws StandardException {
187: ContextManager cm = getContextManager();
188: NodeFactory nf = getNodeFactory();
189: OrderByList orderByList = (OrderByList) nf.getNode(
190: C_NodeTypes.ORDER_BY_LIST, cm);
191: for (int i = 0; i < intermediateOrderByColumns.length; i++) {
192: OrderByColumn orderByColumn = (OrderByColumn) nf
193: .getNode(
194: C_NodeTypes.ORDER_BY_COLUMN,
195: nf
196: .getNode(
197: C_NodeTypes.INT_CONSTANT_NODE,
198: ReuseFactory
199: .getInteger(intermediateOrderByColumns[i] + 1),
200: cm), cm);
201: if (intermediateOrderByDirection[i] < 0)
202: orderByColumn.setDescending();
203: orderByList.addOrderByColumn(orderByColumn);
204: }
205: orderByList.bindOrderByColumns(rsn);
206: rsn.pushOrderByList(orderByList);
207: } // end of pushOrderingDown
208:
209: /**
210: * @see org.apache.derby.iapi.sql.compile.Optimizable#estimateCost
211: */
212: public CostEstimate estimateCost(OptimizablePredicateList predList,
213: ConglomerateDescriptor cd, CostEstimate outerCost,
214: Optimizer optimizer, RowOrdering rowOrdering)
215: throws StandardException {
216: leftResultSet = optimizeSource(optimizer, leftResultSet,
217: (PredicateList) null, outerCost);
218:
219: rightResultSet = optimizeSource(optimizer, rightResultSet,
220: (PredicateList) null, outerCost);
221:
222: CostEstimate costEstimate = getCostEstimate(optimizer);
223: CostEstimate leftCostEstimate = leftResultSet.getCostEstimate();
224: CostEstimate rightCostEstimate = rightResultSet
225: .getCostEstimate();
226: // The cost is the sum of the two child costs plus the cost of sorting the union.
227: costEstimate.setCost(leftCostEstimate.getEstimatedCost()
228: + rightCostEstimate.getEstimatedCost(),
229: getRowCountEstimate(leftCostEstimate.rowCount(),
230: rightCostEstimate.rowCount()),
231: getSingleScanRowCountEstimate(leftCostEstimate
232: .singleScanRowCount(), rightCostEstimate
233: .singleScanRowCount()));
234:
235: return costEstimate;
236: } // End of estimateCost
237:
238: /**
239: * @see Optimizable#modifyAccessPath
240: *
241: * @exception StandardException Thrown on error
242: */
243: public Optimizable modifyAccessPath(JBitSet outerTables)
244: throws StandardException {
245: Optimizable retOptimizable;
246: retOptimizable = super .modifyAccessPath(outerTables);
247:
248: /* We only want call addNewNodes() once */
249: if (addNewNodesCalled) {
250: return retOptimizable;
251: }
252: return (Optimizable) addNewNodes();
253: }
254:
255: /**
256: * @see ResultSetNode#modifyAccessPaths
257: *
258: * @exception StandardException Thrown on error
259: */
260: public ResultSetNode modifyAccessPaths() throws StandardException {
261: ResultSetNode retRSN;
262: retRSN = super .modifyAccessPaths();
263:
264: /* We only want call addNewNodes() once */
265: if (addNewNodesCalled) {
266: return retRSN;
267: }
268: return addNewNodes();
269: }
270:
271: /**
272: * Add any new ResultSetNodes that are necessary to the tree.
273: * We wait until after optimization to do this in order to
274: * make it easier on the optimizer.
275: *
276: * @return (Potentially new) head of the ResultSetNode tree.
277: *
278: * @exception StandardException Thrown on error
279: */
280: private ResultSetNode addNewNodes() throws StandardException {
281: /* Only call addNewNodes() once */
282: if (addNewNodesCalled) {
283: return this ;
284: }
285:
286: addNewNodesCalled = true;
287:
288: if (orderByList == null)
289: return this ;
290: // Generate an order by node on top of the intersect/except
291: return (ResultSetNode) getNodeFactory().getNode(
292: C_NodeTypes.ORDER_BY_NODE, this , orderByList,
293: tableProperties, getContextManager());
294: } // end of addNewNodes
295:
296: /**
297: * Generate the code.
298: *
299: * @exception StandardException Thrown on error
300: */
301: public void generate(ActivationClassBuilder acb, MethodBuilder mb)
302: throws StandardException {
303:
304: /* Get the next ResultSet #, so that we can number this ResultSetNode, its
305: * ResultColumnList and ResultSet.
306: */
307: assignResultSetNumber();
308:
309: // Get our final cost estimate based on the child estimates.
310: costEstimate = getFinalCostEstimate();
311:
312: // build up the tree.
313:
314: /* Generate the SetOpResultSet. Arguments:
315: * 1) expression for left child ResultSet
316: * 2) expression for right child ResultSet
317: * 3) activation
318: * 4) resultSetNumber
319: * 5) estimated row count
320: * 6) estimated cost
321: * 7) opType
322: * 8) all
323: * 9) close method
324: * 10) intermediateOrderByColumns saved object index
325: * 11) intermediateOrderByDirection saved object index
326: */
327:
328: acb.pushGetResultSetFactoryExpression(mb); // instance for getSetOpResultSet
329:
330: getLeftResultSet().generate(acb, mb);
331: getRightResultSet().generate(acb, mb);
332:
333: acb.pushThisAsActivation(mb);
334: mb.push(resultSetNumber);
335: mb.push(costEstimate.getEstimatedRowCount());
336: mb.push(costEstimate.getEstimatedCost());
337: mb.push(getOpType());
338: mb.push(all);
339: mb.push(getCompilerContext().addSavedObject(
340: intermediateOrderByColumns));
341: mb.push(getCompilerContext().addSavedObject(
342: intermediateOrderByDirection));
343:
344: mb.callMethod(VMOpcode.INVOKEINTERFACE, (String) null,
345: "getSetOpResultSet", ClassName.NoPutResultSet, 10);
346: } // end of generate
347:
348: /**
349: * @see ResultSetNode#getFinalCostEstimate
350: *
351: * Get the final CostEstimate for this IntersectOrExceptNode.
352: *
353: * @return The final CostEstimate for this IntersectOrExceptNode,
354: * which is the sum of the two child costs. The final number of
355: * rows depends on whether this is an INTERSECT or EXCEPT (see
356: * getRowCountEstimate() in this class for more).
357: */
358: public CostEstimate getFinalCostEstimate() throws StandardException {
359: if (finalCostEstimate != null)
360: return finalCostEstimate;
361:
362: CostEstimate leftCE = leftResultSet.getFinalCostEstimate();
363: CostEstimate rightCE = rightResultSet.getFinalCostEstimate();
364:
365: finalCostEstimate = getNewCostEstimate();
366: finalCostEstimate.setCost(leftCE.getEstimatedCost()
367: + rightCE.getEstimatedCost(), getRowCountEstimate(
368: leftCE.rowCount(), rightCE.rowCount()),
369: getSingleScanRowCountEstimate(leftCE
370: .singleScanRowCount(), rightCE
371: .singleScanRowCount()));
372:
373: return finalCostEstimate;
374: }
375:
376: String getOperatorName() {
377: switch (opType) {
378: case INTERSECT_OP:
379: return "INTERSECT";
380:
381: case EXCEPT_OP:
382: return "EXCEPT";
383: }
384: if (SanityManager.DEBUG)
385: SanityManager
386: .THROWASSERT("Invalid intersectOrExcept opType: "
387: + opType);
388: return "?";
389: }
390:
391: double getRowCountEstimate(double leftRowCount, double rightRowCount) {
392: switch (opType) {
393: case INTERSECT_OP:
394: // The result has at most min( leftRowCount, rightRowCount). Estimate the actual row count at
395: // half that.
396: return Math.min(leftRowCount, rightRowCount) / 2;
397:
398: case EXCEPT_OP:
399: // The result has at most leftRowCount rows and at least
400: // max(0, leftRowCount - rightRowCount) rows. Use the mean
401: // of those two as the estimate.
402: return (leftRowCount + Math.max(0, leftRowCount
403: - rightRowCount)) / 2;
404: }
405: if (SanityManager.DEBUG)
406: SanityManager
407: .THROWASSERT("Invalid intersectOrExcept opType: "
408: + opType);
409: return 1.0;
410: } // end of getRowCountEstimate
411:
412: double getSingleScanRowCountEstimate(double leftSingleScanRowCount,
413: double rightSingleScanRowCount) {
414: return getRowCountEstimate(leftSingleScanRowCount,
415: rightSingleScanRowCount);
416: }
417: }
|