001: /*
002:
003: Derby - Class org.apache.derby.impl.sql.compile.HashJoinStrategy
004:
005: Licensed to the Apache Software Foundation (ASF) under one or more
006: contributor license agreements. See the NOTICE file distributed with
007: this work for additional information regarding copyright ownership.
008: The ASF licenses this file to you under the Apache License, Version 2.0
009: (the "License"); you may not use this file except in compliance with
010: the License. You may obtain a copy of the License at
011:
012: http://www.apache.org/licenses/LICENSE-2.0
013:
014: Unless required by applicable law or agreed to in writing, software
015: distributed under the License is distributed on an "AS IS" BASIS,
016: WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
017: See the License for the specific language governing permissions and
018: limitations under the License.
019:
020: */
021:
022: package org.apache.derby.impl.sql.compile;
023:
024: import org.apache.derby.iapi.sql.compile.CostEstimate;
025: import org.apache.derby.iapi.sql.compile.ExpressionClassBuilderInterface;
026: import org.apache.derby.iapi.sql.compile.JoinStrategy;
027: import org.apache.derby.iapi.sql.compile.Optimizable;
028: import org.apache.derby.iapi.sql.compile.Optimizer;
029: import org.apache.derby.iapi.sql.compile.OptimizablePredicate;
030: import org.apache.derby.iapi.sql.compile.OptimizablePredicateList;
031:
032: import org.apache.derby.iapi.sql.dictionary.DataDictionary;
033: import org.apache.derby.iapi.sql.dictionary.ConglomerateDescriptor;
034:
035: import org.apache.derby.iapi.store.access.StoreCostController;
036: import org.apache.derby.iapi.store.access.TransactionController;
037:
038: import org.apache.derby.iapi.services.compiler.MethodBuilder;
039:
040: import org.apache.derby.impl.sql.compile.ExpressionClassBuilder;
041: import org.apache.derby.impl.sql.compile.ProjectRestrictNode;
042: import org.apache.derby.impl.sql.compile.Predicate;
043:
044: import org.apache.derby.iapi.error.StandardException;
045:
046: import org.apache.derby.iapi.reference.SQLState;
047:
048: import org.apache.derby.iapi.services.cache.ClassSize;
049:
050: import org.apache.derby.iapi.services.sanity.SanityManager;
051:
052: import org.apache.derby.iapi.services.io.FormatableArrayHolder;
053: import org.apache.derby.iapi.services.io.FormatableIntHolder;
054:
055: import org.apache.derby.iapi.util.JBitSet;
056:
057: import java.util.Vector;
058:
059: public class HashJoinStrategy extends BaseJoinStrategy {
060: public HashJoinStrategy() {
061: }
062:
063: /**
064: * @see JoinStrategy#feasible
065: *
066: * @exception StandardException Thrown on error
067: */
068: public boolean feasible(Optimizable innerTable,
069: OptimizablePredicateList predList, Optimizer optimizer)
070: throws StandardException {
071: int[] hashKeyColumns = null;
072:
073: ConglomerateDescriptor cd = null;
074:
075: /* If the innerTable is a VTI, then we
076: * must check to see if there are any
077: * join columns in the VTI's parameters.
078: * If so, then hash join is not feasible.
079: */
080: if (!innerTable.isMaterializable()) {
081:
082: optimizer.trace(Optimizer.HJ_SKIP_NOT_MATERIALIZABLE, 0, 0,
083: 0.0, null);
084: return false;
085: }
086:
087: /* Don't consider hash join on the target table of an update/delete.
088: * RESOLVE - this is a temporary restriction. Problem is that we
089: * do not put RIDs into the row in the hash table when scanning
090: * the heap and we need them for a target table.
091: */
092: if (innerTable.isTargetTable()) {
093: return false;
094: }
095:
096: /* If the predicate given by the user _directly_ references
097: * any of the base tables _beneath_ this node, then we
098: * cannot safely use the predicate for a hash because the
099: * predicate correlates two nodes at different nesting levels.
100: * If we did a hash join in this case, materialization of
101: * innerTable could lead to incorrect results--and in particular,
102: * results that are missing rows. We can check for this by
103: * looking at the predicates' reference maps, which are set based
104: * on the initial query (as part of pre-processing). Note that
105: * by the time we get here, it's possible that a predicate's
106: * reference map holds table numbers that do not agree with the
107: * table numbers of the column references used by the predicate.
108: * That's okay--this occurs as a result of "remapping" predicates
109: * that have been pushed down the query tree. And in fact
110: * it's a good thing because, by looking at the column reference's
111: * own table numbers instead of the predicate's referenced map,
112: * we are more readily able to find equijoin predicates that
113: * we otherwise would not have found.
114: *
115: * Note: do not perform this check if innerTable is a FromBaseTable
116: * because a base table does not have a "subtree" to speak of.
117: */
118: if ((predList != null) && (predList.size() > 0)
119: && !(innerTable instanceof FromBaseTable)) {
120: FromTable ft = (FromTable) innerTable;
121:
122: // First get a list of all of the base tables in the subtree
123: // below innerTable.
124: JBitSet tNums = new JBitSet(ft.getReferencedTableMap()
125: .size());
126: BaseTableNumbersVisitor btnVis = new BaseTableNumbersVisitor(
127: tNums);
128: ft.accept(btnVis);
129:
130: // Now get a list of all table numbers referenced by the
131: // join predicates that we'll be searching.
132: JBitSet pNums = new JBitSet(tNums.size());
133: Predicate pred = null;
134: for (int i = 0; i < predList.size(); i++) {
135: pred = (Predicate) predList.getOptPredicate(i);
136: if (pred.isJoinPredicate())
137: pNums.or(pred.getReferencedSet());
138: }
139:
140: // If tNums and pNums have anything in common, then at
141: // least one predicate in the list refers directly to
142: // a base table beneath this node (as opposed to referring
143: // just to this node), which means it's not safe to do a
144: // hash join.
145: tNums.and(pNums);
146: if (tNums.getFirstSetBit() != -1)
147: return false;
148: }
149:
150: if (innerTable.isBaseTable()) {
151: /* Must have an equijoin on a column in the conglomerate */
152: cd = innerTable.getCurrentAccessPath()
153: .getConglomerateDescriptor();
154: }
155:
156: /* Look for equijoins in the predicate list */
157: hashKeyColumns = findHashKeyColumns(innerTable, cd, predList);
158:
159: if (SanityManager.DEBUG) {
160: if (hashKeyColumns == null) {
161: optimizer.trace(Optimizer.HJ_SKIP_NO_JOIN_COLUMNS, 0,
162: 0, 0.0, null);
163: } else {
164: optimizer.trace(Optimizer.HJ_HASH_KEY_COLUMNS, 0, 0,
165: 0.0, hashKeyColumns);
166: }
167: }
168:
169: if (hashKeyColumns == null) {
170: return false;
171: }
172:
173: return true;
174: }
175:
176: /** @see JoinStrategy#ignoreBulkFetch */
177: public boolean ignoreBulkFetch() {
178: return true;
179: }
180:
181: /** @see JoinStrategy#multiplyBaseCostByOuterRows */
182: public boolean multiplyBaseCostByOuterRows() {
183: return false;
184: }
185:
186: /**
187: * @see JoinStrategy#getBasePredicates
188: *
189: * @exception StandardException Thrown on error
190: */
191: public OptimizablePredicateList getBasePredicates(
192: OptimizablePredicateList predList,
193: OptimizablePredicateList basePredicates,
194: Optimizable innerTable) throws StandardException {
195: if (SanityManager.DEBUG) {
196: SanityManager.ASSERT(basePredicates.size() == 0,
197: "The base predicate list should be empty.");
198: }
199:
200: for (int i = predList.size() - 1; i >= 0; i--) {
201: OptimizablePredicate pred = predList.getOptPredicate(i);
202:
203: if (innerTable.getReferencedTableMap().contains(
204: pred.getReferencedMap())) {
205: basePredicates.addOptPredicate(pred);
206: predList.removeOptPredicate(i);
207: }
208: }
209:
210: basePredicates.classify(innerTable, innerTable
211: .getCurrentAccessPath().getConglomerateDescriptor());
212:
213: return basePredicates;
214: }
215:
216: /** @see JoinStrategy#nonBasePredicateSelectivity */
217: public double nonBasePredicateSelectivity(Optimizable innerTable,
218: OptimizablePredicateList predList) throws StandardException {
219: double retval = 1.0;
220:
221: if (predList != null) {
222: for (int i = 0; i < predList.size(); i++) {
223: // Don't include redundant join predicates in selectivity calculations
224: if (predList.isRedundantPredicate(i)) {
225: continue;
226: }
227:
228: retval *= predList.getOptPredicate(i).selectivity(
229: innerTable);
230: }
231: }
232:
233: return retval;
234: }
235:
236: /**
237: * @see JoinStrategy#putBasePredicates
238: *
239: * @exception StandardException Thrown on error
240: */
241: public void putBasePredicates(OptimizablePredicateList predList,
242: OptimizablePredicateList basePredicates)
243: throws StandardException {
244: for (int i = basePredicates.size() - 1; i >= 0; i--) {
245: OptimizablePredicate pred = basePredicates
246: .getOptPredicate(i);
247:
248: predList.addOptPredicate(pred);
249: basePredicates.removeOptPredicate(i);
250: }
251: }
252:
253: /** @see JoinStrategy#estimateCost */
254: public void estimateCost(Optimizable innerTable,
255: OptimizablePredicateList predList,
256: ConglomerateDescriptor cd, CostEstimate outerCost,
257: Optimizer optimizer, CostEstimate costEstimate) {
258: /*
259: ** The cost of a hash join is the cost of building the hash table.
260: ** There is no extra cost per outer row, so don't do anything here.
261: */
262: }
263:
264: /** @see JoinStrategy#maxCapacity */
265: public int maxCapacity(int userSpecifiedCapacity,
266: int maxMemoryPerTable, double perRowUsage) {
267: if (userSpecifiedCapacity >= 0)
268: return userSpecifiedCapacity;
269: perRowUsage += ClassSize.estimateHashEntrySize();
270: if (perRowUsage <= 1)
271: return maxMemoryPerTable;
272: return (int) (maxMemoryPerTable / perRowUsage);
273: }
274:
275: /** @see JoinStrategy#getName */
276: public String getName() {
277: return "HASH";
278: }
279:
280: /** @see JoinStrategy#scanCostType */
281: public int scanCostType() {
282: return StoreCostController.STORECOST_SCAN_SET;
283: }
284:
285: /** @see JoinStrategy#resultSetMethodName */
286: public String resultSetMethodName(boolean bulkFetch) {
287: return "getHashScanResultSet";
288: }
289:
290: /** @see JoinStrategy#joinResultSetMethodName */
291: public String joinResultSetMethodName() {
292: return "getHashJoinResultSet";
293: }
294:
295: /** @see JoinStrategy#halfOuterJoinResultSetMethodName */
296: public String halfOuterJoinResultSetMethodName() {
297: return "getHashLeftOuterJoinResultSet";
298: }
299:
300: /**
301: * @see JoinStrategy#getScanArgs
302: *
303: * @exception StandardException Thrown on error
304: */
305: public int getScanArgs(TransactionController tc, MethodBuilder mb,
306: Optimizable innerTable,
307: OptimizablePredicateList storeRestrictionList,
308: OptimizablePredicateList nonStoreRestrictionList,
309: ExpressionClassBuilderInterface acbi, int bulkFetch,
310: MethodBuilder resultRowAllocator, int colRefItem,
311: int indexColItem, int lockMode, boolean tableLocked,
312: int isolationLevel, int maxMemoryPerTable)
313: throws StandardException {
314: ExpressionClassBuilder acb = (ExpressionClassBuilder) acbi;
315:
316: fillInScanArgs1(tc, mb, innerTable, storeRestrictionList, acb,
317: resultRowAllocator);
318:
319: nonStoreRestrictionList.generateQualifiers(acb, mb, innerTable,
320: true);
321: mb.push(innerTable.initialCapacity());
322: mb.push(innerTable.loadFactor());
323: mb.push(innerTable.maxCapacity((JoinStrategy) this ,
324: maxMemoryPerTable));
325: /* Get the hash key columns and wrap them in a formattable */
326: int[] hashKeyColumns = innerTable.hashKeyColumns();
327: FormatableIntHolder[] fihArray = FormatableIntHolder
328: .getFormatableIntHolders(hashKeyColumns);
329: FormatableArrayHolder hashKeyHolder = new FormatableArrayHolder(
330: fihArray);
331: int hashKeyItem = acb.addItem(hashKeyHolder);
332: mb.push(hashKeyItem);
333:
334: fillInScanArgs2(mb, innerTable, bulkFetch, colRefItem,
335: indexColItem, lockMode, tableLocked, isolationLevel);
336:
337: return 28;
338: }
339:
340: /**
341: * @see JoinStrategy#divideUpPredicateLists
342: *
343: * @exception StandardException Thrown on error
344: */
345: public void divideUpPredicateLists(Optimizable innerTable,
346: OptimizablePredicateList originalRestrictionList,
347: OptimizablePredicateList storeRestrictionList,
348: OptimizablePredicateList nonStoreRestrictionList,
349: OptimizablePredicateList requalificationRestrictionList,
350: DataDictionary dd) throws StandardException {
351: /*
352: ** If we are walking a non-covering index, then all predicates that
353: ** get evaluated in the HashScanResultSet, whether during the building
354: ** or probing of the hash table, need to be evaluated at both the
355: ** IndexRowToBaseRowResultSet and the HashScanResultSet to ensure
356: ** that the rows materialized into the hash table still qualify when
357: ** we go to read the row from the heap. This also includes predicates
358: ** that are not qualifier/start/stop keys (hence not in store/non-store
359: ** list).
360: */
361: originalRestrictionList
362: .copyPredicatesToOtherList(requalificationRestrictionList);
363:
364: ConglomerateDescriptor cd = innerTable
365: .getTrulyTheBestAccessPath()
366: .getConglomerateDescriptor();
367:
368: /* For the inner table of a hash join, then divide up the predicates:
369: *
370: * o restrictionList - predicates that get applied when creating
371: * the hash table (single table clauses)
372: *
373: * o nonBaseTableRestrictionList
374: * - those that get applied when probing into the
375: * hash table (equijoin clauses on key columns,
376: * ordered by key column position first, followed
377: * by any other join predicates. (All predicates
378: * in this list are qualifiers which can be
379: * evaluated in the store).
380: *
381: * o baseTableRL - Only applicable if this is not a covering
382: * index. In that case, we will need to
383: * requalify the data page. Thus, this list
384: * will include all predicates.
385: */
386:
387: // Build the list to be applied when creating the hash table
388: originalRestrictionList.transferPredicates(
389: storeRestrictionList, innerTable
390: .getReferencedTableMap(), innerTable);
391:
392: /*
393: * Eliminate any non-qualifiers that may have been pushed, but
394: * are redundant and not useful for hash join.
395: *
396: * For instance "in" (or other non-qualifier) was pushed down for
397: * start/stop key, * but for hash join, it may no longer be because
398: * previous key column may have been disqualified (eg., correlation).
399: * We simply remove
400: * such non-qualifier ("in") because we left it as residual predicate
401: * anyway. It's easier/safer to filter it out here than detect it
402: * ealier (and not push it down). Beetle 4316.
403: *
404: * Can't filter out OR list, as it is not a residual predicate,
405: */
406: for (int i = storeRestrictionList.size() - 1; i >= 0; i--) {
407: Predicate p1 = (Predicate) storeRestrictionList
408: .getOptPredicate(i);
409:
410: if (!p1.isStoreQualifier() && !p1.isStartKey()
411: && !p1.isStopKey()) {
412: storeRestrictionList.removeOptPredicate(i);
413: }
414: }
415:
416: for (int i = originalRestrictionList.size() - 1; i >= 0; i--) {
417: Predicate p1 = (Predicate) originalRestrictionList
418: .getOptPredicate(i);
419:
420: if (!p1.isStoreQualifier())
421: originalRestrictionList.removeOptPredicate(i);
422: }
423:
424: /* Copy the rest of the predicates to the non-store list */
425: originalRestrictionList
426: .copyPredicatesToOtherList(nonStoreRestrictionList);
427:
428: /* If innerTable is ProjectRestrictNode, we need to use its child
429: * to find hash key columns, this is because ProjectRestrictNode may
430: * not have underlying node's every result column as result column,
431: * and the predicate's column reference was bound to the underlying
432: * node's column position. Also we have to pass in the
433: * ProjectRestrictNode rather than the underlying node to this method
434: * because a predicate's referencedTableMap references the table number
435: * of the ProjectRestrictiveNode. And we need this info to see if
436: * a predicate is in storeRestrictionList that can be pushed down.
437: * Beetle 3458.
438: */
439: Optimizable hashTableFor = innerTable;
440: if (innerTable instanceof ProjectRestrictNode) {
441: ProjectRestrictNode prn = (ProjectRestrictNode) innerTable;
442: if (prn.getChildResult() instanceof Optimizable)
443: hashTableFor = (Optimizable) (prn.getChildResult());
444: }
445: int[] hashKeyColumns = findHashKeyColumns(hashTableFor, cd,
446: nonStoreRestrictionList);
447: if (hashKeyColumns != null) {
448: innerTable.setHashKeyColumns(hashKeyColumns);
449: } else {
450: String name;
451: if (cd != null && cd.isIndex()) {
452: name = cd.getConglomerateName();
453: } else {
454: name = innerTable.getBaseTableName();
455: }
456:
457: throw StandardException.newException(
458: SQLState.LANG_HASH_NO_EQUIJOIN_FOUND, name,
459: innerTable.getBaseTableName());
460: }
461:
462: // Mark all of the predicates in the probe list as qualifiers
463: nonStoreRestrictionList.markAllPredicatesQualifiers();
464:
465: int[] conglomColumn = new int[hashKeyColumns.length];
466: if (cd != null && cd.isIndex()) {
467: /*
468: ** If the conglomerate is an index, get the column numbers of the
469: ** hash keys in the base heap.
470: */
471: for (int index = 0; index < hashKeyColumns.length; index++) {
472: conglomColumn[index] = cd.getIndexDescriptor()
473: .baseColumnPositions()[hashKeyColumns[index]];
474: }
475: } else {
476: /*
477: ** If the conglomerate is a heap, the column numbers of the hash
478: ** key are the column numbers returned by findHashKeyColumns().
479: **
480: ** NOTE: Must switch from zero-based to one-based
481: */
482: for (int index = 0; index < hashKeyColumns.length; index++) {
483: conglomColumn[index] = hashKeyColumns[index] + 1;
484: }
485: }
486:
487: /* Put the equality predicates on the key columns for the hash first.
488: * (Column # is columns[colCtr] from above.)
489: */
490: for (int index = hashKeyColumns.length - 1; index >= 0; index--) {
491: nonStoreRestrictionList
492: .putOptimizableEqualityPredicateFirst(innerTable,
493: conglomColumn[index]);
494: }
495: }
496:
497: /**
498: * @see JoinStrategy#isHashJoin
499: */
500: public boolean isHashJoin() {
501: return true;
502: }
503:
504: /**
505: * @see JoinStrategy#doesMaterialization
506: */
507: public boolean doesMaterialization() {
508: return true;
509: }
510:
511: /**
512: * Find the hash key columns, if any, to use with this join.
513: *
514: * @param innerTable The inner table of the join
515: * @param cd The conglomerate descriptor to use on inner table
516: * @param predList The predicate list to look for the equijoin in
517: *
518: * @return the numbers of the hash key columns, or null if no hash key column
519: *
520: * @exception StandardException Thrown on error
521: */
522: private int[] findHashKeyColumns(Optimizable innerTable,
523: ConglomerateDescriptor cd, OptimizablePredicateList predList)
524: throws StandardException {
525: if (predList == null)
526: return (int[]) null;
527:
528: /* Find the column to use as the hash key.
529: * (There must be an equijoin condition on this column.)
530: * If cd is null, then Optimizable is not a scan.
531: * For indexes, we start at the first column in the key
532: * and walk the key columns until we find the first one with
533: * an equijoin condition on it. We do essentially the same
534: * for heaps. (From column 1 through column n.)
535: */
536: int[] columns = null;
537: if (cd == null) {
538: columns = new int[innerTable.getNumColumnsReturned()];
539: for (int j = 0; j < columns.length; j++) {
540: columns[j] = j + 1;
541: }
542: } else if (cd.isIndex()) {
543: columns = cd.getIndexDescriptor().baseColumnPositions();
544: } else {
545: columns = new int[innerTable.getTableDescriptor()
546: .getNumberOfColumns()];
547: for (int j = 0; j < columns.length; j++) {
548: columns[j] = j + 1;
549: }
550: }
551:
552: // Build a Vector of all the hash key columns
553: int colCtr;
554: Vector hashKeyVector = new Vector();
555: for (colCtr = 0; colCtr < columns.length; colCtr++) {
556: // Is there an equijoin condition on this column?
557: if (predList.hasOptimizableEquijoin(innerTable,
558: columns[colCtr])) {
559: hashKeyVector.addElement(new Integer(colCtr));
560: }
561: }
562:
563: // Convert the Vector into an int[], if there are hash key columns
564: if (hashKeyVector.size() > 0) {
565: int[] keyCols = new int[hashKeyVector.size()];
566: for (int index = 0; index < keyCols.length; index++) {
567: keyCols[index] = ((Integer) hashKeyVector
568: .elementAt(index)).intValue();
569: }
570: return keyCols;
571: } else
572: return (int[]) null;
573: }
574:
575: public String toString() {
576: return getName();
577: }
578: }
|