001: /*
002:
003: Derby - Class org.apache.derby.impl.store.access.heap.HeapCostController
004:
005: Licensed to the Apache Software Foundation (ASF) under one or more
006: contributor license agreements. See the NOTICE file distributed with
007: this work for additional information regarding copyright ownership.
008: The ASF licenses this file to you under the Apache License, Version 2.0
009: (the "License"); you may not use this file except in compliance with
010: the License. You may obtain a copy of the License at
011:
012: http://www.apache.org/licenses/LICENSE-2.0
013:
014: Unless required by applicable law or agreed to in writing, software
015: distributed under the License is distributed on an "AS IS" BASIS,
016: WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
017: See the License for the specific language governing permissions and
018: limitations under the License.
019:
020: */
021:
022: package org.apache.derby.impl.store.access.heap;
023:
024: import org.apache.derby.iapi.reference.SQLState;
025: import org.apache.derby.iapi.reference.Property;
026:
027: import org.apache.derby.iapi.services.sanity.SanityManager;
028:
029: import org.apache.derby.iapi.error.StandardException;
030:
031: import org.apache.derby.iapi.store.access.conglomerate.TransactionManager;
032:
033: import org.apache.derby.iapi.types.RowLocation;
034: import org.apache.derby.iapi.store.access.StoreCostController;
035: import org.apache.derby.iapi.store.access.StoreCostResult;
036:
037: import org.apache.derby.iapi.store.raw.ContainerHandle;
038: import org.apache.derby.iapi.store.raw.LockingPolicy;
039: import org.apache.derby.iapi.store.raw.RawStoreFactory;
040: import org.apache.derby.iapi.store.raw.Transaction;
041:
042: import org.apache.derby.impl.store.access.conglomerate.GenericCostController;
043: import org.apache.derby.impl.store.access.conglomerate.OpenConglomerate;
044:
045: import org.apache.derby.iapi.store.access.RowUtil;
046:
047: import org.apache.derby.iapi.types.DataValueDescriptor;
048:
049: import org.apache.derby.iapi.services.io.FormatableBitSet;
050: import java.util.Properties;
051:
052: /**
053:
054: The StoreCostController interface provides methods that an access client
055: (most likely the system optimizer) can use to get store's estimated cost of
056: various operations on the conglomerate the StoreCostController was opened
057: for.
058: <p>
059: It is likely that the implementation of StoreCostController will open
060: the conglomerate and will leave the conglomerate open until the
061: StoreCostController is closed. This represents a significant amount of
062: work, so the caller if possible should attempt to open the StoreCostController
063: once per unit of work and rather than close and reopen the controller. For
064: instance if the optimizer needs to cost 2 different scans against a single
065: conglomerate, it should use one instance of the StoreCostController.
066: <p>
067: The locking behavior of the implementation of a StoreCostController is
068: undefined, it may or may not get locks on the underlying conglomerate. It
069: may or may not hold locks until end of transaction.
070: An optimal implementation will not get any locks on the underlying
071: conglomerate, thus allowing concurrent access to the table by a executing
072: query while another query is optimizing.
073: <p>
074: The StoreCostController gives 2 kinds of cost information
075:
076: **/
077:
078: public class HeapCostController extends GenericCostController implements
079: StoreCostController {
080: /**
081: * Only lookup these estimates from raw store once.
082: **/
083: long num_pages;
084: long num_rows;
085: long page_size;
086: long row_size;
087:
088: /* Private/Protected methods of This class: */
089:
090: /**
091: * Initialize the cost controller.
092: * <p>
093: * Let super.init() do it's work and then get the initial stats about the
094: * table from raw store.
095: *
096: * @exception StandardException Standard exception policy.
097: **/
098: public void init(OpenConglomerate open_conglom)
099: throws StandardException {
100: super .init(open_conglom);
101:
102: ContainerHandle container = open_conglom.getContainer();
103:
104: // look up costs from raw store.
105: num_rows = container.getEstimatedRowCount(/*unused flag*/0);
106:
107: // Don't use 0 rows (use 1 instead), as 0 rows often leads the
108: // optimizer to produce plans which don't use indexes because of the 0
109: // row edge case.
110: //
111: // Eventually the plan is recompiled when rows are added, but we
112: // have seen multiple customer cases of deadlocks and timeouts
113: // because of these 0 row based plans.
114: if (num_rows == 0)
115: num_rows = 1;
116:
117: // eliminate the allocation page from the count.
118: num_pages = container
119: .getEstimatedPageCount(/* unused flag */0);
120:
121: Properties prop = new Properties();
122: prop.put(Property.PAGE_SIZE_PARAMETER, "");
123: container.getContainerProperties(prop);
124: page_size = Integer.parseInt(prop
125: .getProperty(Property.PAGE_SIZE_PARAMETER));
126:
127: row_size = (num_pages * page_size / num_rows);
128:
129: return;
130: }
131:
132: /* Public Methods of This class: */
133: /* Public Methods of XXXX class: */
134:
135: /**
136: * Return the cost of calling ConglomerateController.fetch().
137: * <p>
138: * Return the estimated cost of calling ConglomerateController.fetch()
139: * on the current conglomerate. This gives the cost of finding a record
140: * in the conglomerate given the exact RowLocation of the record in
141: * question.
142: * <p>
143: * The validColumns describes what kind of row is being fetched,
144: * ie. it may be cheaper to fetch a partial row than a complete row.
145: * <p>
146: *
147: *
148: * @param validColumns A description of which columns to return from
149: * row on the page into "templateRow." templateRow,
150: * and validColumns work together to
151: * describe the row to be returned by the fetch -
152: * see RowUtil for description of how these three
153: * parameters work together to describe a fetched
154: * "row".
155: *
156: * @param access_type Describe the type of access the query will be
157: * performing to the ConglomerateController.
158: *
159: * STORECOST_CLUSTERED - The location of one fetch
160: * is likely clustered "close" to the next
161: * fetch. For instance if the query plan were
162: * to sort the RowLocations of a heap and then
163: * use those RowLocations sequentially to
164: * probe into the heap, then this flag should
165: * be specified. If this flag is not set then
166: * access to the table is assumed to be
167: * random - ie. the type of access one gets
168: * if you scan an index and probe each row
169: * in turn into the base table is "random".
170: *
171: *
172: * @return The cost of the fetch.
173: *
174: * @exception StandardException Standard exception policy.
175: *
176: * @see RowUtil
177: **/
178: public double getFetchFromRowLocationCost(
179: FormatableBitSet validColumns, int access_type)
180: throws StandardException {
181: double ret_cost;
182:
183: // get "per-byte" cost of fetching a row from the page.
184: ret_cost = row_size * BASE_ROW_PER_BYTECOST;
185:
186: long num_pages_per_row = (row_size / page_size) + 1;
187:
188: if ((access_type & StoreCostController.STORECOST_CLUSTERED) == 0) {
189: // this is the "base" unit case.
190: ret_cost += (BASE_UNCACHED_ROW_FETCH_COST * num_pages_per_row);
191: } else {
192: ret_cost += (BASE_CACHED_ROW_FETCH_COST * num_pages_per_row);
193: }
194:
195: return (ret_cost);
196: }
197:
198: /**
199: * Calculate the cost of a scan.
200: * <p>
201: * Cause this object to calculate the cost of performing the described
202: * scan. The interface is setup such that first a call is made to
203: * calcualteScanCost(), and then subsequent calls to accessor routines
204: * are made to get various pieces of information about the cost of
205: * the scan.
206: * <p>
207: * For the purposes of costing this routine is going to assume that
208: * a page will remain in cache between the time one next()/fetchNext()
209: * call and a subsequent next()/fetchNext() call is made within a scan.
210: * <p>
211: * The result of costing the scan is placed in the "cost_result".
212: * The cost of the scan is stored by calling
213: * cost_result.setEstimatedCost(cost).
214: * The estimated row count is stored by calling
215: * cost_result.setEstimatedRowCount(row_count).
216: * <p>
217: * The estimated cost of the scan assumes the caller will
218: * execute a fetchNext() loop for every row that qualifies between
219: * start and stop position. Note that this cost is different than
220: * execution a next(),fetch() loop; or if the scan is going to be
221: * terminated by client prior to reaching the stop condition.
222: * <p>
223: * The estimated number of rows returned from the scan
224: * assumes the caller will execute a fetchNext() loop for every
225: * row that qualifies between start and stop position.
226: * <p>
227: *
228: *
229: * @param scan_type The type of scan that will be executed. There
230: * are currently 2 types:
231: * STORECOST_SCAN_NORMAL - scans will be executed
232: * using the standard next/fetch, where each fetch
233: * can retrieve 1 or many rows (if fetchNextGroup()
234: * interface is used).
235: *
236: * STORECOST_SCAN_SET - The entire result set will
237: * be retrieved using the the fetchSet() interface.
238: *
239: * @param row_count Estimated total row count of the table. The
240: * current system tracks row counts in heaps better
241: * than btree's (btree's have "rows" which are not
242: * user rows - branch rows, control rows), so
243: * if available the client should
244: * pass in the base table's row count into this
245: * routine to be used as the index's row count.
246: * If the caller has no idea, pass in -1.
247: *
248: * @param group_size The number of rows to be returned by a single
249: * fetch call for STORECOST_SCAN_NORMAL scans.
250: *
251: * @param forUpdate Should be true if the caller intends to update
252: * through the scan.
253: *
254: * @param scanColumnList A description of which columns to return from
255: * every fetch in the scan. template,
256: * and scanColumnList work together
257: * to describe the row to be returned by the scan -
258: * see RowUtil for description of how these three
259: * parameters work together to describe a "row".
260: *
261: * @param template A prototypical row which the scan may use to
262: * maintain its position in the conglomerate. Not
263: * all access method scan types will require this,
264: * if they don't it's ok to pass in null.
265: * In order to scan a conglomerate one must
266: * allocate 2 separate "row" templates. The "row"
267: * template passed into openScan is for the private
268: * use of the scan itself, and no access to it
269: * should be made by the caller while the scan is
270: * still open. Because of this the scanner must
271: * allocate another "row" template to hold the
272: * values returned from fetch(). Note that this
273: * template must be for the full row, whether a
274: * partial row scan is being executed or not.
275: *
276: * @param startKeyValue An indexable row which holds a (partial) key
277: * value which, in combination with the
278: * startSearchOperator, defines the starting
279: * position of the scan. If null, the starting
280: * position of the scan is the first row of the
281: * conglomerate. The startKeyValue must only
282: * reference columns included in the scanColumnList.
283: *
284: * @param startSearchOperator
285: * an operator which defines how the startKeyValue
286: * is to be searched for. If startSearchOperation
287: * is ScanController.GE, the scan starts on the
288: * first row which is greater than or equal to the
289: * startKeyValue. If startSearchOperation is
290: * ScanController.GT, the scan starts on the first
291: * row whose key is greater than startKeyValue. The
292: * startSearchOperation parameter is ignored if the
293: * startKeyValue parameter is null.
294: *
295: * @param stopKeyValue An indexable row which holds a (partial) key
296: * value which, in combination with the
297: * stopSearchOperator, defines the ending position
298: * of the scan. If null, the ending position of the
299: * scan is the last row of the conglomerate. The
300: * stopKeyValue must only reference columns included
301: * in the scanColumnList.
302: *
303: * @param stopSearchOperator
304: * an operator which defines how the stopKeyValue
305: * is used to determine the scan stopping position.
306: * If stopSearchOperation is ScanController.GE, the
307: * scan stops just before the first row which is
308: * greater than or equal to the stopKeyValue. If
309: * stopSearchOperation is ScanController.GT, the
310: * scan stops just before the first row whose key
311: * is greater than startKeyValue. The
312: * stopSearchOperation parameter is ignored if the
313: * stopKeyValue parameter is null.
314: *
315: *
316: * @param access_type Describe the type of access the query will be
317: * performing to the ScanController.
318: *
319: * STORECOST_CLUSTERED - The location of one scan
320: * is likely clustered "close" to the previous
321: * scan. For instance if the query plan were
322: * to used repeated "reopenScan()'s" to probe
323: * for the next key in an index, then this flag
324: * should be be specified. If this flag is not
325: * set then each scan will be costed independant
326: * of any other predicted scan access.
327: *
328: *
329: * @exception StandardException Standard exception policy.
330: *
331: * @see RowUtil
332: **/
333: public void getScanCost(int scan_type, long row_count,
334: int group_size, boolean forUpdate,
335: FormatableBitSet scanColumnList,
336: DataValueDescriptor[] template,
337: DataValueDescriptor[] startKeyValue,
338: int startSearchOperator,
339: DataValueDescriptor[] stopKeyValue, int stopSearchOperator,
340: boolean reopen_scan, int access_type,
341: StoreCostResult cost_result) throws StandardException {
342: if (SanityManager.DEBUG) {
343: SanityManager
344: .ASSERT(scan_type == StoreCostController.STORECOST_SCAN_NORMAL
345: || scan_type == StoreCostController.STORECOST_SCAN_SET);
346: }
347:
348: long estimated_row_count = ((row_count < 0) ? num_rows
349: : row_count);
350:
351: // This cost is if the caller has to go in and out of access for
352: // every row in the table. The cost will be significantly less if
353: // group fetch is used, or if qualifiers
354:
355: // first the base cost of bringing each page in from cache:
356: double cost = (num_pages * BASE_UNCACHED_ROW_FETCH_COST);
357:
358: // the cost associated with the number of bytes in each row:
359: cost += (estimated_row_count * row_size)
360: * BASE_ROW_PER_BYTECOST;
361:
362: // the base cost of getting each of the rows from a page assumed
363: // to already be cached (by the scan fetch) - this is only for all
364: // rows after the initial row on the page has been accounted for
365: // under the BASE_UNCACHED_ROW_FETCH_COST cost.:
366: long cached_row_count = estimated_row_count - num_pages;
367: if (cached_row_count < 0)
368: cached_row_count = 0;
369:
370: if (scan_type == StoreCostController.STORECOST_SCAN_NORMAL)
371: cost += cached_row_count * BASE_GROUPSCAN_ROW_COST;
372: else
373: cost += cached_row_count * BASE_HASHSCAN_ROW_FETCH_COST;
374:
375: if (SanityManager.DEBUG) {
376: SanityManager.ASSERT(cost >= 0);
377: SanityManager.ASSERT(estimated_row_count >= 0);
378: }
379:
380: cost_result.setEstimatedCost(cost);
381:
382: // return that all rows will be scanned.
383: cost_result.setEstimatedRowCount(estimated_row_count);
384:
385: return;
386: }
387: }
|