001: /*
002:
003: Derby - Class org.apache.derby.iapi.store.access.StoreCostController
004:
005: Licensed to the Apache Software Foundation (ASF) under one or more
006: contributor license agreements. See the NOTICE file distributed with
007: this work for additional information regarding copyright ownership.
008: The ASF licenses this file to you under the Apache License, Version 2.0
009: (the "License"); you may not use this file except in compliance with
010: the License. You may obtain a copy of the License at
011:
012: http://www.apache.org/licenses/LICENSE-2.0
013:
014: Unless required by applicable law or agreed to in writing, software
015: distributed under the License is distributed on an "AS IS" BASIS,
016: WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
017: See the License for the specific language governing permissions and
018: limitations under the License.
019:
020: */
021:
022: package org.apache.derby.iapi.store.access;
023:
024: import org.apache.derby.iapi.types.DataValueDescriptor;
025:
026: import org.apache.derby.iapi.types.RowLocation;
027:
028: import org.apache.derby.iapi.error.StandardException;
029: import org.apache.derby.iapi.services.io.FormatableBitSet;
030:
031: /**
032:
033: The StoreCostController interface provides methods that an access client
034: (most likely the system optimizer) can use to get store's estimated cost of
035: various operations on the conglomerate the StoreCostController was opened
036: for.
037: <p>
038: It is likely that the implementation of StoreCostController will open
039: the conglomerate and will leave the conglomerate open until the
040: StoreCostController is closed. This represents a significant amount of
041: work, so the caller if possible should attempt to open the StoreCostController
042: once per unit of work and rather than close and reopen the controller. For
043: instance if the optimizer needs to cost 2 different scans against a single
044: conglomerate, it should use one instance of the StoreCostController.
045: <p>
046: The locking behavior of the implementation of a StoreCostController is
047: undefined, it may or may not get locks on the underlying conglomerate. It
048: may or may not hold locks until end of transaction.
049: An optimal implementation will not get any locks on the underlying
050: conglomerate, thus allowing concurrent access to the table by a executing
051: query while another query is optimizing.
052: <p>
053: @see TransactionController#openStoreCost
054: @see RowCountable
055:
056: **/
057:
058: public interface StoreCostController extends RowCountable {
059: // The folllowing constants should only be used by StoreCostController
060: // implementors.
061:
062: // The base cost to fetch a cached page, and select a single
063: // heap row by RowLocation, fetching 0 columns.
064: public static final double BASE_CACHED_ROW_FETCH_COST = 0.17;
065:
066: // The base cost to page in a page from disk to cache, and select a single
067: // heap row by RowLocation, fetching 0 columns.
068: public static final double BASE_UNCACHED_ROW_FETCH_COST = 1.5;
069:
070: // The base cost to fetch a single row as part of group fetch scan with
071: // 16 rows per group, fetching 0 columns.
072: public static final double BASE_GROUPSCAN_ROW_COST = 0.12;
073:
074: // The base cost to fetch a single row as part of a nongroup fetch scan
075: // fetching 0 columns.
076: public static final double BASE_NONGROUPSCAN_ROW_FETCH_COST = 0.25;
077:
078: // The base cost to fetch a single row as part of a nongroup fetch scan
079: // fetching 1 columns.
080: public static final double BASE_HASHSCAN_ROW_FETCH_COST = 0.14;
081:
082: // This is an estimate of the per byte cost associated with fetching the
083: // row from the table, it just assumes the cost scales per byte which is
084: // probably not true, but a good first guess. It is meant to be added
085: // to the above costs - for instance the cost of fetching a 100 byte
086: // row from a page assumed to be in the cache is:
087: // BASE_CACHED_ROW_FETCH_COST + (100 * BASE_ROW_PER_BYTECOST)
088: //
089: // The estimate for this number is the cost of retrieving all cost from
090: // a cached 100 byte row - the cost of getting 0 colums from cached row.
091: public static final double BASE_ROW_PER_BYTECOST = (0.56 - 0.16) / 100;
092:
093: /**
094: * Indicates that access to the page necessary to fulfill the fetch
095: * request is likely to be a page "recently" used. See
096: * getFetchFromFullKeyCost() and getScanCost().
097: **/
098: public static final int STORECOST_CLUSTERED = 0x01;
099:
100: /**
101: * Used for the scan_type parameter to the getScanCost() routine.
102: * STORECOST_SCAN_NORMAL indicates that the scan will use the standard
103: * next/fetch, where each fetch can retrieve 1 or many rows (if
104: * fetchNextGroup() interface is used).
105: **/
106: public static final int STORECOST_SCAN_SET = 0x01;
107:
108: /**
109: * Used for the scan_type parameter to the getScanCost() routine.
110: * STORECOST_SCAN_SET - The entire result set will be retrieved using the
111: * the fetchSet() interface.
112: **/
113: public static final int STORECOST_SCAN_NORMAL = 0x02;
114:
115: /**
116: * Close the controller.
117: * <p>
118: * Close the open controller. This method always succeeds, and never
119: * throws any exceptions. Callers must not use the StoreCostController
120: * Cost controller after closing it; they are strongly advised to clear
121: * out the scan controller reference after closing.
122: * <p>
123: *
124: * @exception StandardException Standard exception policy.
125: **/
126: void close() throws StandardException;
127:
128: /**
129: * Return the cost of calling ConglomerateController.fetch().
130: * <p>
131: * Return the estimated cost of calling ConglomerateController.fetch()
132: * on the current conglomerate. This gives the cost of finding a record
133: * in the conglomerate given the exact RowLocation of the record in
134: * question.
135: * <p>
136: * The validColumns parameter describe what kind of row
137: * is being fetched, ie. it may be cheaper to fetch a partial row than a
138: * complete row.
139: * <p>
140: *
141: *
142: * @param validColumns A description of which columns to return from
143: * row on the page into "templateRow." templateRow,
144: * and validColumns work together to
145: * describe the row to be returned by the fetch -
146: * see RowUtil for description of how these three
147: * parameters work together to describe a fetched
148: * "row".
149: *
150: * @param access_type Describe the type of access the query will be
151: * performing to the ConglomerateController.
152: *
153: * STORECOST_CLUSTERED - The location of one fetch
154: * is likely clustered "close" to the next
155: * fetch. For instance if the query plan were
156: * to sort the RowLocations of a heap and then
157: * use those RowLocations sequentially to
158: * probe into the heap, then this flag should
159: * be specified. If this flag is not set then
160: * access to the table is assumed to be
161: * random - ie. the type of access one gets
162: * if you scan an index and probe each row
163: * in turn into the base table is "random".
164: *
165: *
166: * @return The cost of the fetch.
167: *
168: * @exception StandardException Standard exception policy.
169: *
170: * @see RowUtil
171: **/
172: public double getFetchFromRowLocationCost(
173: FormatableBitSet validColumns, int access_type)
174: throws StandardException;
175:
176: /**
177: * Return the cost of exact key lookup.
178: * <p>
179: * Return the estimated cost of calling ScanController.fetch()
180: * on the current conglomerate, with start and stop positions set such
181: * that an exact match is expected.
182: * <p>
183: * This call returns the cost of a fetchNext() performed on a scan which
184: * has been positioned with a start position which specifies exact match
185: * on all keys in the row.
186: * <p>
187: * Example:
188: * <p>
189: * In the case of a btree this call can be used to determine the cost of
190: * doing an exact probe into btree, giving all key columns. This cost
191: * can be used if the client knows it will be doing an exact key probe
192: * but does not have the key's at optimize time to use to make a call to
193: * getScanCost()
194: * <p>
195: *
196: *
197: * @param validColumns A description of which columns to return from
198: * row on the page into "templateRow." templateRow,
199: * and validColumns work together to
200: * describe the row to be returned by the fetch -
201: * see RowUtil for description of how these three
202: * parameters work together to describe a fetched
203: * "row".
204: *
205: * @param access_type Describe the type of access the query will be
206: * performing to the ScanController.
207: *
208: * STORECOST_CLUSTERED - The location of one scan
209: * is likely clustered "close" to the previous
210: * scan. For instance if the query plan were
211: * to used repeated "reopenScan()'s" to probe
212: * for the next key in an index, then this flag
213: * should be be specified. If this flag is not
214: * set then each scan will be costed independant
215: * of any other predicted scan access.
216: *
217: * @return The cost of the fetch.
218: *
219: * @exception StandardException Standard exception policy.
220: *
221: * @see RowUtil
222: **/
223: public double getFetchFromFullKeyCost(
224: FormatableBitSet validColumns, int access_type)
225: throws StandardException;
226:
227: /**
228: * Calculate the cost of a scan.
229: * <p>
230: * Cause this object to calculate the cost of performing the described
231: * scan. The interface is setup such that first a call is made to
232: * calcualteScanCost(), and then subsequent calls to accessor routines
233: * are made to get various pieces of information about the cost of
234: * the scan.
235: * <p>
236: * For the purposes of costing this routine is going to assume that
237: * a page will remain in cache between the time one next()/fetchNext()
238: * call and a subsequent next()/fetchNext() call is made within a scan.
239: * <p>
240: * The result of costing the scan is placed in the "cost_result".
241: * The cost of the scan is stored by calling
242: * cost_result.setEstimatedCost(cost).
243: * The estimated row count is stored by calling
244: * cost_result.setEstimatedRowCount(row_count).
245: * <p>
246: * The estimated cost of the scan assumes the caller will
247: * execute a fetchNext() loop for every row that qualifies between
248: * start and stop position. Note that this cost is different than
249: * execution a next(),fetch() loop; or if the scan is going to be
250: * terminated by client prior to reaching the stop condition.
251: * <p>
252: * The estimated number of rows returned from the scan
253: * assumes the caller will execute a fetchNext() loop for every
254: * row that qualifies between start and stop position.
255: * <p>
256: *
257: *
258: * @param scan_type The type of scan that will be executed. There
259: * are currently 2 types:
260: * STORECOST_SCAN_NORMAL - scans will be executed
261: * using the standard next/fetch, where each fetch
262: * can retrieve 1 or many rows (if fetchNextGroup()
263: * interface is used).
264: *
265: * STORECOST_SCAN_SET - The entire result set will
266: * be retrieved using the the fetchSet() interface.
267: *
268: * @param row_count Estimated total row count of the table. The
269: * current system tracks row counts in heaps better
270: * than btree's (btree's have "rows" which are not
271: * user rows - branch rows, control rows), so
272: * if available the client should
273: * pass in the base table's row count into this
274: * routine to be used as the index's row count.
275: * If the caller has no idea, pass in -1.
276: *
277: * @param group_size The number of rows to be returned by a single
278: * fetch call for STORECOST_SCAN_NORMAL scans.
279: *
280: * @param forUpdate Should be true if the caller intends to update
281: * through the scan.
282: *
283: * @param scanColumnList A description of which columns to return from
284: * every fetch in the scan. template,
285: * and scanColumnList work together
286: * to describe the row to be returned by the scan -
287: * see RowUtil for description of how these three
288: * parameters work together to describe a "row".
289: *
290: * @param template A prototypical row which the scan may use to
291: * maintain its position in the conglomerate. Not
292: * all access method scan types will require this,
293: * if they don't it's ok to pass in null.
294: * In order to scan a conglomerate one must
295: * allocate 2 separate "row" templates. The "row"
296: * template passed into openScan is for the private
297: * use of the scan itself, and no access to it
298: * should be made by the caller while the scan is
299: * still open. Because of this the scanner must
300: * allocate another "row" template to hold the
301: * values returned from fetch(). Note that this
302: * template must be for the full row, whether a
303: * partial row scan is being executed or not.
304: *
305: * @param startKeyValue An indexable row which holds a (partial) key
306: * value which, in combination with the
307: * startSearchOperator, defines the starting
308: * position of the scan. If null, the starting
309: * position of the scan is the first row of the
310: * conglomerate. The startKeyValue must only
311: * reference columns included in the scanColumnList.
312: *
313: * @param startSearchOperator
314: * an operator which defines how the startKeyValue
315: * is to be searched for. If startSearchOperation
316: * is ScanController.GE, the scan starts on the
317: * first row which is greater than or equal to the
318: * startKeyValue. If startSearchOperation is
319: * ScanController.GT, the scan starts on the first
320: * row whose key is greater than startKeyValue. The
321: * startSearchOperation parameter is ignored if the
322: * startKeyValue parameter is null.
323: *
324: * @param stopKeyValue An indexable row which holds a (partial) key
325: * value which, in combination with the
326: * stopSearchOperator, defines the ending position
327: * of the scan. If null, the ending position of the
328: * scan is the last row of the conglomerate. The
329: * stopKeyValue must only reference columns included
330: * in the scanColumnList.
331: *
332: * @param stopSearchOperator
333: * an operator which defines how the stopKeyValue
334: * is used to determine the scan stopping position.
335: * If stopSearchOperation is ScanController.GE, the
336: * scan stops just before the first row which is
337: * greater than or equal to the stopKeyValue. If
338: * stopSearchOperation is ScanController.GT, the
339: * scan stops just before the first row whose key
340: * is greater than startKeyValue. The
341: * stopSearchOperation parameter is ignored if the
342: * stopKeyValue parameter is null.
343: *
344: *
345: * @param access_type Describe the type of access the query will be
346: * performing to the ScanController.
347: *
348: * STORECOST_CLUSTERED - The location of one scan
349: * is likely clustered "close" to the previous
350: * scan. For instance if the query plan were
351: * to used repeated "reopenScan()'s" to probe
352: * for the next key in an index, then this flag
353: * should be be specified. If this flag is not
354: * set then each scan will be costed independant
355: * of any other predicted scan access.
356: *
357: *
358: * @exception StandardException Standard exception policy.
359: *
360: * @see RowUtil
361: **/
362: public void getScanCost(int scan_type, long row_count,
363: int group_size, boolean forUpdate,
364: FormatableBitSet scanColumnList,
365: DataValueDescriptor[] template,
366: DataValueDescriptor[] startKeyValue,
367: int startSearchOperator,
368: DataValueDescriptor[] stopKeyValue, int stopSearchOperator,
369: boolean reopen_scan, int access_type,
370: StoreCostResult cost_result) throws StandardException;
371:
372: /**
373: * Return an "empty" row location object of the correct type.
374: * <p>
375: *
376: * @return The empty Rowlocation.
377: *
378: * @exception StandardException Standard exception policy.
379: **/
380: RowLocation newRowLocationTemplate() throws StandardException;
381: }
|