001: /*
002:
003: Derby - Class org.apache.derby.impl.store.access.sort.ExternalSortFactory
004:
005: Licensed to the Apache Software Foundation (ASF) under one or more
006: contributor license agreements. See the NOTICE file distributed with
007: this work for additional information regarding copyright ownership.
008: The ASF licenses this file to you under the Apache License, Version 2.0
009: (the "License"); you may not use this file except in compliance with
010: the License. You may obtain a copy of the License at
011:
012: http://www.apache.org/licenses/LICENSE-2.0
013:
014: Unless required by applicable law or agreed to in writing, software
015: distributed under the License is distributed on an "AS IS" BASIS,
016: WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
017: See the License for the specific language governing permissions and
018: limitations under the License.
019:
020: */
021:
022: package org.apache.derby.impl.store.access.sort;
023:
024: import java.util.Properties;
025:
026: import org.apache.derby.iapi.services.monitor.ModuleControl;
027: import org.apache.derby.iapi.services.monitor.ModuleSupportable;
028: import org.apache.derby.iapi.services.monitor.Monitor;
029: import org.apache.derby.iapi.services.property.PropertyUtil;
030: import org.apache.derby.iapi.services.sanity.SanityManager;
031:
032: import org.apache.derby.iapi.error.StandardException;
033:
034: import org.apache.derby.iapi.store.access.conglomerate.MethodFactory;
035: import org.apache.derby.iapi.store.access.conglomerate.Sort;
036: import org.apache.derby.iapi.store.access.conglomerate.SortFactory;
037:
038: import org.apache.derby.iapi.store.access.SortObserver;
039: import org.apache.derby.iapi.store.access.SortCostController;
040: import org.apache.derby.iapi.store.access.ColumnOrdering;
041: import org.apache.derby.iapi.store.access.TransactionController;
042:
043: import org.apache.derby.iapi.types.DataValueDescriptor;
044:
045: import org.apache.derby.iapi.services.uuid.UUIDFactory;
046:
047: import org.apache.derby.catalog.UUID;
048:
049: /**
050:
051: **/
052:
053: public class ExternalSortFactory implements SortFactory, ModuleControl,
054: ModuleSupportable, SortCostController {
055:
056: private boolean userSpecified; // did the user specify sortBufferMax
057: private int defaultSortBufferMax;
058: private int sortBufferMax;
059:
060: private static final String IMPLEMENTATIONID = "sort external";
061: private static final String FORMATUUIDSTRING = "D2976090-D9F5-11d0-B54D-00A024BF8879";
062: private UUID formatUUID = null;
063: private static final int DEFAULT_SORTBUFFERMAX = 1024;
064: private static final int MINIMUM_SORTBUFFERMAX = 4;
065:
066: protected static final int DEFAULT_MEM_USE = 1024 * 1024; // aim for about 1Meg
067: // how many sort runs to combined into a larger sort run
068: // (DERBY-1661)
069: protected static final int DEFAULT_MAX_MERGE_RUN = 512;
070:
071: // sizeof Node + reference to Node + 12 bytes tax
072: private static final int SORT_ROW_OVERHEAD = 8 * 4 + 12;
073:
074: /*
075: ** Methods of MethodFactory
076: */
077:
078: /**
079: There are no default properties for the external sort..
080: @see MethodFactory#defaultProperties
081: **/
082: public Properties defaultProperties() {
083: return new Properties();
084: }
085:
086: /**
087: @see MethodFactory#supportsImplementation
088: **/
089: public boolean supportsImplementation(String implementationId) {
090: return implementationId.equals(IMPLEMENTATIONID);
091: }
092:
093: /**
094: @see MethodFactory#primaryImplementationType
095: **/
096: public String primaryImplementationType() {
097: return IMPLEMENTATIONID;
098: }
099:
100: /**
101: @see MethodFactory#supportsFormat
102: **/
103: public boolean supportsFormat(UUID formatid) {
104: return formatid.equals(formatUUID);
105: }
106:
107: /**
108: @see MethodFactory#primaryFormat
109: **/
110: public UUID primaryFormat() {
111: return formatUUID;
112: }
113:
114: /*
115: ** Methods of SortFactory
116: */
117:
118: /**
119: Create a sort.
120: This method could choose among different sort options,
121: depending on the properties etc., but currently it always
122: returns a merge sort.
123: @see SortFactory#createSort
124: **/
125: public Sort createSort(TransactionController tran, int segment,
126: Properties implParameters, DataValueDescriptor[] template,
127: ColumnOrdering columnOrdering[], SortObserver sortObserver,
128: boolean alreadyInOrder, long estimatedRows,
129: int estimatedRowSize) throws StandardException {
130: MergeSort sort = new MergeSort();
131:
132: // RESOLVE - mikem change this to use estimatedRows and
133: // estimatedRowSize to come up with a smarter number for sortBufferMax
134: // than a fixed number of rows. At least 2 possibilities:
135: // 1) add sortBufferMaxMem which would be the amount of memory
136: // the sorter could use, and then just pick the number of
137: // rows as (sortBufferMaxMem / (estimatedRows * estimatedRowSize)
138: // 2) add sortBufferUsePercentFree. This would be how much of
139: // the current free memory can the current sort use.
140: //
141:
142: if (!userSpecified) {
143: // derby.storage.sortBufferMax is not specified by the
144: // user, use default or try to figure out a reasonable sort
145: // size.
146:
147: // if we have some idea on row size, set sort approx 1 meg of
148: // memory sort.
149: if (estimatedRowSize > 0) {
150: //
151: // for each column, there is a reference from the key array and
152: // the 4 bytes reference to the column object plus 12 bytes
153: // tax on the column object
154: // for each row, SORT_ROW_OVERHEAD is the Node and 4 bytes to
155: // point to the column array and 4 for alignment
156: //
157: estimatedRowSize += SORT_ROW_OVERHEAD
158: + (template.length * (4 + 12)) + 8;
159: sortBufferMax = DEFAULT_MEM_USE / estimatedRowSize;
160: } else {
161: sortBufferMax = defaultSortBufferMax;
162: }
163:
164: // if there are barely more rows than sortBufferMax, use 2
165: // smaller runs of similar size instead of one larger run
166: //
167: // 10% slush is added to estimated Rows to catch the case where
168: // estimated rows underestimate the actual number of rows by 10%.
169: //
170: if (estimatedRows > sortBufferMax
171: && (estimatedRows * 1.1) < sortBufferMax * 2)
172: sortBufferMax = (int) (estimatedRows / 2 + estimatedRows / 10);
173:
174: // Make sure it is at least the minimum sort buffer size
175: if (sortBufferMax < MINIMUM_SORTBUFFERMAX)
176: sortBufferMax = MINIMUM_SORTBUFFERMAX;
177: } else {
178: // if user specified derby.storage.sortBufferMax, use it.
179: sortBufferMax = defaultSortBufferMax;
180: }
181:
182: if (SanityManager.DEBUG) {
183: if (SanityManager.DEBUG_ON("SortTuning")) {
184: SanityManager.DEBUG("SortTuning", "sortBufferMax = "
185: + sortBufferMax + " estimatedRows = "
186: + estimatedRows + " estimatedRowSize = "
187: + estimatedRowSize + " defaultSortBufferMax = "
188: + defaultSortBufferMax);
189: }
190: }
191:
192: sort.initialize(template, columnOrdering, sortObserver,
193: alreadyInOrder, estimatedRows, sortBufferMax);
194: return sort;
195: }
196:
197: /**
198: * Return an open SortCostController.
199: * <p>
200: * Return an open SortCostController which can be used to ask about
201: * the estimated costs of SortController() operations.
202: * <p>
203: *
204: * @return The open SortCostController.
205: *
206: * @exception StandardException Standard exception policy.
207: *
208: * @see SortCostController
209: **/
210: public SortCostController openSortCostController()
211: throws StandardException {
212: return (this );
213: }
214:
215: /*
216: ** Methods of SortCostController
217: */
218:
219: public void close() {
220: // nothing to do.
221: }
222:
223: /**
224: * Short one line description of routine.
225: * <p>
226: * The sort algorithm is a N * log(N) algorithm. The following numbers
227: * on a PII, 400 MHZ machine, jdk117 with jit, insane.zip. This test
228: * is a simple "select * from table order by first_int_column. I then
229: * subtracted the time it takes to do "select * from table" from the
230: * result.
231: *
232: * number of rows elaspsed time in seconds
233: * -------------- -----------------------------
234: * 1000 0.20
235: * 10000 10.5
236: * 100000 80.0
237: *
238: * We assume that the formula for sort performance is of the form:
239: * performance = K * N * log(N). Solving the equation for the 1000
240: * and 100000 case we come up with:
241: *
242: * performance = 1 + 0.08 N ln(n)
243: *
244: * NOTE: Apparently, these measurements were done on a faster machine
245: * than was used for other performance measurements used by the optimizer.
246: * Experiments show that the 0.8 multiplier is off by a factor of 4
247: * with respect to other measurements (such as the time it takes to
248: * scan a conglomerate). I am correcting the formula to use 0.32
249: * rather than 0.08.
250: *
251: * - Jeff
252: *
253: * <p>
254: * RESOLVE (mikem) - this formula is very crude at the moment and will be
255: * refined later. known problems:
256: * 1) internal vs. external sort - we know that the performance of sort
257: * is discontinuous when we go from an internal to an external sort.
258: * A better model is probably a different set of contants for internal
259: * vs. external sort and some way to guess when this is going to happen.
260: * 2) current row size is never considered but is critical to performance.
261: * 3) estimatedExportRows is not used. This is a critical number to know
262: * if an internal vs. an external sort will happen.
263: *
264: * <p>
265: *
266: * @return The identifier to be used to open the conglomerate later.
267: *
268: *
269: * @exception StandardException Standard exception policy.
270: **/
271: public double getSortCost(DataValueDescriptor[] template,
272: ColumnOrdering columnOrdering[], boolean alreadyInOrder,
273: long estimatedInputRows, long estimatedExportRows,
274: int estimatedRowSize) throws StandardException {
275: /* Avoid taking the log of 0 */
276: if (estimatedInputRows == 0)
277: return 0.0;
278:
279: // RESOLVE - come up with some real benchmark. For now the cost
280: // of sort is 3 times the cost of scanning the data.
281:
282: if (SanityManager.DEBUG) {
283: SanityManager.ASSERT(estimatedInputRows >= 0);
284: SanityManager.ASSERT(estimatedExportRows >= 0);
285: }
286:
287: double ret_val = 1 + ((0.32) * (estimatedInputRows) * Math
288: .log(estimatedInputRows));
289:
290: return (ret_val);
291: }
292:
293: /*
294: ** Methods of ModuleControl.
295: */
296:
297: public boolean canSupport(Properties startParams) {
298:
299: if (startParams == null)
300: return false;
301:
302: String impl = startParams
303: .getProperty("derby.access.Conglomerate.type");
304: if (impl == null)
305: return false;
306:
307: return supportsImplementation(impl);
308: }
309:
310: public void boot(boolean create, Properties startParams)
311: throws StandardException {
312: // Find the UUID factory.
313: UUIDFactory uuidFactory = Monitor.getMonitor().getUUIDFactory();
314:
315: // Make a UUID that identifies this sort's format.
316: formatUUID = uuidFactory.recreateUUID(FORMATUUIDSTRING);
317:
318: // See if there's a new maximum sort buffer size.
319: defaultSortBufferMax = PropertyUtil.getSystemInt(
320: "derby.storage.sortBufferMax", 0, Integer.MAX_VALUE, 0);
321:
322: // if defaultSortBufferMax is 0, the user did not specify
323: // sortBufferMax, then just set it to DEFAULT_SORTBUFFERMAX.
324: // if defaultSortBufferMax is not 0, the user specified sortBufferMax,
325: // do not override it.
326: if (defaultSortBufferMax == 0) {
327: userSpecified = false;
328: defaultSortBufferMax = DEFAULT_SORTBUFFERMAX;
329: } else {
330: userSpecified = true;
331: if (defaultSortBufferMax < MINIMUM_SORTBUFFERMAX)
332: defaultSortBufferMax = MINIMUM_SORTBUFFERMAX;
333: }
334:
335: }
336:
337: public void stop() {
338: }
339:
340: }
|