001: /* Copyright (c) 2001-2005, The HSQL Development Group
002: * All rights reserved.
003: *
004: * Redistribution and use in source and binary forms, with or without
005: * modification, are permitted provided that the following conditions are met:
006: *
007: * Redistributions of source code must retain the above copyright notice, this
008: * list of conditions and the following disclaimer.
009: *
010: * Redistributions in binary form must reproduce the above copyright notice,
011: * this list of conditions and the following disclaimer in the documentation
012: * and/or other materials provided with the distribution.
013: *
014: * Neither the name of the HSQL Development Group nor the names of its
015: * contributors may be used to endorse or promote products derived from this
016: * software without specific prior written permission.
017: *
018: * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
019: * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
020: * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
021: * ARE DISCLAIMED. IN NO EVENT SHALL HSQL DEVELOPMENT GROUP, HSQLDB.ORG,
022: * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
023: * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
024: * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
025: * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
026: * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
027: * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
028: * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
029: */
030:
031: package org.hsqldb.lib;
032:
033: /**
034: * Collection of routines for counting the distribution of the values
035: * in an int[] array.
036: *
037: * @author fredt@users
038: * @version 1.7.2
039: * @since 1.7.2
040: */
public class ArrayCounter {

    /**
     * Returns an int[] array of length segments containing the distribution
     * count of the first <code>elements</code> values of an unsorted int[]
     * array that lie in the half-open range [start, limit). Values outside
     * that range are ignored.<p>
     *
     * A usage example is determining the count of people of each age group
     * in a large int[] array containing the age of each person. Called with
     * (array, array.length, 16, 0, 80), it will return an int[16] with the
     * first element the count of people aged 0-4, the second element the
     * count of those aged 5-9, and so on. People aged 80 or above are
     * excluded. If the range is not a multiple of segments, the trailing
     * segments cover a smaller (possibly empty) sub-range than the rest.
     *
     * @param array the values to count; not modified
     * @param elements number of leading entries of array to examine; must
     *        not exceed array.length
     * @param segments number of buckets in the result; must be positive
     * @param start lowest value counted (inclusive)
     * @param limit upper bound of the values counted (exclusive)
     * @return a new int[segments] holding the count for each sub-range; all
     *         zero when limit is not greater than start
     */
    public static int[] countSegments(int[] array, int elements,
                                      int segments, int start, int limit) {

        int[] counts   = new int[segments];
        long  interval = calcInterval(segments, start, limit);

        if (interval <= 0) {
            return counts;
        }

        for (int i = 0; i < elements; i++) {
            int element = array[i];

            if (element < start || element >= limit) {
                continue;
            }

            // Widen before subtracting: element - start can exceed
            // Integer.MAX_VALUE when the range spans more than 2^31 values.
            int index = (int) (((long) element - start) / interval);

            counts[index]++;
        }

        return counts;
    }

    /**
     * With an unsorted int[] array and with target a positive integer in the
     * range (1,array.length), finds the value in the range (start,limit) of the
     * largest element (rank) where the count of all smaller elements in that
     * range is less than or equals target. Parameter margin indicates the
     * margin of error in target.<p>
     *
     * In statistics, this can be used to calculate a median or quartile value.
     * A usage example applied to an array of age values is to determine
     * the maximum age of a given number of people. With the example array
     * given in countSegments, rank(array, c, 6000, 18, 65, 0) will return an age
     * value between 18-64 (inclusive) and the count of all people aged between
     * 18 and the returned value (exclusive) will be less than or equal
     * 6000.<p>
     *
     * The range is repeatedly split into 256 sub-ranges; the sub-range in
     * which the running count reaches target is then searched again at a
     * finer resolution until the sub-range width is 1 or the count is within
     * margin of target. If target cannot be reached inside [start, limit),
     * the returned value is at or beyond limit (saturated at
     * Integer.MAX_VALUE instead of wrapping around).
     *
     * @param array the values to examine; not modified
     * @param elements number of leading entries of array to examine
     * @param target the number of elements that may lie below the result
     * @param start lowest value considered (inclusive)
     * @param limit upper bound of the values considered (exclusive)
     * @param margin accepted shortfall of the running count against target
     * @return the rank value found
     */
    public static int rank(int[] array, int elements, int target,
                           int start, int limit, int margin) {

        final int segments     = 256;
        int       elementCount = 0;
        int       currentLimit = limit;

        // Tracked as long: advancing by whole intervals may step past limit
        // and must not wrap around when limit is close to Integer.MAX_VALUE.
        long position = start;

        for (;;) {

            // position fits in an int here: it is either the original start
            // or a value known to be below limit (checked before looping).
            int   currentStart = (int) position;
            long  interval = calcInterval(segments, currentStart,
                                          currentLimit);
            int[] counts = countSegments(array, elements, segments,
                                         currentStart, currentLimit);

            for (int i = 0; i < counts.length; i++) {
                if (elementCount + counts[i] < target) {
                    elementCount += counts[i];
                    position     += interval;
                } else {
                    break;
                }
            }

            // Done when close enough to target, when the resolution cannot
            // be refined further, or when the whole range is exhausted (a
            // further pass would count nothing and leave position as is).
            if (elementCount + margin >= target || interval <= 1
                    || position >= limit) {
                return (int) Math.min(position, Integer.MAX_VALUE);
            }

            // Narrow the search to the sub-range in which target is reached.
            currentLimit = (int) Math.min(position + interval, limit);
        }
    }

    /**
     * Helper method to calculate the span of the sub-interval. Simply returns
     * the ceiling of ((limit - start) / segments) and accounts for invalid
     * start and limit combinations.
     *
     * @param segments number of sub-intervals; must not be zero
     * @param start lower bound of the range (inclusive)
     * @param limit upper bound of the range (exclusive)
     * @return the width of one sub-interval, or 0 when limit is less than
     *         or equal to start
     */
    static long calcInterval(int segments, int start, int limit) {

        // Widen before subtracting so that ranges wider than
        // Integer.MAX_VALUE (or inverted extremes) do not wrap around.
        long range = (long) limit - start;

        if (range < 0) {
            return 0;
        }

        int partSegment = (range % segments) == 0 ? 0
                                                  : 1;

        return (range / segments) + partSegment;
    }
}
|