Source Code Cross Referenced for BCDUtils.java in » Search-Engine » apache-solr-1.2.0 » org » apache » solr » util » Java Source Code / Java DocumentationJava Source Code and Java Documentation

Java Source Code / Java Documentation
1.	6.0 JDK Core
2.	6.0 JDK Modules
3.	6.0 JDK Modules com.sun
4.	6.0 JDK Modules com.sun.java
5.	6.0 JDK Modules sun
6.	6.0 JDK Platform
7.	Ajax
8.	Apache Harmony Java SE
9.	Aspect oriented
10.	Authentication Authorization
11.	Blogger System
12.	Build
13.	Byte Code
14.	Cache
15.	Chart
16.	Chat
17.	Code Analyzer
18.	Collaboration
19.	Content Management System
20.	Database Client
21.	Database DBMS
22.	Database JDBC Connection Pool
23.	Database ORM
24.	Development
25.	EJB Server geronimo
26.	EJB Server GlassFish
27.	EJB Server JBoss 4.2.1
28.	EJB Server resin 3.1.5
29.	ERP CRM Financial
30.	ESB
31.	Forum
32.	GIS
33.	Graphic Library
34.	Groupware
35.	HTML Parser
36.	IDE
37.	IDE Eclipse
38.	IDE Netbeans
39.	Installer
40.	Internationalization Localization
41.	Inversion of Control
42.	Issue Tracking
43.	J2EE
44.	JBoss
45.	JMS
46.	JMX
47.	Library
48.	Mail Clients
49.	Net
50.	Parser
51.	PDF
52.	Portal
53.	Profiler
54.	Project Management
55.	Report
56.	RSS RDF
57.	Rule Engine
58.	Science
59.	Scripting
60.	Search Engine
61.	Security
62.	Sevlet Container
63.	Source Control
64.	Swing Library
65.	Template Engine
66.	Test Coverage
67.	Testing
68.	UML
69.	Web Crawler
70.	Web Framework
71.	Web Mail
72.	Web Server
73.	Web Services
74.	Web Services apache cxf 2.0.1
75.	Web Services AXIS2
76.	Wiki Engine
77.	Workflow Engines
78.	XML
79.	XML UI
Java
Java Tutorial
Illustrator Tutorials
GIMP Tutorials
C# / C Sharp
C# / CSharp Tutorial
C# / CSharp Open Source
SQL Server / T-SQL Tutorial
Oracle PL / SQL
Oracle PL/SQL Tutorial
Flash / Flex / ActionScript
VBA / Excel / Access / Word
XML
XML Tutorial
Microsoft Office PowerPoint 2007 Tutorial
Microsoft Office Excel 2007 Tutorial
Microsoft Office Word 2007 Tutorial
Java Source Code / Java Documentation » Search Engine » apache solr 1.2.0 » org.apache.solr.util
Source Cross Referenced Class Diagram Java Document (Java Doc)
001:        /**
002:         * Licensed to the Apache Software Foundation (ASF) under one or more
003:         * contributor license agreements.  See the NOTICE file distributed with
004:         * this work for additional information regarding copyright ownership.
005:         * The ASF licenses this file to You under the Apache License, Version 2.0
006:         * (the "License"); you may not use this file except in compliance with
007:         * the License.  You may obtain a copy of the License at
008:         *
009:         *     http://www.apache.org/licenses/LICENSE-2.0
010:         *
011:         * Unless required by applicable law or agreed to in writing, software
012:         * distributed under the License is distributed on an "AS IS" BASIS,
013:         * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014:         * See the License for the specific language governing permissions and
015:         * limitations under the License.
016:         */package org.apache.solr.util;
017:
018:        /**
019:         * @author yonik
020:         * @version $Id: BCDUtils.java 472574 2006-11-08 18:25:52Z yonik $
021:         */
022:        public class BCDUtils {
023:            // idiv is expensive...
024:            // use fixed point math to multiply by 1/10
025:            // http://www.cs.uiowa.edu/~jones/bcd/divide.html
026:            private static int div10(int a) {
027:                return (a * 0xcccd) >>> 19;
028:            }
029:
030:            private static int mul10(int a) {
031:                return (a * 10);
032:            }
033:
034:            // private static int mul10(int a) { return ((a<<3)+(a<<1)); }
035:            // private static int mul10(int a) { return (a+(a<<2))<<1; } // attempt to use LEA instr
036:            // (imul32 on AMD64 only has a 3 cycle latency in any case)
037:
038:            // something that won't clash with other base100int
039:            // chars (something >= 100)
040:            private static final char NEG_CHAR = (char) 126;
041:            // The zero exponent.
042:            // NOTE: for smaller integer representations, this current implementation
043:            // combines sign and exponent into the first char.  sign is negative if
044:            // exponent is less than the zero point (no negative exponents themselves)
045:            private static final int ZERO_EXPONENT = 'a'; // 97
046:
047:            // WARNING: assumption is that this is a legal int...
048:            // no validation is done.  [+-]?digit*
049:            //
050:            // Normalization of zeros *is* done...
051:            //  0004, 004, 04, 4 will all end up being equal
052:            //  0,-0 are normalized to '' (zero length)
053:            //
054:            // The value is written to the output buffer
055:            // from the end to the start.  The return value
056:            // is the start of the Base100 int in the output buffer.
057:            //
058:            // As the output will be smaller than the input, arr and
059:            // out may refer to the same array if desired.
060:            //
061:            public static int base10toBase100(char[] arr, int start, int end,
062:                    char[] out, int outend) {
063:                int wpos = outend; // write position
064:                boolean neg = false;
065:
066:                while (--end >= start) {
067:                    int val = arr[end];
068:                    if (val == '+') {
069:                        break;
070:                    } else if (val == '-') {
071:                        neg = !neg;
072:                        break;
073:                    } else {
074:                        val = val - '0';
075:                        if (end > start) {
076:                            int val2 = arr[end - 1];
077:                            if (val2 == '+') {
078:                                out[--wpos] = (char) val;
079:                                break;
080:                            }
081:                            if (val2 == '-') {
082:                                out[--wpos] = (char) val;
083:                                neg = !neg;
084:                                break;
085:                            }
086:                            end--;
087:                            val = val + (val2 - '0') * 10;
088:                        }
089:                        out[--wpos] = (char) val;
090:                    }
091:                }
092:
093:                // remove leading base100 zeros
094:                while (wpos < outend && out[wpos] == 0)
095:                    wpos++;
096:
097:                // check for a zero value
098:                if (wpos == outend) {
099:                    // if zero, don't add negative sign
100:                } else if (neg) {
101:                    out[--wpos] = NEG_CHAR;
102:                }
103:
104:                return wpos; // the start of the base100 int
105:            }
106:
107:            // Converts a base100 number to base10 character form
108:            // returns number of chars written.
109:            // At least 1 char is always written.
110:            public static int base100toBase10(char[] arr, int start, int end,
111:                    char[] out, int offset) {
112:                int wpos = offset; // write position
113:                boolean firstDigit = true;
114:                for (int i = start; i < end; i++) {
115:                    int val = arr[i];
116:                    if (val == NEG_CHAR) {
117:                        out[wpos++] = '-';
118:                        continue;
119:                    }
120:                    char tens = (char) (val / 10 + '0');
121:                    if (!firstDigit || tens != '0') { // skip leading 0
122:                        out[wpos++] = (char) (val / 10 + '0'); // tens position
123:                    }
124:                    out[wpos++] = (char) (val % 10 + '0'); // ones position
125:                    firstDigit = false;
126:                }
127:                if (firstDigit)
128:                    out[wpos++] = '0';
129:                return wpos - offset;
130:            }
131:
132:            public static String base10toBase100SortableInt(String val) {
133:                char[] arr = new char[val.length() + 1];
134:                val.getChars(0, val.length(), arr, 0);
135:                int len = base10toBase100SortableInt(arr, 0, val.length(), arr,
136:                        arr.length);
137:                return new String(arr, arr.length - len, len);
138:            }
139:
140:            public static String base100SortableIntToBase10(String val) {
141:                int slen = val.length();
142:                char[] arr = new char[slen << 2];
143:                val.getChars(0, slen, arr, 0);
144:                int len = base100SortableIntToBase10(arr, 0, slen, arr, slen);
145:                return new String(arr, slen, len);
146:            }
147:
148:            public static String base10toBase10kSortableInt(String val) {
149:                char[] arr = new char[val.length() + 1];
150:                val.getChars(0, val.length(), arr, 0);
151:                int len = base10toBase10kSortableInt(arr, 0, val.length(), arr,
152:                        arr.length);
153:                return new String(arr, arr.length - len, len);
154:            }
155:
156:            public static String base10kSortableIntToBase10(String val) {
157:                int slen = val.length();
158:                char[] arr = new char[slen * 5]; // +1 time for orig, +4 for new
159:                val.getChars(0, slen, arr, 0);
160:                int len = base10kSortableIntToBase10(arr, 0, slen, arr, slen);
161:                return new String(arr, slen, len);
162:            }
163:
164:            /********* FUTURE
165:              // the zero exponent... exponents above this point are positive
166:              // and below are negative.
167:              // It is desirable to make ordinary numbers have a single byte
168:              // exponent when converted to UTF-8
169:              // For integers, the exponent will always be >=0, but this format
170:              // is meant to be valid for floating point numbers as well...
171:              private static final int ZERO_EXPONENT='a';  // 97
172:
173:              // if exponent is larger than what can be represented
174:              // in a single byte (char), then this is the multibyte
175:              // escape char.
176:              // UCS-2 surrogates start at 0xD800
177:              private static final int POSITIVE_EXPONENT_ESCAPE=0x3fff;
178:
179:              // if exponent is smaller than what can be represented in
180:              // a single byte, then this is the multibyte escape
181:              private static final int NEGATIVE_EXPONENT_ESCAPE=1;
182:
183:              // if number is negative, it starts with this optional value
184:              // this should not overlap with any exponent values
185:              private static final int NEGATIVE_SIGN=0;
186:             **********/
187:
188:            // WARNING: assumption is that this is a legal int...
189:            // no validation is done.  [+-]?digit*
190:            //
191:            // Normalization of zeros *is* done...
192:            //  0004, 004, 04, 4 will all end up being equal
193:            //  0,-0 are normalized to '' (zero length)
194:            //
195:            // The value is written to the output buffer
196:            // from the end to the start.  The return value
197:            // is the start of the Base100 int in the output buffer.
198:            //
199:            // As the output will be smaller than the input, arr and
200:            // out may refer to the same array if desired.
201:            //
202:            public static int base10toBase100SortableInt(char[] arr, int start,
203:                    int end, char[] out, int outend) {
204:                int wpos = outend; // write position
205:                boolean neg = false;
206:                --end; // position end pointer *on* the last char
207:
208:                // read signs and leading zeros
209:                while (start <= end) {
210:                    char val = arr[start];
211:                    if (val == '-')
212:                        neg = !neg;
213:                    else if (val >= '1' && val <= '9')
214:                        break;
215:                    start++;
216:                }
217:
218:                // eat whitespace on RHS?
219:                outer: while (start <= end) {
220:                    switch (arr[end]) {
221:                    case ' ':
222:                    case '\t':
223:                    case '\n':
224:                    case '\r':
225:                        end--;
226:                        break;
227:                    default:
228:                        break outer;
229:                    }
230:                }
231:
232:                int hundreds = 0;
233:                /******************************************************
234:                 * remove RHS zero normalization since it only helps 1 in 100
235:                 * numbers and complicates both encoding and decoding.
236:
237:                // remove pairs of zeros on the RHS and keep track of
238:                // the count.
239:                while (start <= end) {
240:                  char val = arr[end];
241:
242:                  if (val=='0' && start <= end) {
243:                    val=arr[end-1];
244:                    if (val=='0') {
245:                      hundreds++;
246:                      end-=2;
247:                      continue;
248:                    }
249:                  }
250:
251:                  break;
252:                }
253:                 *************************************************************/
254:
255:                // now start at the end and work our way forward
256:                // encoding two base 10 digits into 1 base 100 digit
257:                while (start <= end) {
258:                    int val = arr[end--];
259:                    val = val - '0';
260:                    if (start <= end) {
261:                        int val2 = arr[end--];
262:                        val = val + (val2 - '0') * 10;
263:                    }
264:                    out[--wpos] = neg ? (char) (99 - val) : (char) val;
265:                }
266:
267:                /****** FUTURE: not needed for this implementation of exponent combined with sign
268:                // normalize all zeros to positive values
269:                if (wpos==outend) neg=false;
270:                 ******/
271:
272:                // adjust exponent by the number of base 100 chars written
273:                hundreds += outend - wpos;
274:
275:                // write the exponent and sign combined
276:                out[--wpos] = neg ? (char) (ZERO_EXPONENT - hundreds)
277:                        : (char) (ZERO_EXPONENT + hundreds);
278:
279:                return outend - wpos; // the length of the base100 int
280:            }
281:
282:            // Converts a base100 sortable number to base10 character form
283:            // returns number of chars written.
284:            // At least 1 char is always written.
285:            public static int base100SortableIntToBase10(char[] arr, int start,
286:                    int end, char[] out, int offset) {
287:                // Take care of "0" case first.  It's the only number that is represented
288:                // in one char.
289:                if (end - start == 1) {
290:                    out[offset] = '0';
291:                    return 1;
292:                }
293:
294:                int wpos = offset; // write position
295:                boolean neg = false;
296:                int exp = arr[start++];
297:                if (exp < ZERO_EXPONENT) {
298:                    neg = true;
299:                    exp = ZERO_EXPONENT - exp;
300:                    out[wpos++] = '-';
301:                }
302:
303:                boolean firstDigit = true;
304:                while (start < end) {
305:                    int val = arr[start++];
306:                    if (neg)
307:                        val = 99 - val;
308:                    // opt - if we ever want a faster version we can avoid one integer
309:                    // divide by using fixed point math to multiply by 1/10
310:                    // http://www.cs.uiowa.edu/~jones/bcd/divide.html
311:                    // TIP: write a small function in gcc or cl and see what
312:                    // the optimized assemply output looks like (and which is fastest).
313:                    // In C you can specify "unsigned" which gives the compiler more
314:                    // info than the Java compiler has.
315:                    char tens = (char) (val / 10 + '0');
316:                    if (!firstDigit || tens != '0') { // skip leading 0
317:                        out[wpos++] = tens; // write tens position
318:                    }
319:                    out[wpos++] = (char) (val % 10 + '0'); // write ones position
320:                    firstDigit = false;
321:                }
322:
323:                // OPTIONAL: if trailing zeros were truncated, then this is where
324:                // we would restore them (compare number of chars read vs exponent)
325:
326:                return wpos - offset;
327:            }
328:
329:            public static int base10toBase10kSortableInt(char[] arr, int start,
330:                    int end, char[] out, int outend) {
331:                int wpos = outend; // write position
332:                boolean neg = false;
333:                --end; // position end pointer *on* the last char
334:
335:                // read signs and leading zeros
336:                while (start <= end) {
337:                    char val = arr[start];
338:                    if (val == '-')
339:                        neg = !neg;
340:                    else if (val >= '1' && val <= '9')
341:                        break;
342:                    start++;
343:                }
344:
345:                // eat whitespace on RHS?
346:                outer: while (start <= end) {
347:                    switch (arr[end]) {
348:                    case ' ': // fallthrough
349:                    case '\t': // fallthrough
350:                    case '\n': // fallthrough
351:                    case '\r':
352:                        end--;
353:                        break;
354:                    default:
355:                        break outer;
356:                    }
357:                }
358:
359:                int exp = 0;
360:
361:                /******************************************************
362:                 * remove RHS zero normalization since it only helps 1 in 100
363:                 * numbers and complicates both encoding and decoding.
364:
365:                // remove pairs of zeros on the RHS and keep track of
366:                // the count.
367:                while (start <= end) {
368:                  char val = arr[end];
369:
370:                  if (val=='0' && start <= end) {
371:                    val=arr[end-1];
372:                    if (val=='0') {
373:                      hundreds++;
374:                      end-=2;
375:                      continue;
376:                    }
377:                  }
378:
379:                  break;
380:                }
381:                 *************************************************************/
382:
383:                // now start at the end and work our way forward
384:                // encoding two base 10 digits into 1 base 100 digit
385:                while (start <= end) {
386:                    int val = arr[end--] - '0'; // ones
387:                    if (start <= end) {
388:                        val += (arr[end--] - '0') * 10; // tens
389:                        if (start <= end) {
390:                            val += (arr[end--] - '0') * 100; // hundreds
391:                            if (start <= end) {
392:                                val += (arr[end--] - '0') * 1000; // thousands
393:                            }
394:                        }
395:                    }
396:                    out[--wpos] = neg ? (char) (9999 - val) : (char) val;
397:                }
398:
399:                /****** FUTURE: not needed for this implementation of exponent combined with sign
400:                // normalize all zeros to positive values
401:                if (wpos==outend) neg=false;
402:                 ******/
403:
404:                // adjust exponent by the number of base 100 chars written
405:                exp += outend - wpos;
406:
407:                // write the exponent and sign combined
408:                out[--wpos] = neg ? (char) (ZERO_EXPONENT - exp)
409:                        : (char) (ZERO_EXPONENT + exp);
410:
411:                return outend - wpos; // the length of the base100 int
412:            }
413:
414:            // Converts a base100 sortable number to base10 character form
415:            // returns number of chars written.
416:            // At least 1 char is always written.
417:            public static int base10kSortableIntToBase10(char[] arr, int start,
418:                    int end, char[] out, int offset) {
419:                // Take care of "0" case first.  It's the only number that is represented
420:                // in one char since we don't chop trailing zeros.
421:                if (end - start == 1) {
422:                    out[offset] = '0';
423:                    return 1;
424:                }
425:
426:                int wpos = offset; // write position
427:                boolean neg;
428:                int exp = arr[start++];
429:                if (exp < ZERO_EXPONENT) {
430:                    neg = true;
431:                    // We don't currently use exp on decoding...
432:                    // exp = ZERO_EXPONENT - exp;
433:                    out[wpos++] = '-';
434:                } else {
435:                    neg = false;
436:                }
437:
438:                // since so many values will fall in one char, pull it
439:                // out of the loop (esp since the first value must
440:                // be special-cased to not print leading zeros.
441:                // integer division is still expensive, so it's best to check
442:                // if you actually need to do it.
443:                //
444:                // TIP: write a small function in gcc or cl and see what
445:                // the optimized assemply output looks like (and which is fastest).
446:                // In C you can specify "unsigned" which gives the compiler more
447:                // info than the Java compiler has.
448:                int val = arr[start++];
449:                if (neg)
450:                    val = 9999 - val;
451:
452:                /***
453:                if (val < 10) {
454:                  out[wpos++] = (char)(val + '0');
455:                } else if (val < 100) {
456:                  out[wpos++] = (char)(val/10 + '0');
457:                  out[wpos++] = (char)(val%10 + '0');
458:                } else if (val < 1000) {
459:                  out[wpos++] = (char)(val/100 + '0');
460:                  out[wpos++] = (char)((val/10)%10 + '0');
461:                  out[wpos++] = (char)(val%10 + '0');
462:                } else {
463:                  out[wpos++] = (char)(val/1000 + '0');
464:                  out[wpos++] = (char)((val/100)%10 + '0');
465:                  out[wpos++] = (char)((val/10)%10 + '0');
466:                  out[wpos++] = (char)(val % 10 + '0');
467:                }
468:                 ***/
469:
470:                if (val < 10) {
471:                    out[wpos++] = (char) (val + '0');
472:                } else if (val < 100) {
473:                    int div = div10(val);
474:                    int ones = val - mul10(div); // mod 10
475:                    out[wpos++] = (char) (div + '0');
476:                    out[wpos++] = (char) (ones + '0');
477:                } else if (val < 1000) {
478:                    int div = div10(val);
479:                    int ones = val - mul10(div); // mod 10
480:                    val = div;
481:                    div = div10(val);
482:                    int tens = val - mul10(div); // mod 10
483:                    out[wpos++] = (char) (div + '0');
484:                    out[wpos++] = (char) (tens + '0');
485:                    out[wpos++] = (char) (ones + '0');
486:                } else {
487:                    int div = div10(val);
488:                    int ones = val - mul10(div); // mod 10
489:                    val = div;
490:                    div = div10(val);
491:                    int tens = val - mul10(div); // mod 10
492:                    val = div;
493:                    div = div10(val);
494:                    int hundreds = val - mul10(div); // mod 10
495:
496:                    out[wpos++] = (char) (div + '0');
497:                    out[wpos++] = (char) (hundreds + '0');
498:                    out[wpos++] = (char) (tens + '0');
499:                    out[wpos++] = (char) (ones + '0');
500:                }
501:
502:                while (start < end) {
503:                    val = arr[start++];
504:                    if (neg)
505:                        val = 9999 - val;
506:
507:                    int div = div10(val);
508:                    int ones = val - mul10(div); // mod 10
509:                    val = div;
510:                    div = div10(val);
511:                    int tens = val - mul10(div); // mod 10
512:                    val = div;
513:                    div = div10(val);
514:                    int hundreds = val - mul10(div); // mod 10
515:
516:                    /***
517:                    int ones = val % 10;
518:                    val /= 10;
519:                    int tens = val!=0 ? val % 10 : 0;
520:                    val /= 10;
521:                    int hundreds = val!=0 ? val % 10 : 0;
522:                    val /= 10;
523:                    int thousands = val!=0 ? val % 10 : 0;
524:                     ***/
525:
526:                    /***
527:                    int thousands = val>=1000 ? val/1000 : 0;
528:                    int hundreds  = val>=100 ? (val/100)%10 : 0;
529:                    int tens      = val>=10 ? (val/10)%10 : 0;
530:                    int ones      = val % 10;
531:                     ***/
532:
533:                    /***
534:                    int thousands =  val/1000;
535:                    int hundreds  = (val/100)%10;
536:                    int tens      = (val/10)%10;
537:                    int ones      = val % 10;
538:                     ***/
539:
540:                    out[wpos++] = (char) (div + '0');
541:                    out[wpos++] = (char) (hundreds + '0');
542:                    out[wpos++] = (char) (tens + '0');
543:                    out[wpos++] = (char) (ones + '0');
544:                }
545:
546:                // OPTIONAL: if trailing zeros were truncated, then this is where
547:                // we would restore them (compare number of chars read vs exponent)
548:
549:                return wpos - offset;
550:            }
551:
552:        }
www.java2java.com | Contact Us
All other trademarks are property of their respective owners.