01: /**
02: *******************************************************************************
03: * Copyright (C) 2005, International Business Machines Corporation and *
04: * others. All Rights Reserved. *
05: *******************************************************************************
06: */package com.ibm.icu.text;
07:
08: /**
09: * Abstract class for recognizing a single charset.
10: * Part of the implementation of ICU's CharsetDetector.
11: *
12: * Each specific charset that can be recognized will have an instance
13: * of some subclass of this class. All interaction between the overall
14: * CharsetDetector and the stuff specific to an individual charset happens
15: * via the interface provided here.
16: *
17: * Instances of CharsetDetector DO NOT have or maintain
18: * state pertaining to a specific match or detect operation.
19: * The WILL be shared by multiple instances of CharsetDetector.
20: * They encapsulate const charset-specific information.
21: *
22: * @internal
23: */
24: abstract class CharsetRecognizer {
25: /**
26: * Get the IANA name of this charset.
27: * @return the charset name.
28: */
29: abstract String getName();
30:
31: /**
32: * Get the ISO language code for this charset.
33: * @return the language code, or <code>null</code> if the language cannot be determined.
34: */
35: public String getLanguage() {
36: return null;
37: }
38:
39: /**
40: * Test the match of this charset with the input text data
41: * which is obtained via the CharsetDetector object.
42: *
43: * @param det The CharsetDetector, which contains the input text
44: * to be checked for being in this charset.
45: * @return Two values packed into one int (Damn java, anyhow)
46: * <br/>
47: * bits 0-7: the match confidence, ranging from 0-100
48: * <br/>
49: * bits 8-15: The match reason, an enum-like value.
50: */
51: abstract int match(CharsetDetector det);
52:
53: }
|