001: /*
002: * (c) Copyright 2007 by Volker Bergmann. All rights reserved.
003: *
004: * Redistribution and use in source and binary forms, with or without
005: * modification, is permitted under the terms of the
006: * GNU General Public License.
007: *
008: * For redistributing this software or a derivative work under a license other
009: * than the GPL-compatible Free Software License as defined by the Free
010: * Software Foundation or approved by OSI, you must first obtain a commercial
011: * license to this software product from Volker Bergmann.
012: *
013: * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
014: * WITHOUT A WARRANTY OF ANY KIND. ALL EXPRESS OR IMPLIED CONDITIONS,
015: * REPRESENTATIONS AND WARRANTIES, INCLUDING ANY IMPLIED WARRANTY OF
016: * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT, ARE
017: * HEREBY EXCLUDED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
018: * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
019: * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
020: * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
021: * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
022: * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
023: * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
024: * POSSIBILITY OF SUCH DAMAGE.
025: */
026:
027: package org.databene.region;
028:
029: import org.databene.document.csv.BeanCSVWriter;
030: import org.databene.document.csv.CSVLineIterator;
031: import org.databene.commons.*;
032: import org.apache.commons.logging.LogFactory;
033: import org.apache.commons.logging.Log;
034:
035: import java.util.*;
036: import java.io.IOException;
037: import java.io.FileWriter;
038:
039: /**
040: * Reads and persists city files in CSV format (column header = property name).<br/>
041: * <br/>
042: * Created: 28.07.2007 15:21:12
043: * @author Volker Bergmann
044: */
045: public class CityManager {
046:
047: private static final Log logger = LogFactory
048: .getLog(CityManager.class);
049:
050: private static Set<String> simpleLocatorWords = ArrayUtil.toSet(
051: "b.", "bei", "im", "am", "ob", "zum", "sopra", "di", "in");
052: private static Set<String[]> complexLocatorWords = ArrayUtil.toSet(
053: new String[] { "in", "der" }, new String[] { "an", "der" },
054: new String[] { "ob", "der" });
055: private static Set<String> prefixes = ArrayUtil.toSet("St.",
056: "S.",
057: "Alt",
058: "Bad", // CH
059: "Markt", "Hofamt", "Maria", "Deutsch", "Moorbad",
060: "Bairisch", "Klein", "Hohe",
061: "Groß", // AT
062: "La", "Le",
063: "Les", // CH
064: "San", "Santa", "Val", "Monte", "Ponte", "Castel", "Riva",
065: "Villa", // CH
066: "Santa Maria"); // TODO v0.5 "Santa Maria" are two words
067: private static Set<String> suffixes = ArrayUtil.toSet("Stadt",
068: "Land", // CH
069: "Umgebung", "Kurort", "Markt", "Neustadt", "Neudorf", "II", // AT
070: "Inferiore", "Superiore"); // CH
071:
072: private static Set<String> suspectiveNames = new HashSet<String>();
073:
074: public static void readCities(Country country, String filename)
075: throws IOException {
076: readCities(country, filename, new HashMap<String, String>());
077: }
078:
079: public static void readCities(Country country, String filename,
080: Map<String, String> defaults) throws IOException {
081: CSVLineIterator iterator = new CSVLineIterator(filename, ';');
082: String[] header = iterator.next();
083: int warnCount = 0;
084: while (iterator.hasNext()) {
085: String[] cells = iterator.next();
086: if (cells.length == 0)
087: continue;
088: if (logger.isDebugEnabled())
089: logger.debug(ArrayFormat.format(";", cells));
090: if (cells.length == 1)
091: continue;
092: Map<String, String> instance = new HashMap<String, String>();
093: for (int i = 0; i < cells.length; i++) {
094: instance.put(header[i], cells[i]);
095: }
096: if (logger.isDebugEnabled())
097: logger.debug(instance);
098:
099: // create/setup state
100: String stateId = instance.get("state");
101: State state = country.getState(stateId);
102: if (state == null) {
103: state = new State(stateId);
104: country.addState(stateId, state);
105: }
106:
107: String cityIdString = instance.get("municipality");
108: CityId cityId;
109: if (StringUtil.isEmpty(cityIdString))
110: cityIdString = instance.get("city");
111: if (!StringUtil.isEmpty(cityIdString)) {
112: cityId = parseCityName(cityIdString, stateId, true);
113: } else {
114: String cityName = instance.get("name");
115: String cityNameExtension = instance
116: .get("nameExtension");
117: cityId = new CityId(cityName, cityNameExtension);
118: }
119:
120: // create/setup city
121: CityHelper city = (CityHelper) state.getCity(cityId);
122: String zipCode = instance.get("zipCode");
123: String lang = getValue(instance, "language", defaults);
124: if (city == null) {
125: String phoneCode = getValue(instance, "phoneCode",
126: defaults);
127: if (StringUtil.isEmpty(phoneCode)) {
128: warnCount++;
129: logger
130: .warn("Leaving out '"
131: + cityId
132: + "' since its phone code is not specified");
133: continue;
134: }
135:
136: city = new CityHelper(state, cityId, ArrayUtil
137: .toList(zipCode), phoneCode);
138: city.setLanguage(LocaleUtil.getLocale(lang));
139: state.addCity(cityId, city);
140: } else
141: city.addZipCode(zipCode);
142: }
143: if (warnCount > 0)
144: System.out.println(warnCount + " warnings");
145: if (suspectiveNames.size() > 0)
146: logger.warn("Suspective names: " + suspectiveNames);
147: }
148:
149: public static void persistCities(Country country, String filename)
150: throws IOException {
151: // persist city data in standard format
152: BeanCSVWriter<City> writer = new BeanCSVWriter<City>(
153: new FileWriter(filename), ';', "state.country.isoCode",
154: "state.id", "name", "nameExtension", "zipCode",
155: "phoneCode", "language");
156: for (State state : country.getStates()) {
157: for (City city : state.getCities())
158: for (String zipCode : city.getZipCodes()) {
159: ((CityHelper) city).setZipCode(zipCode);
160: writer.writeElement(city);
161: }
162: }
163: writer.close();
164: }
165:
166: // private helpers -------------------------------------------------------------------------------------------------
167:
168: private static String getValue(Map<String, String> instance,
169: String key, Map<String, String> defaults) {
170: String value = instance.get(key);
171: if (value == null)
172: value = defaults.get(key);
173: return value;
174: }
175:
176: /*
177: private static CityId parseCityName(String cityName, String stateId) {
178: return parseCityName(cityName, stateId, false);
179: }
180: */
181: private static CityId parseCityName(String cityName,
182: String stateId, boolean quiet) {
183: // parse city id by pattern
184: // Cityname = [Prefix] Name [Extension] [district] [institution]
185: // Prefix = 'St.' | 'S.' | 'La' | 'Le' | 'Les' ...
186: // Extension = State | '(' Text ')' | Locator
187: // Locator = ('b.' | 'im' | 'am' | 'in der' | 'an der' | 'ob' | 'sopra' | 'di') (Word | Words)
188:
189: // TODO v0.5 check for double names like Frantschach-St. Gertraud
190: // TODO v0.5 make use of district and institution info
191:
192: String[] nameParts = StringUtil.tokenize(cityName, ' ');
193: // check prefix
194: String name = "";
195: String extension = "";
196:
197: // String district = null;
198: // String institution = null;
199: // int warnCount = 0;
200:
201: // process prefixes
202: while (nameParts.length > 1 && prefixes.contains(nameParts[0])) {
203: name = append(name, nameParts[0]);
204: nameParts = ArrayUtil.remove(nameParts, 0);
205: }
206:
207: // check for district and institution
208: for (int i = 0; i < nameParts.length; i++) {
209: if (ParseUtil.isPositiveNumber(nameParts[i])) {
210: //district = nameParts[i];
211: if (i < nameParts.length - 1) {
212: String[] institutionParts = ArrayUtil.copyOfRange(
213: nameParts, i + 1, nameParts.length - i - 1);
214: //institution = ArrayFormat.format(" ", institutionParts);
215: nameParts = ArrayUtil.copyOfRange(nameParts, 0, i);
216: } else
217: nameParts = ArrayUtil.remove(nameParts,
218: nameParts.length - 1);
219: break;
220: }
221: }
222:
223: // check extension
224: if (nameParts.length > 1
225: && nameParts[nameParts.length - 1].equals(stateId)) {
226: // state pattern
227: extension = nameParts[nameParts.length - 1];
228: nameParts = ArrayUtil.remove(nameParts,
229: nameParts.length - 1);
230: } else {
231: // check for '(' ... ')'
232: int bracketStart = -1;
233: for (int i = 1; i < nameParts.length; i++) {
234: if (nameParts[i].charAt(0) == '(') {
235: bracketStart = i;
236: break;
237: }
238: }
239: if (bracketStart > 0
240: && nameParts[nameParts.length - 1].endsWith(")")) {
241: extension = ArrayFormat.format(" ", ArrayUtil
242: .copyOfRange(nameParts, bracketStart,
243: nameParts.length - bracketStart));
244: nameParts = ArrayUtil.copyOfRange(nameParts, 0,
245: bracketStart);
246: } else if (nameParts.length >= 4) {
247: // check each defined complex locator
248: for (String[] locator : complexLocatorWords) {
249: int locatorStartIndex = -1;
250: boolean match = false;
251: // check through each start index
252: for (int startIndex = 1; !match
253: && startIndex < nameParts.length - 2; startIndex++) {
254: // check each locator part from start index
255: if (nameParts.length - startIndex > locator.length) {
256: match = true;
257: for (int i = 0; i < locator.length; i++) {
258: if (!nameParts[startIndex + i]
259: .equals(locator[i]))
260: match = false;
261: }
262: }
263: if (match)
264: locatorStartIndex = startIndex;
265: }
266: if (match) {
267: String[] locatorParts = ArrayUtil.copyOfRange(
268: nameParts, locatorStartIndex,
269: nameParts.length - locatorStartIndex);
270: extension = ArrayFormat.format(" ",
271: locatorParts);
272: nameParts = ArrayUtil.copyOfRange(nameParts, 0,
273: locatorStartIndex);
274: break;
275: }
276: }
277: }
278: if (nameParts.length >= 3) {
279: // check for simple locator
280: for (int startIndex = 1; startIndex < nameParts.length - 1; startIndex++) {
281: if (simpleLocatorWords
282: .contains(nameParts[startIndex])) {
283: String locationString = ArrayFormat.format(" ",
284: ArrayUtil.copyOfRange(nameParts,
285: startIndex, nameParts.length
286: - startIndex));
287: extension = append(locationString, extension);
288: nameParts = ArrayUtil.copyOfRange(nameParts, 0,
289: startIndex);
290: break;
291: }
292: }
293: }
294: }
295: // check for suffix
296: if (nameParts.length > 1
297: && suffixes.contains(nameParts[nameParts.length - 1])) {
298: extension = append(extension,
299: nameParts[nameParts.length - 1]);
300: nameParts = ArrayUtil.remove(nameParts,
301: nameParts.length - 1);
302: }
303:
304: // put together the parts
305: name = append(name, ArrayFormat.format(" ", nameParts));
306: if (nameParts.length != 1) {
307: suspectiveNames.add(name);
308: if (!quiet)
309: logger.info("Double name or possible parsing error: "
310: + name);
311: }
312: CityId cityId = new CityId(name, extension);
313: // check recomposition against original name
314: if (!cityId.toString().equals(cityName))
315: throw new RuntimeException("Error in city name parsing: "
316: + cityName + " -> " + cityId);
317: return cityId;
318: }
319:
320: private static String append(String name, String namePart) {
321: return append(name, namePart, " ");
322: }
323:
324: private static String append(String name, String namePart,
325: String separator) {
326: if (StringUtil.isEmpty(name)) {
327: if (StringUtil.isEmpty(namePart))
328: return "";
329: else
330: return namePart;
331: } else {
332: if (StringUtil.isEmpty(namePart))
333: return name;
334: else
335: return name + separator + namePart;
336: }
337: }
338:
339: public static class CityHelper extends City {
340:
341: private String zipCode;
342:
343: public CityHelper(State state, CityId cityId,
344: List<String> zipCodes, String phoneCode) {
345: super (state, cityId.getName(), cityId.getNameExtension(),
346: zipCodes, phoneCode);
347: }
348:
349: public String getZipCode() {
350: return zipCode;
351: }
352:
353: public void setZipCode(String zipCode) {
354: this.zipCode = zipCode;
355: }
356: }
357:
358: }
|