001: // kelondroMSetTools.java
002: // -------------------------------------
003: // (C) by Michael Peter Christen; mc@anomic.de
004: // first published on http://www.anomic.de
005: // Frankfurt, Germany, 2004
006: // last major change: 28.12.2004
007: //
008: // This program is free software; you can redistribute it and/or modify
009: // it under the terms of the GNU General Public License as published by
010: // the Free Software Foundation; either version 2 of the License, or
011: // (at your option) any later version.
012: //
013: // This program is distributed in the hope that it will be useful,
014: // but WITHOUT ANY WARRANTY; without even the implied warranty of
015: // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
016: // GNU General Public License for more details.
017: //
018: // You should have received a copy of the GNU General Public License
019: // along with this program; if not, write to the Free Software
020: // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
021: //
022: // Using this software in any meaning (reading, learning, copying, compiling,
023: // running) means that you agree that the Author(s) is (are) not responsible
024: // for cost, loss of data or any harm that may be caused directly or indirectly
025: // by usage of this softare or this documentation. The usage of this software
026: // is on your own risk. The installation and usage (starting/running) of this
027: // software may allow other people or application to access your computer and
028: // any attached devices and is highly dependent on the configuration of the
029: // software which must be done by the user of the software; the author(s) is
030: // (are) also not responsible for proper configuration and usage of the
031: // software, even if provoked by documentation provided together with
032: // the software.
033: //
034: // Any changes to this file according to the GPL as documented in the file
035: // gpl.txt aside this file in the shipment you received can be done to the
036: // lines that follows this copyright notice here, but changes must not be
037: // done inside the copyright notive above. A re-distribution must contain
038: // the intact and unchanged copyright notice.
039: // Contributions and changes to the program code must be marked as such.
040:
041: package de.anomic.kelondro;
042:
043: import java.io.BufferedReader;
044: import java.io.File;
045: import java.io.FileInputStream;
046: import java.io.IOException;
047: import java.io.InputStreamReader;
048: import java.util.ArrayList;
049: import java.util.Collection;
050: import java.util.Comparator;
051: import java.util.Iterator;
052: import java.util.Map;
053: import java.util.Set;
054: import java.util.TreeMap;
055: import java.util.TreeSet;
056:
057: public class kelondroMSetTools {
058:
059: //public static Comparator fastStringComparator = fastStringComparator(true);
060:
061: // ------------------------------------------------------------------------------------------------
062: // helper methods
063:
064: public static int log2a(int x) {
065: // this computes 1 + log2
066: // it is the number of bits in x, not the logarithmus by 2
067: int l = 0;
068: while (x > 0) {
069: x = x >>> 1;
070: l++;
071: }
072: return l;
073: }
074:
075: // ------------------------------------------------------------------------------------------------
076: // join
077: // We distinguish two principal solutions
078: // - constructive join (generate new data structure)
079: // - destructive join (remove non-valid elements from given data structure)
080: // The algorithm to perform the join can be also of two kind:
081: // - join by pairwise enumeration
082: // - join by iterative tests (where we distinguish left-right and right-left tests)
083:
084: public static <A, B> TreeMap<A, B> joinConstructive(
085: Collection<TreeMap<A, B>> maps, boolean concatStrings) {
086: // this joins all TreeMap(s) contained in maps
087:
088: // first order entities by their size
089: TreeMap<Long, TreeMap<A, B>> orderMap = new TreeMap<Long, TreeMap<A, B>>();
090: TreeMap<A, B> singleMap;
091: Iterator<TreeMap<A, B>> i = maps.iterator();
092: int count = 0;
093: while (i.hasNext()) {
094: // get next entity:
095: singleMap = i.next();
096:
097: // check result
098: if ((singleMap == null) || (singleMap.size() == 0))
099: return new TreeMap<A, B>();
100:
101: // store result in order of result size
102: orderMap.put(new Long(singleMap.size() * 1000 + count),
103: singleMap);
104: count++;
105: }
106:
107: // check if there is any result
108: if (orderMap.size() == 0)
109: return new TreeMap<A, B>();
110:
111: // we now must pairwise build up a conjunction of these maps
112: Long k = (Long) orderMap.firstKey(); // the smallest, which means, the one with the least entries
113: TreeMap<A, B> mapA, mapB, joinResult = (TreeMap<A, B>) orderMap
114: .remove(k);
115: while ((orderMap.size() > 0) && (joinResult.size() > 0)) {
116: // take the first element of map which is a result and combine it with result
117: k = (Long) orderMap.firstKey(); // the next smallest...
118: mapA = joinResult;
119: mapB = (TreeMap<A, B>) orderMap.remove(k);
120: joinResult = joinConstructiveByTest(mapA, mapB,
121: concatStrings); // TODO: better with enumeration?
122: // free resources
123: mapA = null;
124: mapB = null;
125: }
126:
127: // in 'searchResult' is now the combined search result
128: if (joinResult.size() == 0)
129: return new TreeMap<A, B>();
130: return joinResult;
131: }
132:
133: public static <A, B> TreeMap<A, B> joinConstructive(
134: TreeMap<A, B> map1, TreeMap<A, B> map2,
135: boolean concatStrings) {
136: // comparators must be equal
137: if ((map1 == null) || (map2 == null))
138: return null;
139: if (map1.comparator() != map2.comparator())
140: return null;
141: if ((map1.size() == 0) || (map2.size() == 0))
142: return new TreeMap<A, B>(map1.comparator());
143:
144: // decide which method to use
145: int high = ((map1.size() > map2.size()) ? map1.size() : map2
146: .size());
147: int low = ((map1.size() > map2.size()) ? map2.size() : map1
148: .size());
149: int stepsEnum = 10 * (high + low - 1);
150: int stepsTest = 12 * log2a(high) * low;
151:
152: // start most efficient method
153: if (stepsEnum > stepsTest) {
154: if (map1.size() > map2.size())
155: return joinConstructiveByTest(map2, map1, concatStrings);
156: return joinConstructiveByTest(map1, map2, concatStrings);
157: }
158: return joinConstructiveByEnumeration(map1, map2, concatStrings);
159: }
160:
161: @SuppressWarnings("unchecked")
162: private static <A, B> TreeMap<A, B> joinConstructiveByTest(
163: TreeMap<A, B> small, TreeMap<A, B> large,
164: boolean concatStrings) {
165: Iterator<Map.Entry<A, B>> mi = small.entrySet().iterator();
166: TreeMap<A, B> result = new TreeMap<A, B>(large.comparator());
167: Map.Entry<A, B> mentry1;
168: B mobj2;
169: while (mi.hasNext()) {
170: mentry1 = mi.next();
171: mobj2 = large.get(mentry1.getKey());
172: if (mobj2 != null) {
173: if (mentry1.getValue() instanceof String) {
174: result
175: .put(
176: mentry1.getKey(),
177: (B) ((concatStrings) ? (mentry1
178: .getValue() + (String) mobj2)
179: : mentry1.getValue()));
180: } else {
181: result.put(mentry1.getKey(), mentry1.getValue());
182: }
183: }
184: }
185: return result;
186: }
187:
188: @SuppressWarnings("unchecked")
189: private static <A, B> TreeMap<A, B> joinConstructiveByEnumeration(
190: TreeMap<A, B> map1, TreeMap<A, B> map2,
191: boolean concatStrings) {
192: // implement pairwise enumeration
193: Comparator<? super A> comp = map1.comparator();
194: Iterator<Map.Entry<A, B>> mi1 = map1.entrySet().iterator();
195: Iterator<Map.Entry<A, B>> mi2 = map2.entrySet().iterator();
196: TreeMap<A, B> result = new TreeMap<A, B>(map1.comparator());
197: int c;
198: if ((mi1.hasNext()) && (mi2.hasNext())) {
199: Map.Entry<A, B> mentry1 = mi1.next();
200: Map.Entry<A, B> mentry2 = mi2.next();
201: while (true) {
202: c = comp.compare(mentry1.getKey(), mentry2.getKey());
203: if (c < 0) {
204: if (mi1.hasNext())
205: mentry1 = mi1.next();
206: else
207: break;
208: } else if (c > 0) {
209: if (mi2.hasNext())
210: mentry2 = mi2.next();
211: else
212: break;
213: } else {
214: if (mentry1.getValue() instanceof String) {
215: result
216: .put(
217: mentry1.getKey(),
218: (B) ((concatStrings) ? ((String) mentry1
219: .getValue() + (String) mentry2
220: .getValue())
221: : (String) mentry1
222: .getValue()));
223: } else {
224: result
225: .put(mentry1.getKey(), mentry1
226: .getValue());
227: }
228: if (mi1.hasNext())
229: mentry1 = mi1.next();
230: else
231: break;
232: if (mi2.hasNext())
233: mentry2 = mi2.next();
234: else
235: break;
236: }
237: }
238: }
239: return result;
240: }
241:
242: // now the same for set-set
243: public static <A> TreeSet<A> joinConstructive(TreeSet<A> set1,
244: TreeSet<A> set2) {
245: // comparators must be equal
246: if ((set1 == null) || (set2 == null))
247: return null;
248: if (set1.comparator() != set2.comparator())
249: return null;
250: if ((set1.size() == 0) || (set2.size() == 0))
251: return new TreeSet<A>(set1.comparator());
252:
253: // decide which method to use
254: int high = ((set1.size() > set2.size()) ? set1.size() : set2
255: .size());
256: int low = ((set1.size() > set2.size()) ? set2.size() : set1
257: .size());
258: int stepsEnum = 10 * (high + low - 1);
259: int stepsTest = 12 * log2a(high) * low;
260:
261: // start most efficient method
262: if (stepsEnum > stepsTest) {
263: if (set1.size() < set2.size())
264: return joinConstructiveByTest(set1, set2);
265: return joinConstructiveByTest(set2, set1);
266: }
267: return joinConstructiveByEnumeration(set1, set2);
268: }
269:
270: private static <A> TreeSet<A> joinConstructiveByTest(
271: TreeSet<A> small, TreeSet<A> large) {
272: Iterator<A> mi = small.iterator();
273: TreeSet<A> result = new TreeSet<A>(small.comparator());
274: A o;
275: while (mi.hasNext()) {
276: o = mi.next();
277: if (large.contains(o))
278: result.add(o);
279: }
280: return result;
281: }
282:
283: private static <A> TreeSet<A> joinConstructiveByEnumeration(
284: TreeSet<A> set1, TreeSet<A> set2) {
285: // implement pairvise enumeration
286: Comparator<? super A> comp = set1.comparator();
287: Iterator<A> mi = set1.iterator();
288: Iterator<A> si = set2.iterator();
289: TreeSet<A> result = new TreeSet<A>(set1.comparator());
290: int c;
291: if ((mi.hasNext()) && (si.hasNext())) {
292: A mobj = mi.next();
293: A sobj = si.next();
294: while (true) {
295: c = comp.compare(mobj, sobj);
296: if (c < 0) {
297: if (mi.hasNext())
298: mobj = mi.next();
299: else
300: break;
301: } else if (c > 0) {
302: if (si.hasNext())
303: sobj = si.next();
304: else
305: break;
306: } else {
307: result.add(mobj);
308: if (mi.hasNext())
309: mobj = mi.next();
310: else
311: break;
312: if (si.hasNext())
313: sobj = si.next();
314: else
315: break;
316: }
317: }
318: }
319: return result;
320: }
321:
322: // now the same for set-set
323: public static <A> boolean anymatch(TreeSet<A> set1, TreeSet<A> set2) {
324: // comparators must be equal
325: if ((set1 == null) || (set2 == null))
326: return false;
327: if (set1.comparator() != set2.comparator())
328: return false;
329: if ((set1.size() == 0) || (set2.size() == 0))
330: return false;
331:
332: // decide which method to use
333: int high = ((set1.size() > set2.size()) ? set1.size() : set2
334: .size());
335: int low = ((set1.size() > set2.size()) ? set2.size() : set1
336: .size());
337: int stepsEnum = 10 * (high + low - 1);
338: int stepsTest = 12 * log2a(high) * low;
339:
340: // start most efficient method
341: if (stepsEnum > stepsTest) {
342: if (set1.size() < set2.size())
343: return anymatchByTest(set1, set2);
344: return anymatchByTest(set2, set1);
345: }
346: return anymatchByEnumeration(set1, set2);
347: }
348:
349: private static <A> boolean anymatchByTest(TreeSet<A> small,
350: TreeSet<A> large) {
351: Iterator<A> mi = small.iterator();
352: A o;
353: while (mi.hasNext()) {
354: o = mi.next();
355: if (large.contains(o))
356: return true;
357: }
358: return false;
359: }
360:
361: private static <A> boolean anymatchByEnumeration(TreeSet<A> set1,
362: TreeSet<A> set2) {
363: // implement pairvise enumeration
364: Comparator<? super A> comp = set1.comparator();
365: Iterator<A> mi = set1.iterator();
366: Iterator<A> si = set2.iterator();
367: int c;
368: if ((mi.hasNext()) && (si.hasNext())) {
369: A mobj = mi.next();
370: A sobj = si.next();
371: while (true) {
372: c = comp.compare(mobj, sobj);
373: if (c < 0) {
374: if (mi.hasNext())
375: mobj = mi.next();
376: else
377: break;
378: } else if (c > 0) {
379: if (si.hasNext())
380: sobj = si.next();
381: else
382: break;
383: } else {
384: return true;
385: }
386: }
387: }
388: return false;
389: }
390:
391: // ------------------------------------------------------------------------------------------------
392: // exclude
393:
394: public static <A, B> TreeMap<A, B> excludeConstructive(
395: TreeMap<A, B> map, TreeSet<A> set) {
396: // comparators must be equal
397: if (map == null)
398: return null;
399: if (set == null)
400: return map;
401: if ((map.size() == 0) || (set.size() == 0))
402: return map;
403: if (map.comparator() != set.comparator())
404: return excludeConstructiveByTestMapInSet(map, set);
405: return excludeConstructiveByTestMapInSet(map, set);
406: // return excludeConstructiveByEnumeration(map, set);
407: }
408:
409: private static <A, B> TreeMap<A, B> excludeConstructiveByTestMapInSet(
410: TreeMap<A, B> map, TreeSet<A> set) {
411: Iterator<A> mi = map.keySet().iterator();
412: TreeMap<A, B> result = new TreeMap<A, B>(map.comparator());
413: A o;
414: while (mi.hasNext()) {
415: o = mi.next();
416: if (!(set.contains(o)))
417: result.put(o, map.get(o));
418: }
419: return result;
420: }
421:
422: public static <A, B> void excludeDestructive(TreeMap<A, B> map,
423: TreeSet<A> set) {
424: // comparators must be equal
425: if (map == null)
426: return;
427: if (set == null)
428: return;
429: if (map.comparator() != set.comparator())
430: return;
431: if ((map.size() == 0) || (set.size() == 0))
432: return;
433:
434: if (map.size() < set.size())
435: excludeDestructiveByTestMapInSet(map, set);
436: else
437: excludeDestructiveByTestSetInMap(map, set);
438: }
439:
440: private static <A, B> void excludeDestructiveByTestMapInSet(
441: TreeMap<A, B> map, TreeSet<A> set) {
442: Iterator<A> mi = map.keySet().iterator();
443: while (mi.hasNext())
444: if (set.contains(mi.next()))
445: mi.remove();
446: }
447:
448: private static <A, B> void excludeDestructiveByTestSetInMap(
449: TreeMap<A, B> map, TreeSet<A> set) {
450: Iterator<A> si = set.iterator();
451: while (si.hasNext())
452: map.remove(si.next());
453: }
454:
455: // and the same again with set-set
456: public static <A> void excludeDestructive(TreeSet<A> set1,
457: TreeSet<A> set2) {
458: // comparators must be equal
459: if (set1 == null)
460: return;
461: if (set2 == null)
462: return;
463: if (set1.comparator() != set2.comparator())
464: return;
465: if ((set1.size() == 0) || (set2.size() == 0))
466: return;
467:
468: if (set1.size() < set2.size())
469: excludeDestructiveByTestSmallInLarge(set1, set2);
470: else
471: excludeDestructiveByTestLargeInSmall(set1, set2);
472: }
473:
474: private static <A> void excludeDestructiveByTestSmallInLarge(
475: TreeSet<A> small, TreeSet<A> large) {
476: Iterator<A> mi = small.iterator();
477: while (mi.hasNext())
478: if (large.contains(mi.next()))
479: mi.remove();
480: }
481:
482: private static <A> void excludeDestructiveByTestLargeInSmall(
483: TreeSet<A> large, TreeSet<A> small) {
484: Iterator<A> si = small.iterator();
485: while (si.hasNext())
486: large.remove(si.next());
487: }
488:
489: // ------------------------------------------------------------------------------------------------
490:
491: public static TreeMap<String, String> loadMap(String filename,
492: String sep) {
493: TreeMap<String, String> map = new TreeMap<String, String>();
494: BufferedReader br = null;
495: try {
496: br = new BufferedReader(new InputStreamReader(
497: new FileInputStream(filename)));
498: String line;
499: int pos;
500: while ((line = br.readLine()) != null) {
501: line = line.trim();
502: if ((line.length() > 0) && (!(line.startsWith("#")))
503: && ((pos = line.indexOf(sep)) > 0))
504: map
505: .put(line.substring(0, pos).trim()
506: .toLowerCase(), line.substring(
507: pos + sep.length()).trim());
508: }
509: } catch (IOException e) {
510: } finally {
511: if (br != null)
512: try {
513: br.close();
514: } catch (Exception e) {
515: }
516: }
517: return map;
518: }
519:
520: public static TreeMap<String, ArrayList<String>> loadMapMultiValsPerKey(
521: String filename, String sep) {
522: TreeMap<String, ArrayList<String>> map = new TreeMap<String, ArrayList<String>>();
523: BufferedReader br = null;
524: try {
525: br = new BufferedReader(new InputStreamReader(
526: new FileInputStream(filename)));
527: String line, key, value;
528: int pos;
529: while ((line = br.readLine()) != null) {
530: line = line.trim();
531: if ((line.length() > 0) && (!(line.startsWith("#")))
532: && ((pos = line.indexOf(sep)) > 0)) {
533: key = line.substring(0, pos).trim().toLowerCase();
534: value = line.substring(pos + sep.length()).trim();
535: if (!map.containsKey(key))
536: map.put(key, new ArrayList<String>());
537: map.get(key).add(value);
538: }
539: }
540: } catch (IOException e) {
541: } finally {
542: if (br != null)
543: try {
544: br.close();
545: } catch (Exception e) {
546: }
547: }
548: return map;
549: }
550:
551: public static TreeSet<String> loadList(File file,
552: Comparator<String> c) {
553: TreeSet<String> list = new TreeSet<String>(c);
554: if (!(file.exists()))
555: return list;
556:
557: BufferedReader br = null;
558: try {
559: br = new BufferedReader(new InputStreamReader(
560: new FileInputStream(file)));
561: String line;
562: while ((line = br.readLine()) != null) {
563: line = line.trim();
564: if ((line.length() > 0) && (!(line.startsWith("#"))))
565: list.add(line.trim().toLowerCase());
566: }
567: br.close();
568: } catch (IOException e) {
569: } finally {
570: if (br != null)
571: try {
572: br.close();
573: } catch (Exception e) {
574: }
575: }
576: return list;
577: }
578:
579: public static String setToString(Set<String> set, char separator) {
580: Iterator<String> i = set.iterator();
581: StringBuffer sb = new StringBuffer(set.size() * 7);
582: if (i.hasNext())
583: sb.append(i.next());
584: while (i.hasNext()) {
585: sb.append(separator).append(i.next());
586: }
587: return new String(sb);
588: }
589:
590: // ------------------------------------------------------------------------------------------------
591:
592: public static void main(String[] args) {
593: TreeMap<String, String> m = new TreeMap<String, String>();
594: TreeMap<String, String> s = new TreeMap<String, String>();
595: m.put("a", "a");
596: m.put("x", "x");
597: m.put("f", "f");
598: m.put("h", "h");
599: m.put("w", "w");
600: m.put("7", "7");
601: m.put("t", "t");
602: m.put("k", "k");
603: m.put("y", "y");
604: m.put("z", "z");
605: s.put("a", "a");
606: s.put("b", "b");
607: s.put("c", "c");
608: s.put("k", "k");
609: s.put("l", "l");
610: s.put("m", "m");
611: s.put("n", "n");
612: s.put("o", "o");
613: s.put("p", "p");
614: s.put("q", "q");
615: s.put("r", "r");
616: s.put("s", "s");
617: s.put("t", "t");
618: s.put("x", "x");
619: System.out.println("Compare " + m.toString() + " with "
620: + s.toString());
621: System.out.println("Join="
622: + joinConstructiveByEnumeration(m, s, true));
623: System.out
624: .println("Join=" + joinConstructiveByTest(m, s, true));
625: System.out
626: .println("Join=" + joinConstructiveByTest(m, s, true));
627: System.out.println("Join=" + joinConstructive(m, s, true));
628: //System.out.println("Exclude=" + excludeConstructiveByTestMapInSet(m, s.keySet()));
629:
630: /*
631: for (int low = 0; low < 10; low++)
632: for (int high = 0; high < 100; high=high + 10) {
633: int stepsEnum = 10 * high;
634: int stepsTest = 12 * log2(high) * low;
635: System.out.println("low=" + low + ", high=" + high + ", stepsEnum=" + stepsEnum + ", stepsTest=" + stepsTest + "; best method is " + ((stepsEnum < stepsTest) ? "joinByEnumeration" : "joinByTest"));
636: }
637: */
638:
639: }
640:
641: }
|