001: /*
002: * Copyright 2001-2005 The Apache Software Foundation
003: *
004: * Licensed under the Apache License, Version 2.0 (the "License");
005: * you may not use this file except in compliance with the License.
006: * You may obtain a copy of the License at
007: *
008: * http://www.apache.org/licenses/LICENSE-2.0
009: *
010: * Unless required by applicable law or agreed to in writing, software
011: * distributed under the License is distributed on an "AS IS" BASIS,
012: * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
013: * See the License for the specific language governing permissions and
014: * limitations under the License.
015: */
016:
017: package org.apache.commons.net.nntp;
018:
019: /**
020: * This is an implementation of a message threading algorithm, as originally devised by Zamie Zawinski.
021: * See <a href="http://www.jwz.org/doc/threading.html">http://www.jwz.org/doc/threading.html</a> for details.
022: * For his Java implementation, see <a href="http://lxr.mozilla.org/mozilla/source/grendel/sources/grendel/view/Threader.java">http://lxr.mozilla.org/mozilla/source/grendel/sources/grendel/view/Threader.java</a>
023: *
024: * @author rwinston <rwinston@checkfree.com>
025: *
026: */
027:
028: import java.util.HashMap;
029: import java.util.Iterator;
030:
031: public class Threader {
032: private ThreadContainer root;
033: private HashMap idTable;
034: private int bogusIdCount = 0;
035:
036: /**
037: * The main threader entry point - The client passes in an array of Threadable objects, and
038: * the Threader constructs a connected 'graph' of messages
039: * @param messages
040: * @return
041: */
042: public Threadable thread(Threadable[] messages) {
043: if (messages == null)
044: return null;
045:
046: idTable = new HashMap();
047:
048: // walk through each Threadable element
049: for (int i = 0; i < messages.length; ++i) {
050: if (!messages[i].isDummy())
051: buildContainer(messages[i]);
052: }
053:
054: root = findRootSet();
055: idTable.clear();
056: idTable = null;
057:
058: pruneEmptyContainers(root);
059:
060: root.reverseChildren();
061: gatherSubjects();
062:
063: if (root.next != null)
064: throw new RuntimeException("root node has a next:" + root);
065:
066: for (ThreadContainer r = root.child; r != null; r = r.next) {
067: if (r.threadable == null)
068: r.threadable = r.child.threadable.makeDummy();
069: }
070:
071: Threadable result = (root.child == null ? null
072: : root.child.threadable);
073: root.flush();
074: root = null;
075:
076: return result;
077: }
078:
079: /**
080: *
081: * @param threadable
082: */
083: private void buildContainer(Threadable threadable) {
084: String id = threadable.messageThreadId();
085: ThreadContainer container = (ThreadContainer) idTable.get(id);
086:
087: // A ThreadContainer exists for this id already. This should be a forward reference, but may
088: // be a duplicate id, in which case we will need to generate a bogus placeholder id
089: if (container != null) {
090: if (container.threadable != null) { // oops! duplicate ids...
091: id = "<Bogus-id:" + (bogusIdCount++) + ">";
092: container = null;
093: } else {
094: // The container just contained a forward reference to this message, so let's
095: // fill in the threadable field of the container with this message
096: container.threadable = threadable;
097: }
098: }
099:
100: // No container exists for that message Id. Create one and insert it into the hash table.
101: if (container == null) {
102: container = new ThreadContainer();
103: container.threadable = threadable;
104: idTable.put(id, container);
105: }
106:
107: // Iterate through all of the references and create ThreadContainers for any references that
108: // don't have them.
109: ThreadContainer parentRef = null;
110: {
111: String[] references = threadable.messageThreadReferences();
112: for (int i = 0; i < references.length; ++i) {
113: String refString = references[i];
114: ThreadContainer ref = (ThreadContainer) idTable
115: .get(refString);
116:
117: // if this id doesnt have a container, create one
118: if (ref == null) {
119: ref = new ThreadContainer();
120: idTable.put(refString, ref);
121: }
122:
123: // Link references together in the order they appear in the References: header,
124: // IF they dont have a have a parent already &&
125: // IF it will not cause a circular reference
126: if ((parentRef != null) && (ref.parent == null)
127: && (parentRef != ref)
128: && !(parentRef.findChild(ref))) {
129: // Link ref into the parent's child list
130: ref.parent = parentRef;
131: ref.next = parentRef.child;
132: parentRef.child = ref;
133: }
134: parentRef = ref;
135: }
136: }
137:
138: // parentRef is now set to the container of the last element in the references field. make that
139: // be the parent of this container, unless doing so causes a circular reference
140: if (parentRef != null
141: && (parentRef == container || container
142: .findChild(parentRef)))
143: parentRef = null;
144:
145: // if it has a parent already, its because we saw this message in a References: field, and presumed
146: // a parent based on the other entries in that field. Now that we have the actual message, we can
147: // throw away the old parent and use this new one
148: if (container.parent != null) {
149: ThreadContainer rest, prev;
150:
151: for (prev = null, rest = container.parent.child; rest != null; prev = rest, rest = rest.next) {
152: if (rest == container)
153: break;
154: }
155:
156: if (rest == null) {
157: throw new RuntimeException("Didnt find " + container
158: + " in parent" + container.parent);
159: }
160:
161: // Unlink this container from the parent's child list
162: if (prev == null)
163: container.parent.child = container.next;
164: else
165: prev.next = container.next;
166:
167: container.next = null;
168: container.parent = null;
169: }
170:
171: // If we have a parent, link container into the parents child list
172: if (parentRef != null) {
173: container.parent = parentRef;
174: container.next = parentRef.child;
175: parentRef.child = container;
176: }
177: }
178:
179: /**
180: * Find the root set of all existing ThreadContainers
181: * @return root the ThreadContainer representing the root node
182: */
183: private ThreadContainer findRootSet() {
184: ThreadContainer root = new ThreadContainer();
185: Iterator iter = idTable.keySet().iterator();
186:
187: while (iter.hasNext()) {
188: Object key = iter.next();
189: ThreadContainer c = (ThreadContainer) idTable.get(key);
190: if (c.parent == null) {
191: if (c.next != null)
192: throw new RuntimeException("c.next is "
193: + c.next.toString());
194: c.next = root.child;
195: root.child = c;
196: }
197: }
198: return root;
199: }
200:
201: /**
202: * Delete any empty or dummy ThreadContainers
203: * @param parent
204: */
205: private void pruneEmptyContainers(ThreadContainer parent) {
206: ThreadContainer container, prev, next;
207: for (prev = null, container = parent.child, next = container.next; container != null; prev = container, container = next, next = (container == null ? null
208: : container.next)) {
209:
210: // Is it empty and without any children? If so,delete it
211: if (container.threadable == null && container.child == null) {
212: if (prev == null)
213: parent.child = container.next;
214: else
215: prev.next = container.next;
216:
217: // Set container to prev so that prev keeps its same value the next time through the loop
218: container = prev;
219: }
220:
221: // Else if empty, with kids, and (not at root or only one kid)
222: else if (container.threadable == null
223: && container.child != null
224: && (container.parent != null || container.child.next == null)) {
225: // We have an invalid/expired message with kids. Promote the kids to this level.
226: ThreadContainer tail;
227: ThreadContainer kids = container.child;
228:
229: // Remove this container and replace with 'kids'.
230: if (prev == null)
231: parent.child = kids;
232: else
233: prev.next = kids;
234:
235: // Make each child's parent be this level's parent -> i.e. promote the children. Make the last child's next point to this container's next
236: // i.e. splice kids into the list in place of container
237: for (tail = kids; tail.next != null; tail = tail.next)
238: tail.parent = container.parent;
239:
240: tail.parent = container.parent;
241: tail.next = container.next;
242:
243: // next currently points to the item after the inserted items in the chain - reset that so we process the newly
244: // promoted items next time round
245: next = kids;
246:
247: // Set container to prev so that prev keeps its same value the next time through the loop
248: container = prev;
249: } else if (container.child != null) {
250: // A real message , with kids
251: // Iterate over the children
252: pruneEmptyContainers(container);
253: }
254: }
255: }
256:
257: /**
258: * If any two members of the root set have the same subject, merge them. This is to attempt to accomodate messages without References: headers.
259: */
260: private void gatherSubjects() {
261:
262: int count = 0;
263:
264: for (ThreadContainer c = root.child; c != null; c = c.next)
265: count++;
266:
267: // TODO verify this will avoid rehashing
268: HashMap subjectTable = new HashMap((int) (count * 1.2),
269: (float) 0.9);
270: count = 0;
271:
272: for (ThreadContainer c = root.child; c != null; c = c.next) {
273: Threadable threadable = c.threadable;
274:
275: // No threadable? If so, it is a dummy node in the root set.
276: // Only root set members may be dummies, and they alway have at least 2 kids
277: // Take the first kid as representative of the subject
278: if (threadable == null)
279: threadable = c.child.threadable;
280:
281: String subj = threadable.simplifiedSubject();
282:
283: if (subj == null || subj == "")
284: continue;
285:
286: ThreadContainer old = (ThreadContainer) subjectTable
287: .get(subj);
288:
289: // Add this container to the table iff:
290: // - There exists no container with this subject
291: // - or this is a dummy container and the old one is not - the dummy one is
292: // more interesting as a root, so put it in the table instead
293: // - The container in the table has a "Re:" version of this subject, and
294: // this container has a non-"Re:" version of this subject. The non-"Re:" version
295: // is the more interesting of the two.
296: if (old == null
297: || (c.threadable == null && old.threadable != null)
298: || (old.threadable != null
299: && old.threadable.subjectIsReply()
300: && c.threadable != null && !c.threadable
301: .subjectIsReply())) {
302: subjectTable.put(subj, c);
303: count++;
304: }
305: }
306:
307: // If the table is empty, we're done
308: if (count == 0)
309: return;
310:
311: // subjectTable is now populated with one entry for each subject which occurs in the
312: // root set. Iterate over the root set, and gather together the difference.
313: ThreadContainer prev, c, rest;
314: for (prev = null, c = root.child, rest = c.next; c != null; prev = c, c = rest, rest = (rest == null ? null
315: : rest.next)) {
316: Threadable threadable = c.threadable;
317:
318: // is it a dummy node?
319: if (threadable == null)
320: threadable = c.child.threadable;
321:
322: String subj = threadable.simplifiedSubject();
323:
324: // Dont thread together all subjectless messages
325: if (subj == null || subj == "")
326: continue;
327:
328: ThreadContainer old = (ThreadContainer) subjectTable
329: .get(subj);
330:
331: if (old == c) // That's us
332: continue;
333:
334: // We have now found another container in the root set with the same subject
335: // Remove the "second" message from the root set
336: if (prev == null)
337: root.child = c.next;
338: else
339: prev.next = c.next;
340: c.next = null;
341:
342: if (old.threadable == null && c.threadable == null) {
343: // both dummies - merge them
344: ThreadContainer tail;
345: for (tail = old.child; tail != null
346: && tail.next != null; tail = tail.next)
347: ;
348:
349: tail.next = c.child;
350:
351: for (tail = c.child; tail != null; tail = tail.next)
352: tail.parent = old;
353:
354: c.child = null;
355: } else if (old.threadable == null
356: || (c.threadable != null
357: && c.threadable.subjectIsReply() && !old.threadable
358: .subjectIsReply())) {
359: // Else if old is empty, or c has "Re:" and old does not ==> make this message a child of old
360: c.parent = old;
361: c.next = old.child;
362: old.child = c;
363: } else {
364: // else make the old and new messages be children of a new dummy container.
365: // We create a new container object for old.msg and empty the old container
366: ThreadContainer newc = new ThreadContainer();
367: newc.threadable = old.threadable;
368: newc.child = old.child;
369:
370: for (ThreadContainer tail = newc.child; tail != null; tail = tail.next)
371: tail.parent = newc;
372:
373: old.threadable = null;
374: old.child = null;
375:
376: c.parent = old;
377: newc.parent = old;
378:
379: // Old is now a dummy- give it 2 kids , c and newc
380: old.child = c;
381: c.next = newc;
382: }
383: // We've done a merge, so keep the same prev
384: c = prev;
385: }
386:
387: subjectTable.clear();
388: subjectTable = null;
389:
390: }
391: }
392:
393: /**
394: * A placeholder utility class, used for constructing a tree of Threadables
395: * Originall implementation by Jamie Zawinski.
396: * See the Grendel source for more details <a href="http://lxr.mozilla.org/mozilla/source/grendel/sources/grendel/view/Threader.java#511">here</a>
397: * Threadable objects
398: * @author Rory Winston <rwinston@checkfree.com>
399: */
400: class ThreadContainer {
401: Threadable threadable;
402: ThreadContainer parent;
403: ThreadContainer prev;
404: ThreadContainer next;
405: ThreadContainer child;
406:
407: /**
408: *
409: * @param container
410: * @return true if child is under self's tree. Detects circular references
411: */
412: boolean findChild(ThreadContainer target) {
413: if (child == null)
414: return false;
415:
416: else if (child == target)
417: return true;
418: else
419: return child.findChild(target);
420: }
421:
422: // Copy the ThreadContainer tree structure down into the underlying Threadable objects
423: // (Make the Threadable tree look like the ThreadContainer tree)
424: void flush() {
425: if (parent != null && threadable == null)
426: throw new RuntimeException("no threadable in "
427: + this .toString());
428:
429: parent = null;
430:
431: if (threadable != null)
432: threadable
433: .setChild(child == null ? null : child.threadable);
434:
435: if (child != null) {
436: child.flush();
437: child = null;
438: }
439:
440: if (threadable != null)
441: threadable.setNext(next == null ? null : next.threadable);
442:
443: if (next != null) {
444: next.flush();
445: next = null;
446: }
447:
448: threadable = null;
449: }
450:
451: /**
452: * Reverse the entire set of children
453: *
454: */
455: void reverseChildren() {
456: if (child != null) {
457: ThreadContainer kid, prev, rest;
458: for (prev = null, kid = child, rest = kid.next; kid != null; prev = kid, kid = rest, rest = (rest == null ? null
459: : rest.next))
460: kid.next = prev;
461:
462: child = prev;
463:
464: // Do it for the kids
465: for (kid = child; kid != null; kid = kid.next)
466: kid.reverseChildren();
467: }
468: }
469: }
|