001: /*
002: * Copyright 2001 Sun Microsystems, Inc. All rights reserved.
003: * PROPRIETARY/CONFIDENTIAL. Use of this product is subject to license terms.
004: */
005:
006: package com.sun.portal.discussions.providers;
007:
008: import java.util.*;
009: import java.lang.*;
010: import java.io.*;
011:
012: import com.sun.portal.search.soif.*;
013: import com.sun.portal.search.demo.*;
014:
015: /**
016: * Class which mainly deals with clustering of comments and storing
017: * and retrieving of comment properties
018: * Input is one or more discussion streams which is the SOIFInputStream
019: * Discussion stream is normally be sorted by a particular field handled
020: * by the search request. The Comments class maintains the sorting hence
021: * each subtree is sorted. Currently Comments class handles a single cluster
022: * given an input comment url. The comment url may or may not be the main item
023: * or the discussion id. Methods based on properties of a comment stream.
024: * <p>
025: * Main functions available are to cluster the nodes and
026: * create a tree structure by fixing the depth for display purpose
027: * </p>
028: */
029: public class Comments {
030: private SOIFInputStream commentstream;
031: private ArrayList unclusteredComments = new ArrayList();
032: private ArrayList comments = new ArrayList();
033:
034: /** Works only for single cluster */
035: private String rooturl;
036: private String discussionID;
037: private SOIF rootSOIF;
038: private boolean sortByDate = true;
039:
040: /** REMOVE - server url is attached to comment url for absolute url */
041: private String server;
042:
043: /**
044: * public constructor
045: */
046: public Comments() {
047: }
048:
049: /**
050: * get the raw stream
051: * Good for debugging or if extra manipulation is needed
052: */
053: public SOIFInputStream getCommentstream() {
054: return commentstream;
055: }
056:
057: /**
058: * For multiple clusters
059: * Find all the main items in the SOIF stream and store in hashmap
060: * cluster the comments for each main item and store in list
061: * like in a single cluster
062: */
063: public void clusterComments(SOIFInputStream s) {
064: // multiple clusters
065: // Iterate through the list and find all the main items
066: // Store main items/discussion ids in a hashmap
067: // Cluster each discussion
068: }
069:
070: /**
071: * Input is a SOIF stream which may be sorted by anything
072: * url is the the discussion url which may not be the main item
073: * Can be used to get the discussion subtree from search results
074: * e.g url = xyz
075: * SOIF can have reference_id= ROOT xyz(main item or discussionID is xyz)
076: * OR SOIF can have reference_id=abc pqr xyz(main item has url=abc) subtree
077: *
078: * cluster the comments by their reference-id
079: * may be sorted by last-modified date most of the time
080: */
081: public void clusterComments(SOIFInputStream s, String rooturl)
082: throws Exception {
083:
084: this .commentstream = s;
085: this .rooturl = rooturl;
086: /* do something to comments */
087:
088: // First transfer contents to a list
089: ArrayList v = new ArrayList();
090: SOIF soif;
091: try {
092: for (soif = commentstream.readSOIF(); soif != null; soif = commentstream
093: .readSOIF())
094: v.add(soif);
095: } catch (IOException ie) {
096: }
097:
098: // mainly for debugging
099: this .unclusteredComments = v;
100: // System.out.println("** clusterComments");
101: if ((v != null) && (!v.isEmpty())) {
102: // assume 0 is the root as we have sorted the list by date
103: // root node can be anywhere if sorting is by author or rating
104: int indx = findRoot(v, rooturl);
105: if (indx >= 0) {
106: this .rootSOIF = (SOIF) v.get(indx);
107: //comments.add(rootSOIF);
108: // System.out.println("** clusterComments: ROOT at "+indx+" Added "+rootSOIF.getURL());
109: v.remove(indx);
110: } else {
111: throw new Exception("Cannot find ROOT");
112: }
113:
114: // input is modified list which is minus the root node
115: cluster(v, rooturl);
116: }
117: }
118:
119: /**
120: * Given a list of SOIFs, returns a list minus the root node.
121: * root node is stored in RootSOIF
122: */
123: public int findRoot(List l, String url) {
124: // Given list l, find the root node
125: // reference id of the root Node will be 'ROOT rooturl' i.e just ends with 'rooturl'
126: // if comment url is rooturl then it is the root node too
127: // System.out.println("** FindRoot");
128: for (int i = 0; i < l.size(); i++) {
129: SOIF comment = (SOIF) l.get(i);
130: if (comment.getURL().equals(url)) {
131: return i;
132: }
133: }
134: return -1;
135: }
136:
137: /**
138: * recursive method to generate the sorted comment tree
139: * optimized for a list which is sorted by date
140: * For an unsorted list, the entire list has to be passed instead
141: * of a sublist
142: */
143: private void cluster(List v, String parenturl) {
144: // System.out.println("** cluster: finding children of "+parenturl+" size= "+v.size());
145: for (int i = 0; i < v.size(); i++) {
146: SOIF comment = (SOIF) v.get(i);
147: if (getParentID(comment).equals(parenturl)) {
148: // add child to saved list
149: comments.add(v.get(i));
150: String newurl = comment.getURL();
151: // System.out.println("** cluster: Added: "+ newurl);
152: List newlist;
153: if (sortByDate) {
154: // extract portion of the list after the matching node
155: // optimization for list which is sorted by date
156: // helps during searching, will find children fast
157: // Also helps for leaf nodes, empty sublist mostly
158: if (i + 1 < v.size()) {
159: newlist = v.subList(i + 1, v.size());
160: } else {
161: return;
162: }
163: } else {
164: // extract entire list minus the matching node
165: v.remove(i);
166: newlist = v;
167: }
168: cluster(newlist, newurl);
169: }
170: }
171: }
172:
173: public List getComments(String url) {
174: // index into a hashmap and return the specific list
175: return comments;
176: }
177:
178: public List getComments() {
179: return comments;
180: }
181:
182: /**
183: * Extracts the discussionID from rootSOIF
184: * Not really valid for multiple clusters
185: */
186: public String getDiscussionID() throws Exception {
187: // Not valid exception - REMOVE
188: // for multiple clusters there will be multiple discussion ids
189:
190: if (rootSOIF != null) {
191: String ref_id = rootSOIF.getValue("rd-reference-id");
192: if (ref_id != null) {
193: StringTokenizer st = new StringTokenizer(ref_id, " ");
194: if (st.countTokens() >= 2) {
195: String tok = st.nextToken();
196: if (tok.equals("ROOT")) {
197: this .discussionID = st.nextToken();
198: } else {
199: // subtree - xyz abc pqr where discussionID is first token
200: this .discussionID = tok;
201: }
202: }
203: }
204: }
205:
206: if (discussionID == null)
207: throw new Exception("Cannot get discussionID");
208: return discussionID;
209: }
210:
211: /**
212: * Get the Root of the comment tree
213: * may or may not be the main item
214: * rootSOIF value is set during clustering.
215: * @returns the SOIF object with the discussion url
216: * @throws exception if rootSOIF is null
217: */
218: public SOIF getRootSOIF() throws Exception {
219: // Not valid exception - REMOVE
220: // for multiple cluster there will be multiple root nodes
221: if (rootSOIF == null)
222: throw new Exception("No Comment Root found");
223: return rootSOIF;
224: }
225:
226: /**
227: * Find the depth of root node. Convenience method
228: * REMOVE if unnecessary
229: */
230: public int findRootSOIFDepth() {
231: if (rootSOIF != null) {
232: return findDepth(rootSOIF);
233: }
234: return -1;
235: }
236:
237: /**
238: * given a soif find the depth based on
239: * reference-id.
240: * convenience method
241: */
242: public int findDepth(SOIF comment) {
243: // calculate by reference id
244: String ref_id = comment.getValue("rd-reference-id");
245: if (ref_id != null) {
246: if (ref_id.startsWith("ROOT "))
247: return 1;
248: StringTokenizer st = new StringTokenizer(ref_id, " ");
249: int d = st.countTokens();
250: return d;
251: } else {
252: return 0;
253: }
254: }
255:
256: /**
257: * @returns parent url by parsing the reference id
258: * where does it get the search server url from ?
259: */
260: public String getParentID(SOIF comment) {
261: String parentId = "";
262: int i = -1, j = -1;
263:
264: String url = comment.getURL();
265: String reference_id = comment.getValue("rd-reference-id");
266: if (reference_id == null)
267: return parentId;
268:
269: //System.out.println("reference-id= "+reference_id+"\n");
270: if ((i = reference_id.indexOf(url)) >= 0) {
271: // previous token is parent. reference id has space seperated tokens
272: // value = x y z child p q r - parent is z
273: // value = y child - parent is y
274: // value = ROOT child - parent is ROOT
275: String t = reference_id;
276: if (i - 1 > 0)
277: t = reference_id.substring(0, i - 1);
278:
279: if (((j = t.lastIndexOf(" ")) >= 0) && (j + 1 < t.length())) {
280: parentId = t.substring(j + 1);
281: } else {
282: parentId = t;
283: }
284: }
285: return parentId;
286: }
287:
288: /*
289: * return true if discussion has comments
290: */
291: public boolean hasComments() {
292: // if comments has more than 1 soif then return true
293: if (comments.size() > 1)
294: return true;
295: else
296: return false;
297: }
298:
299: /**
300: * Default value of sortByDate is true
301: * If the input soif stream is sorted by field other than date
302: * then this method should be called.
303: * clustering uses an optimization if the stream is sorted by date
304: */
305: public void resetSortByDate() {
306: this .sortByDate = false;
307: }
308:
309: /**
310: * temporary method to create comment url
311: * REMOVE later
312: */
313: public void setSearchServer(String servername) {
314: this .server = servername;
315: }
316:
317: /*
318: * add server name to the url
319: */
320: public String getCommentURL(SOIF comment) {
321: return server + "/" + comment.getURL();
322: }
323:
324: /**
325: * Method mainly for debugging
326: * @returns the input soif stream in list format
327: */
328: public List getUnclusteredComments() {
329: return unclusteredComments;
330: }
331: }
|