001: /*
002: JSPWiki - a JSP-based WikiWiki clone.
003:
004: Copyright (C) 2001-2004 Janne Jalkanen (Janne.Jalkanen@iki.fi),
005: Erik Bunn (ebu@memecry.net)
006:
007: This program is free software; you can redistribute it and/or modify
008: it under the terms of the GNU Lesser General Public License as published by
009: the Free Software Foundation; either version 2.1 of the License, or
010: (at your option) any later version.
011:
012: This program is distributed in the hope that it will be useful,
013: but WITHOUT ANY WARRANTY; without even the implied warranty of
014: MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
015: GNU Lesser General Public License for more details.
016:
017: You should have received a copy of the GNU Lesser General Public License
018: along with this program; if not, write to the Free Software
019: Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
020: */
021: package com.ecyrd.jspwiki;
022:
023: import java.util.*;
024: import java.io.*;
025: import org.apache.log4j.*;
026:
027: import com.ecyrd.jspwiki.filters.BasicPageFilter;
028: import com.ecyrd.jspwiki.attachment.*;
029: import com.ecyrd.jspwiki.providers.*;
030:
031: /*
032: BUGS
033:
034: - if a wikilink is added to a page, then removed, RefMan still thinks that
035: the page refers to the wikilink page. Hm.
036:
037: - if a page is deleted, gets very confused.
038:
039: - Serialization causes page attributes to be missing, when InitializablePlugins
040: are not executed properly. Thus, serialization should really also mark whether
041: a page is serializable or not...
042: */
043:
044: /*
045: A word about synchronizing:
046:
047: I expect this object to be accessed in three situations:
048: - when a WikiEngine is created and it scans its wikipages
049: - when the WE saves a page
050: - when a JSP page accesses one of the WE's ReferenceManagers
051: to display a list of (un)referenced pages.
052:
053: So, access to this class is fairly rare, and usually triggered by
054: user interaction. OTOH, the methods in this class use their storage
055: objects intensively (and, sorry to say, in an unoptimized manner =).
056: My deduction: using unsynchronized HashMaps etc and syncing methods
057: or code blocks is preferable to using slow, synced storage objects.
058: We don't have iterative code here, so I'm going to use synced methods
059: for now.
060:
061: Please contact me if you notice problems with ReferenceManager, and
062: especially with synchronization, or if you have suggestions about
063: syncing.
064:
065: ebu@memecry.net
066: */
067:
068: /**
069: * Keeps track of wikipage references:
070: * <UL>
071: * <LI>What pages a given page refers to
072: * <LI>What pages refer to a given page
073: * </UL>
074: *
075: * This is a quick'n'dirty approach without any finesse in storage and
076: * searching algorithms; we trust java.util.*.
077: * <P>
078: * This class contains two HashMaps, m_refersTo and m_referredBy. The
079: * first is indexed by WikiPage names and contains a Collection of all
080: * WikiPages the page refers to. (Multiple references are not counted,
081: * naturally.) The second is indexed by WikiPage names and contains
082: * a Set of all pages that refer to the indexing page. (Notice -
083: * the keys of both Maps should be kept in sync.)
084: * <P>
085: * When a page is added or edited, its references are parsed, a Collection
086: * is received, and we crudely replace anything previous with this new
087: * Collection. We then check each referenced page name and make sure they
088: * know they are referred to by the new page.
089: * <P>
090: * Based on this information, we can perform non-optimal searches for
091: * e.g. unreferenced pages, top ten lists, etc.
092: * <P>
093: * The owning class must take responsibility of filling in any pre-existing
094: * information, probably by loading each and every WikiPage and calling this
095: * class to update the references when created.
096: *
097: * @author ebu@memecry.net
098: * @since 1.6.1
099: */
100:
101: public class ReferenceManager extends BasicPageFilter {
102: /** Maps page wikiname to a Collection of pages it refers to. The Collection
103: * must contain Strings. The Collection may contain names of non-existing
104: * pages.
105: */
106: private Map m_refersTo;
107: /** Maps page wikiname to a Set of referring pages. The Set must
108: * contain Strings. Non-existing pages (a reference exists, but not a file
109: * for the page contents) may have an empty Set in m_referredBy.
110: */
111: private Map m_referredBy;
112: /** The WikiEngine that owns this object. */
113: private WikiEngine m_engine;
114:
115: private boolean m_matchEnglishPlurals = false;
116:
117: private static Logger log = Logger
118: .getLogger(ReferenceManager.class);
119:
120: private static final String SERIALIZATION_FILE = "refmgr.ser";
121:
122: /**
123: * Builds a new ReferenceManager.
124: *
125: * @param engine The WikiEngine to which this is meeting.
126: */
127: public ReferenceManager(WikiEngine engine) {
128: m_refersTo = new HashMap();
129: m_referredBy = new HashMap();
130: m_engine = engine;
131:
132: m_matchEnglishPlurals = TextUtil.getBooleanProperty(engine
133: .getWikiProperties(), WikiEngine.PROP_MATCHPLURALS,
134: m_matchEnglishPlurals);
135:
136: }
137:
138: /**
139: * Does a full reference update.
140: */
141: private void updatePageReferences(WikiPage page)
142: throws ProviderException {
143: String content = m_engine.getPageManager().getPageText(
144: page.getName(), WikiPageProvider.LATEST_VERSION);
145: Collection links = m_engine.scanWikiLinks(page, content);
146: Collection attachments = m_engine.getAttachmentManager()
147: .listAttachments(page);
148:
149: for (Iterator atti = attachments.iterator(); atti.hasNext();) {
150: links.add(((Attachment) (atti.next())).getName());
151: }
152:
153: updateReferences(page.getName(), links);
154: }
155:
156: /**
157: * Initializes the entire reference manager with the initial set of pages
158: * from the collection.
159: *
160: * @param pages A collection of all pages you want to be included in the reference
161: * count.
162: * @since 2.2
163: */
164: public void initialize(Collection pages) throws ProviderException {
165: log.debug("Initializing new ReferenceManager with "
166: + pages.size() + " initial pages.");
167: long start = System.currentTimeMillis();
168: log.info("Starting cross reference scan of WikiPages");
169:
170: //
171: // First, try to serialize old data from disk. If that fails,
172: // we'll go and update the entire reference lists (which'll take
173: // time)
174: //
175: try {
176: long saved = unserializeFromDisk();
177:
178: //
179: // Now we must check if any of the pages have been changed
180: // while we were in the electronic la-la-land, and update
181: // the references for them.
182: //
183:
184: Iterator it = pages.iterator();
185:
186: while (it.hasNext()) {
187: WikiPage page = (WikiPage) it.next();
188:
189: if (page instanceof Attachment) {
190: // Skip attachments
191: } else {
192: // Refresh with the latest copy
193: page = m_engine.getPage(page.getName());
194: if (page.getLastModified() == null) {
195: log
196: .fatal("Provider returns null lastModified. Please submit a bug report.");
197: } else if (page.getLastModified().getTime() > saved) {
198: updatePageReferences(page);
199: }
200: }
201: }
202:
203: } catch (Exception e) {
204: log
205: .info("Unable to unserialize old refmgr information, rebuilding database: "
206: + e.getMessage());
207: buildKeyLists(pages);
208:
209: // Scan the existing pages from disk and update references in the manager.
210: Iterator it = pages.iterator();
211: while (it.hasNext()) {
212: WikiPage page = (WikiPage) it.next();
213:
214: if (page instanceof Attachment) {
215: // We cannot build a reference list from the contents
216: // of attachments, so we skip them.
217: } else {
218: updatePageReferences(page);
219: }
220: }
221:
222: serializeToDisk();
223: }
224:
225: log.info("Cross reference scan done ("
226: + (System.currentTimeMillis() - start) + " ms)");
227:
228: }
229:
230: /**
231: * Reads the serialized data from the disk back to memory.
232: * Returns the date when the data was last written on disk
233: */
234: private synchronized long unserializeFromDisk() throws IOException,
235: ClassNotFoundException {
236: ObjectInputStream in = null;
237: long saved = 0L;
238:
239: try {
240: long start = System.currentTimeMillis();
241:
242: File f = new File(m_engine.getWorkDir(), SERIALIZATION_FILE);
243:
244: in = new ObjectInputStream(new BufferedInputStream(
245: new FileInputStream(f)));
246:
247: saved = in.readLong();
248: m_refersTo = (Map) in.readObject();
249: m_referredBy = (Map) in.readObject();
250:
251: in.close();
252:
253: long finish = System.currentTimeMillis();
254: log.debug("Read serialized data successfully in "
255: + (finish - start) + "ms");
256: } finally {
257: try {
258: if (in != null)
259: in.close();
260: } catch (IOException ex) {
261: }
262: }
263:
264: return saved;
265: }
266:
267: /**
268: * Serializes hashmaps to disk. The format is private, don't touch it.
269: */
270: private synchronized void serializeToDisk() {
271: ObjectOutputStream out = null;
272:
273: try {
274: long start = System.currentTimeMillis();
275:
276: File f = new File(m_engine.getWorkDir(), SERIALIZATION_FILE);
277:
278: out = new ObjectOutputStream(new BufferedOutputStream(
279: new FileOutputStream(f)));
280:
281: out.writeLong(System.currentTimeMillis()); // Timestamp
282: out.writeObject(m_refersTo);
283: out.writeObject(m_referredBy);
284:
285: out.close();
286:
287: long finish = System.currentTimeMillis();
288:
289: log.debug("serialization done - took " + (finish - start)
290: + "ms");
291: } catch (IOException e) {
292: log.error("Unable to serialize!");
293:
294: try {
295: if (out != null)
296: out.close();
297: } catch (IOException ex) {
298: }
299: }
300: }
301:
302: /**
303: * After the page has been saved, updates the reference lists.
304: */
305: public void postSave(WikiContext context, String content) {
306: WikiPage page = context.getPage();
307:
308: updateReferences(page.getName(), context.getEngine()
309: .scanWikiLinks(page, content));
310:
311: serializeToDisk();
312: }
313:
314: public synchronized void pageRemoved(WikiPage page) {
315: String pageName = page.getName();
316:
317: m_refersTo.remove(pageName);
318: clearPageEntries(pageName);
319: }
320:
321: /**
322: * Updates the referred pages of a new or edited WikiPage. If a refersTo
323: * entry for this page already exists, it is removed and a new one is built
324: * from scratch. Also calls updateReferredBy() for each referenced page.
325: * <P>
326: * This is the method to call when a new page has been created and we
327: * want to a) set up its references and b) notify the referred pages
328: * of the references. Use this method during run-time.
329: *
330: * @param page Name of the page to update.
331: * @param references A Collection of Strings, each one pointing to a page this page references.
332: */
333: public synchronized void updateReferences(String page,
334: Collection references) {
335: //
336: // Create a new entry in m_refersTo.
337: //
338: Collection oldRefTo = (Collection) m_refersTo.get(page);
339: m_refersTo.remove(page);
340: m_refersTo.put(page, references);
341:
342: //
343: // We know the page exists, since it's making references somewhere.
344: // If an entry for it didn't exist previously in m_referredBy, make
345: // sure one is added now.
346: //
347: if (!m_referredBy.containsKey(page)) {
348: m_referredBy.put(page, new TreeSet());
349: }
350:
351: //
352: // Get all pages that used to be referred to by 'page' and
353: // remove that reference. (We don't want to try to figure out
354: // which particular references were removed...)
355: //
356: cleanReferredBy(page, oldRefTo, references);
357:
358: //
359: // Notify all referred pages of their referinesshoodicity.
360: //
361: Iterator it = references.iterator();
362: while (it.hasNext()) {
363: String referredPageName = (String) it.next();
364: updateReferredBy(referredPageName, page);
365: }
366: }
367:
368: /**
369: * Returns the refers-to list. For debugging.
370: */
371: protected Map getRefersTo() {
372: return (m_refersTo);
373: }
374:
375: /**
376: * Returns the referred-by list. For debugging.
377: */
378: protected Map getReferredBy() {
379: return (m_referredBy);
380: }
381:
382: /**
383: * Cleans the 'referred by' list, removing references by 'referrer' to
384: * any other page. Called after 'referrer' is removed.
385: */
386: private void cleanReferredBy(String referrer,
387: Collection oldReferred, Collection newReferred) {
388: // Two ways to go about this. One is to look up all pages previously
389: // referred by referrer and remove referrer from their lists, and let
390: // the update put them back in (except possibly removed ones).
391: // The other is to get the old referred to list, compare to the new,
392: // and tell the ones missing in the latter to remove referrer from
393: // their list. Hm. We'll just try the first for now. Need to come
394: // back and optimize this a bit.
395:
396: if (oldReferred == null)
397: return;
398:
399: Iterator it = oldReferred.iterator();
400: while (it.hasNext()) {
401: String referredPage = (String) it.next();
402: Set oldRefBy = (Set) m_referredBy.get(referredPage);
403: if (oldRefBy != null) {
404: oldRefBy.remove(referrer);
405: }
406:
407: // If the page is referred to by no one AND it doesn't even
408: // exist, we might just as well forget about this entry.
409: // It will be added again elsewhere if new references appear.
410: if (((oldRefBy == null) || (oldRefBy.isEmpty()))
411: && (m_engine.pageExists(referredPage) == false)) {
412: m_referredBy.remove(referredPage);
413: }
414: }
415:
416: }
417:
418: /**
419: * When initially building a ReferenceManager from scratch, call this method
420: * BEFORE calling updateReferences() with a full list of existing page names.
421: * It builds the refersTo and referredBy key lists, thus enabling
422: * updateReferences() to function correctly.
423: * <P>
424: * This method should NEVER be called after initialization. It clears all mappings
425: * from the reference tables.
426: *
427: * @param pages a Collection containing WikiPage objects.
428: */
429: private synchronized void buildKeyLists(Collection pages) {
430: m_refersTo.clear();
431: m_referredBy.clear();
432:
433: if (pages == null)
434: return;
435:
436: Iterator it = pages.iterator();
437: try {
438: while (it.hasNext()) {
439: WikiPage page = (WikiPage) it.next();
440: // We add a non-null entry to referredBy to indicate the referred page exists
441: m_referredBy.put(page.getName(), new TreeSet());
442: // Just add a key to refersTo; the keys need to be in sync with referredBy.
443: m_refersTo.put(page.getName(), null);
444: }
445: } catch (ClassCastException e) {
446: log
447: .fatal(
448: "Invalid collection entry in ReferenceManager.buildKeyLists().",
449: e);
450: }
451: }
452:
453: /**
454: * Marks the page as referred to by the referrer. If the page does not
455: * exist previously, nothing is done. (This means that some page, somewhere,
456: * has a link to a page that does not exist.)
457: * <P>
458: * This method is NOT synchronized. It should only be referred to from
459: * within a synchronized method, or it should be made synced if necessary.
460: */
461: private void updateReferredBy(String page, String referrer) {
462: // We're not really interested in first level self-references.
463: if (page.equals(referrer)) {
464: return;
465: }
466:
467: // Neither are we interested if plural forms refer to each other.
468: if (m_matchEnglishPlurals) {
469: String p2 = page.endsWith("s") ? page.substring(0, page
470: .length() - 1) : page + "s";
471:
472: if (referrer.equals(p2)) {
473: return;
474: }
475: }
476:
477: Set referrers = (Set) m_referredBy.get(page);
478:
479: // Even if 'page' has not been created yet, it can still be referenced.
480: // This requires we don't use m_referredBy keys when looking up missing
481: // pages, of course.
482: if (referrers == null) {
483: referrers = new TreeSet();
484: m_referredBy.put(page, referrers);
485: }
486: referrers.add(referrer);
487: }
488:
489: /**
490: * Clears the references to a certain page so it's no longer in the map.
491: *
492: * @param pagename Name of the page to clear references for.
493: */
494: public synchronized void clearPageEntries(String pagename) {
495: m_referredBy.remove(pagename);
496: }
497:
498: private boolean wikimatch(String wikiname, String pagename) {
499: if ("*".equals(wikiname))
500: return true;
501: if (wikiname == null)
502: wikiname = WikiContext.getWikiName();
503: WikiEngine _tmp = m_engine;
504: String pagewiki = WikiEngine.getWikiName(pagename);
505: return pagewiki == null && wikiname == null || pagewiki != null
506: && pagewiki.equals(wikiname);
507: }
508:
509: /**
510: * Finds all unreferenced pages. This requires a linear scan through
511: * m_referredBy to locate keys with null or empty values.
512: */
513: public synchronized Collection findUnreferenced(String wikiname) {
514: ArrayList unref = new ArrayList();
515:
516: Set keys = m_referredBy.keySet();
517: Iterator it = keys.iterator();
518:
519: while (it.hasNext()) {
520: String key = (String) it.next();
521: if (wikimatch(wikiname, key)) {
522: //Set refs = (Set) m_referredBy.get( key );
523: Set refs = getReferenceList(m_referredBy, key);
524: if (refs == null || refs.isEmpty()) {
525: unref.add(key);
526: }
527: }
528: }
529:
530: return unref;
531: }
532:
533: public synchronized Collection findUnreferenced() {
534: return findUnreferenced(WikiContext.getWikiName());
535: }
536:
537: /**
538: * Finds all references to non-existant pages. This requires a linear
539: * scan through m_refersTo values; each value must have a corresponding
540: * key entry in the reference Maps, otherwise such a page has never
541: * been created.
542: * <P>
543: * Returns a Collection containing Strings of unreferenced page names.
544: * Each non-existant page name is shown only once - we don't return information
545: * on who referred to it.
546: */
547: public synchronized Collection findUncreated(String wikiname) {
548: TreeSet uncreated = new TreeSet();
549:
550: // Go through m_refersTo values and check that m_refersTo has the corresponding keys.
551: // We want to reread the code to make sure our HashMaps are in sync...
552:
553: Collection allReferences = m_refersTo.values();
554: Iterator it = allReferences.iterator();
555:
556: while (it.hasNext()) {
557: Collection refs = (Collection) it.next();
558:
559: if (refs != null) {
560: Iterator rit = refs.iterator();
561:
562: while (rit.hasNext()) {
563: String aReference = (String) rit.next();
564:
565: int a;
566: if (aReference.indexOf("xyzzy") != -1)
567: a = 77;
568:
569: if (wikimatch(wikiname, aReference)
570: && !m_engine.pageExists(aReference)) {
571: uncreated.add(aReference);
572: }
573: }
574: }
575: }
576:
577: return uncreated;
578: }
579:
580: public synchronized Collection findUncreated() {
581: return findUncreated(WikiContext.getWikiName());
582: }
583:
584: /**
585: * Searches for the given page in the given Map.
586: */
587: private Set getReferenceList(Map coll, String pagename) {
588: pagename = WikiEngine.makeAbsolutePageName(pagename);
589: Set refs = (Set) coll.get(pagename);
590:
591: if (m_matchEnglishPlurals) {
592: //
593: // We'll add also matches from the "other" page.
594: //
595: Set refs2;
596:
597: if (pagename.endsWith("s")) {
598: refs2 = (Set) coll.get(pagename.substring(0, pagename
599: .length() - 1));
600: } else {
601: refs2 = (Set) coll.get(pagename + "s");
602: }
603:
604: if (refs2 != null) {
605: if (refs != null)
606: refs.addAll(refs2);
607: else
608: refs = refs2;
609: }
610: }
611:
612: return refs;
613: }
614:
615: /**
616: * Find all pages that refer to this page. Returns null if the page
617: * does not exist or is not referenced at all, otherwise returns a
618: * collection containing page names (String) that refer to this one.
619: */
620: public synchronized Collection findReferrers(String pagename) {
621: pagename = WikiEngine.makeAbsolutePageName(pagename);
622: Set refs = getReferenceList(m_referredBy, pagename);
623:
624: if (refs == null || refs.isEmpty()) {
625: return null;
626: } else {
627: return refs;
628: }
629: }
630:
631: }
|