/*
    JSPWiki - a JSP-based WikiWiki clone.

    Copyright (C) 2001-2004 Janne Jalkanen (Janne.Jalkanen@iki.fi),
                            Erik Bunn (ebu@memecry.net)

    This program is free software; you can redistribute it and/or modify
    it under the terms of the GNU Lesser General Public License as published by
    the Free Software Foundation; either version 2.1 of the License, or
    (at your option) any later version.

    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU Lesser General Public License for more details.

    You should have received a copy of the GNU Lesser General Public License
    along with this program; if not, write to the Free Software
    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
*/
package com.ecyrd.jspwiki;

import java.io.*;
import java.security.MessageDigest;
import java.security.NoSuchAlgorithmException;
import java.util.*;

import org.apache.commons.lang.time.StopWatch;
import org.apache.log4j.Logger;

import com.ecyrd.jspwiki.attachment.Attachment;
import com.ecyrd.jspwiki.event.WikiEvent;
import com.ecyrd.jspwiki.event.WikiEventListener;
import com.ecyrd.jspwiki.event.WikiEventUtils;
import com.ecyrd.jspwiki.event.WikiPageEvent;
import com.ecyrd.jspwiki.filters.BasicPageFilter;
import com.ecyrd.jspwiki.modules.InternalModule;
import com.ecyrd.jspwiki.providers.ProviderException;
import com.ecyrd.jspwiki.providers.WikiPageProvider;

/*
  BUGS

  - If a wikilink is added to a page and then removed, the ReferenceManager
    still thinks that the page refers to the wikilink page. Hm.

  - If a page is deleted, the ReferenceManager gets very confused.

  - Serialization causes page attributes to be missing when InitializablePlugins
    are not executed properly.  Thus, serialization should really also mark whether
    a page is serializable or not...
*/

/*
   A word about synchronizing:

   I expect this object to be accessed in three situations:
   - when a WikiEngine is created and it scans its wikipages
   - when the WE saves a page
   - when a JSP page accesses one of the WE's ReferenceManagers
     to display a list of (un)referenced pages.

   So, access to this class is fairly rare, and usually triggered by
   user interaction.  OTOH, the methods in this class use their storage
   objects intensively (and, sorry to say, in an unoptimized manner =).
   My deduction: using unsynchronized HashMaps etc. and syncing methods
   or code blocks is preferable to using slow, synchronized storage objects.
   We don't have iterative code here, so I'm going to use synced methods
   for now.

   Please contact me if you notice problems with ReferenceManager, and
   especially with synchronization, or if you have suggestions about
   syncing.

   ebu@memecry.net
*/

/**
 * Keeps track of wikipage references:
 * <UL>
 * <LI>What pages a given page refers to
 * <LI>What pages refer to a given page
 * </UL>
 *
 * This is a quick'n'dirty approach without any finesse in storage and
 * searching algorithms; we trust java.util.*.
 * <P>
 * This class contains two HashMaps, m_refersTo and m_referredBy.  The
 * first is indexed by WikiPage names and contains a Collection of all
 * WikiPages the page refers to.  (Multiple references are not counted,
 * naturally.)  The second is indexed by WikiPage names and contains
 * a Set of all pages that refer to the indexing page.  (Notice -
 * the keys of both Maps should be kept in sync.)
 * <P>
 * When a page is added or edited, its references are parsed, a Collection
 * is received, and we crudely replace anything previous with this new
 * Collection.  We then check each referenced page name and make sure they
 * know they are referred to by the new page.
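 * <P>
 * A minimal sketch of the resulting state (the page names here are made up
 * purely for illustration):
 * <pre>
 *   // after "MainPage" is saved containing links to "SandBox" and "RecentChanges":
 *   //   m_refersTo.get("MainPage")           == { "RecentChanges", "SandBox" }
 *   //   m_referredBy.get("SandBox")          contains "MainPage"
 *   //   m_referredBy.get("RecentChanges")    contains "MainPage"
 * </pre>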
 * <P>
 * Based on this information, we can perform non-optimal searches for
 * e.g. unreferenced pages, top ten lists, etc.
 * <P>
 * The owning class must take responsibility for filling in any pre-existing
 * information, probably by loading each and every WikiPage and calling this
 * class to update the references when created.
 *
 * @author ebu@memecry.net
 * @since 1.6.1
 */

// FIXME: The way that we save attributes is now a major booboo, and must be
//        replaced forthwith.  However, this is a workaround for the great deal
//        of problems that occur here...
public class ReferenceManager extends BasicPageFilter
        implements InternalModule, WikiEventListener {
    /** Maps page wikiname to a Collection of pages it refers to. The Collection
     *  must contain Strings. The Collection may contain names of non-existing
     *  pages.
     */
    private Map m_refersTo;
    private Map m_unmutableRefersTo;

    /** Maps page wikiname to a Set of referring pages. The Set must
     *  contain Strings. Non-existing pages (a reference exists, but not a file
     *  for the page contents) may have an empty Set in m_referredBy.
     */
    private Map m_referredBy;
    private Map m_unmutableReferredBy;

    /** The WikiEngine that owns this object. */
    private WikiEngine m_engine;

    private boolean m_matchEnglishPlurals = false;

    private static Logger log = Logger.getLogger(ReferenceManager.class);

    private static final String SERIALIZATION_FILE = "refmgr.ser";
    private static final String SERIALIZATION_DIR  = "refmgr-attr";

    /** We also use this as a generic serialization id. */
    private static final long serialVersionUID = 2L;

    /**
     * Builds a new ReferenceManager.
     *
     * @param engine The WikiEngine whose references this manager keeps track of.
     */
    public ReferenceManager(WikiEngine engine) {
        m_refersTo = new HashMap();
        m_referredBy = new HashMap();
        m_engine = engine;

        m_matchEnglishPlurals = TextUtil.getBooleanProperty(engine.getWikiProperties(),
                                                            WikiEngine.PROP_MATCHPLURALS,
                                                            m_matchEnglishPlurals);

        //
        // Create two maps that contain unmodifiable versions of the two basic maps.
        //
        m_unmutableReferredBy = Collections.unmodifiableMap(m_referredBy);
        m_unmutableRefersTo = Collections.unmodifiableMap(m_refersTo);
    }

    /**
     * Does a full reference update for the given page.  Does not synchronize or
     * serialize; assumes that the caller takes care of that afterwards.
     */
    private void updatePageReferences(WikiPage page) throws ProviderException {
        String content = m_engine.getPageManager().getPageText(page.getName(),
                                                               WikiPageProvider.LATEST_VERSION);

        TreeSet res = new TreeSet();
        Collection links = m_engine.scanWikiLinks(page, content);

        res.addAll(links);
        Collection attachments = m_engine.getAttachmentManager().listAttachments(page);

        for (Iterator atti = attachments.iterator(); atti.hasNext();) {
            res.add(((Attachment) (atti.next())).getName());
        }

        internalUpdateReferences(page.getName(), res);
    }

    /**
     * Initializes the entire reference manager with the initial set of pages
     * from the collection.
     *
     * @param pages A collection of all pages you want to be included in the reference
     *              count.
     * @since 2.2
     */
    public void initialize(Collection pages) throws ProviderException {
        log.debug("Initializing new ReferenceManager with " + pages.size() + " initial pages.");
        StopWatch sw = new StopWatch();
        sw.start();
        log.info("Starting cross reference scan of WikiPages");

        //
        // First, try to deserialize the old data from disk.  If that fails,
        // we'll go and update the entire reference lists (which'll take
        // time).
        //
        try {
            //
            // Unserialize things.  The loop below cannot be combined with
            // the loop that follows it, simply because engine.getPage() has
            // side effects such as initializing the user databases,
            // which in turn want all of the pages to be read already...
            //
            // Yes, this is a kludge.  We know.  Will be fixed.
            //
            long saved = unserializeFromDisk();

            for (Iterator it = pages.iterator(); it.hasNext();) {
                WikiPage page = (WikiPage) it.next();

                unserializeAttrsFromDisk(page);
            }

            //
            // Now we must check if any of the pages have been changed
            // while we were in the electronic la-la-land, and update
            // the references for them.
            //

            Iterator it = pages.iterator();

            while (it.hasNext()) {
                WikiPage page = (WikiPage) it.next();

                if (page instanceof Attachment) {
                    // Skip attachments
                } else {

                    // Refresh with the latest copy
                    page = m_engine.getPage(page.getName());

                    if (page.getLastModified() == null) {
                        log.fatal("Provider returns null lastModified. Please submit a bug report.");
                    } else if (page.getLastModified().getTime() > saved) {
                        updatePageReferences(page);
                    }
                }
            }

        } catch (Exception e) {
            log.info("Unable to unserialize old refmgr information, rebuilding database: "
                     + e.getMessage());
            buildKeyLists(pages);

            // Scan the existing pages from disk and update references in the manager.
            Iterator it = pages.iterator();
            while (it.hasNext()) {
                WikiPage page = (WikiPage) it.next();

                if (page instanceof Attachment) {
                    // We cannot build a reference list from the contents
                    // of attachments, so we skip them.
                } else {
                    updatePageReferences(page);

                    serializeAttrsToDisk(page);
                }
            }

            serializeToDisk();
        }

        sw.stop();
        log.info("Cross reference scan done in " + sw);

        WikiEventUtils.addWikiEventListener(m_engine.getPageManager(),
                                            WikiPageEvent.PAGE_DELETED, this);
    }

    /**
     * Reads the serialized data from the disk back to memory.
     *
     * @return the timestamp when the data was last written to disk.
     */
    private synchronized long unserializeFromDisk() throws IOException, ClassNotFoundException {
        ObjectInputStream in = null;
        long saved = 0L;

        try {
            StopWatch sw = new StopWatch();
            sw.start();

            File f = new File(m_engine.getWorkDir(), SERIALIZATION_FILE);

            in = new ObjectInputStream(new BufferedInputStream(new FileInputStream(f)));

            long ver = in.readLong();

            if (ver != serialVersionUID) {
                throw new IOException("File format has changed; I need to recalculate references.");
            }

            saved = in.readLong();
            m_refersTo = (Map) in.readObject();
            m_referredBy = (Map) in.readObject();

            in.close();

            m_unmutableReferredBy = Collections.unmodifiableMap(m_referredBy);
            m_unmutableRefersTo = Collections.unmodifiableMap(m_refersTo);

            sw.stop();
            log.debug("Read serialized data successfully in " + sw);
        } finally {
            try {
                if (in != null)
                    in.close();
            } catch (IOException ex) {
                // Ignore; we only want to make sure the stream gets closed.
            }
        }

        return saved;
    }

    /**
     * Serializes the hashmaps to disk.  The format is private; don't touch it.
     */
    private synchronized void serializeToDisk() {
        ObjectOutputStream out = null;

        try {
            StopWatch sw = new StopWatch();
            sw.start();

            File f = new File(m_engine.getWorkDir(), SERIALIZATION_FILE);

            out = new ObjectOutputStream(new BufferedOutputStream(new FileOutputStream(f)));

            out.writeLong(serialVersionUID);
            out.writeLong(System.currentTimeMillis()); // Timestamp
            out.writeObject(m_refersTo);
            out.writeObject(m_referredBy);

            out.close();

            sw.stop();

            log.debug("serialization done - took " + sw);
        } catch (IOException e) {
            log.error("Unable to serialize!", e);

            try {
                if (out != null)
                    out.close();
            } catch (IOException ex) {
                // Ignore; the stream is unusable anyway.
            }
        }
    }

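    /**
     * Returns the name of the per-page attribute cache file: the MD5 hash of
     * the page name (as UTF-8), hex-encoded, with a ".cache" suffix.
     */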
    private String getHashFileName(String pageName) throws NoSuchAlgorithmException {
        MessageDigest digest = MessageDigest.getInstance("MD5");

        byte[] dig;
        try {
            dig = digest.digest(pageName.getBytes("UTF-8"));
        } catch (UnsupportedEncodingException e) {
            throw new InternalWikiException("AAAAGH! UTF-8 is gone! My eyes! It burns...!");
        }

        return TextUtil.toHexString(dig) + ".cache";
    }

    /**
     * Reads the serialized page attribute data from the disk back to memory.
     *
     * @return the timestamp when the data was last written to disk.
     */
    private synchronized long unserializeAttrsFromDisk(WikiPage p) throws IOException, ClassNotFoundException {
        ObjectInputStream in = null;
        long saved = 0L;

        try {
            StopWatch sw = new StopWatch();
            sw.start();

            //
            // Find the attribute cache, and check if it exists
            //
            File f = new File(m_engine.getWorkDir(), SERIALIZATION_DIR);

            f = new File(f, getHashFileName(p.getName()));

            if (!f.exists()) {
                return 0L;
            }

            log.debug("Deserializing attributes for " + p.getName());

            in = new ObjectInputStream(new BufferedInputStream(new FileInputStream(f)));

            long ver = in.readLong();

            if (ver != serialVersionUID) {
                log.debug("File format has changed; cannot deserialize.");
                return 0L;
            }

            saved = in.readLong();

            String name = in.readUTF();

            if (!name.equals(p.getName())) {
                log.debug("File name does not match (" + name + "), skipping...");
                return 0L; // Not here
            }

            long entries = in.readLong();

            for (int i = 0; i < entries; i++) {
                String key = in.readUTF();
                Object value = in.readObject();

                p.setAttribute(key, value);

                log.debug(" attr: " + key + "=" + value);
            }

            in.close();

            sw.stop();
            log.debug("Read serialized data for " + name + " successfully in " + sw);
        } catch (NoSuchAlgorithmException e) {
            log.fatal("No MD5!?!");
        } finally {
            try {
                if (in != null)
                    in.close();
            } catch (IOException ex) {
                // Ignore; we only want to make sure the stream gets closed.
            }
        }

        return saved;
    }

    /**
     * Serializes a single page's attributes to disk.  The format is private; don't touch it.
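     * <p>
     * For the record, the file currently contains, in order: the serialization
     * version (long), a timestamp (long), the page name (UTF), the number of
     * attribute entries written (long), and then each Serializable attribute as
     * a UTF key followed by its value object.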
     */
    private synchronized void serializeAttrsToDisk(WikiPage p) {
        ObjectOutputStream out = null;

        try {
            // FIXME: There is a concurrency issue here...
            Set entries = p.getAttributes().entrySet();

            if (entries.size() == 0)
                return;

            StopWatch sw = new StopWatch();
            sw.start();

            File f = new File(m_engine.getWorkDir(), SERIALIZATION_DIR);

            if (!f.exists())
                f.mkdirs();

            //
            // Create a digest for the name
            //
            f = new File(f, getHashFileName(p.getName()));

            out = new ObjectOutputStream(new BufferedOutputStream(new FileOutputStream(f)));

            out.writeLong(serialVersionUID);
            out.writeLong(System.currentTimeMillis()); // Timestamp

            out.writeUTF(p.getName());

            //
            // Count only the Serializable attributes, so that the entry count
            // written here matches the number of entries the reader will find.
            //
            long serializableEntries = 0;
            for (Iterator i = entries.iterator(); i.hasNext();) {
                Map.Entry e = (Map.Entry) i.next();

                if (e.getValue() instanceof Serializable) {
                    serializableEntries++;
                }
            }

            out.writeLong(serializableEntries);

            for (Iterator i = entries.iterator(); i.hasNext();) {
                Map.Entry e = (Map.Entry) i.next();

                if (e.getValue() instanceof Serializable) {
                    out.writeUTF((String) e.getKey());
                    out.writeObject(e.getValue());
                }
            }

            out.close();

            sw.stop();

            log.debug("serialization for " + p.getName() + " done - took " + sw);
        } catch (IOException e) {
            log.error("Unable to serialize!", e);

            try {
                if (out != null)
                    out.close();
            } catch (IOException ex) {
                // Ignore; the stream is unusable anyway.
            }
        } catch (NoSuchAlgorithmException e) {
            log.fatal("No MD5 algorithm!?!");
        }
    }

    /**
     * After the page has been saved, updates the reference lists.
     */
    public void postSave(WikiContext context, String content) {
        WikiPage page = context.getPage();

        updateReferences(page.getName(),
                         context.getEngine().scanWikiLinks(page, content));

        serializeAttrsToDisk(page);
    }

    /**
     * Updates the m_refersTo and m_referredBy hashmaps when a page has been
     * deleted.
     * <P>
     * Within the m_refersTo map the pagename is a key.  The whole key-value set
     * has to be removed to keep the map clean.
     * Within the m_referredBy map the name is stored as a value.  Since a key
     * can have more than one value, we have to delete just the key-value pair
     * referring page : deleted page.
     *
     * @param page The WikiPage to remove from the maps.
     */
    public synchronized void pageRemoved(WikiPage page) {
        String pageName = page.getName();

        pageRemoved(pageName);
    }

    private void pageRemoved(String pageName) {
        Collection refTo = (Collection) m_refersTo.get(pageName);

        if (refTo != null) {
            Iterator it_refTo = refTo.iterator();
            while (it_refTo.hasNext()) {
                String referredPageName = (String) it_refTo.next();
                Set refBy = (Set) m_referredBy.get(referredPageName);

                if (refBy == null)
                    throw new InternalWikiException("Refmgr out of sync: page " + pageName
                                                    + " refers to " + referredPageName
                                                    + ", which has null referrers.");

                refBy.remove(pageName);

                m_referredBy.remove(referredPageName);

                // We won't put it back again if it becomes empty and does not exist.  It will be added
                // later on anyway, if it becomes referenced again.
                if (!(refBy.isEmpty() && !m_engine.pageExists(referredPageName))) {
                    m_referredBy.put(referredPageName, refBy);
                }
            }

            log.debug("Removing from m_refersTo HashMap key:value "
                      + pageName + ":" + m_refersTo.get(pageName));
            m_refersTo.remove(pageName);
        }

        Set refBy = (Set) m_referredBy.get(pageName);
        if (refBy == null || refBy.isEmpty()) {
            m_referredBy.remove(pageName);
        }

        //
        // Remove any traces from the disk, too
        //
        serializeToDisk();

        try {
            File f = new File(m_engine.getWorkDir(), SERIALIZATION_DIR);

            f = new File(f, getHashFileName(pageName));

            if (f.exists())
                f.delete();
        } catch (NoSuchAlgorithmException e) {
            log.error("What do you mean - no such algorithm?", e);
        }
    }

    /**
     * Updates the referred pages of a new or edited WikiPage.  If a refersTo
     * entry for this page already exists, it is removed and a new one is built
     * from scratch.  Also calls updateReferredBy() for each referenced page.
     * <P>
     * This is the method to call when a new page has been created and we
     * want to a) set up its references and b) notify the referred pages
     * of the references.  Use this method during run-time.
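     * <P>
     * A typical call site might look like this (a sketch only; scanWikiLinks()
     * is used the same way elsewhere in this class):
     * <pre>
     *   Collection links = engine.scanWikiLinks( page, pageText );
     *   referenceManager.updateReferences( page.getName(), links );
     * </pre>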
     *
     * @param page Name of the page to update.
     * @param references A Collection of Strings, each one pointing to a page this page references.
     */
    public synchronized void updateReferences(String page, Collection references) {
        internalUpdateReferences(page, references);

        serializeToDisk();
    }

    /**
     * Updates the referred pages of a new or edited WikiPage.  If a refersTo
     * entry for this page already exists, it is removed and a new one is built
     * from scratch.  Also calls updateReferredBy() for each referenced page.
     * <p>
     * This method does not synchronize the database to disk.
     *
     * @param page Name of the page to update.
     * @param references A Collection of Strings, each one pointing to a page this page references.
     */
    private void internalUpdateReferences(String page, Collection references) {
        page = getFinalPageName(page);

        //
        // Create a new entry in m_refersTo.
        //
        Collection oldRefTo = (Collection) m_refersTo.get(page);
        m_refersTo.remove(page);

        TreeSet cleanedRefs = new TreeSet();
        for (Iterator i = references.iterator(); i.hasNext();) {
            String ref = (String) i.next();

            ref = getFinalPageName(ref);

            cleanedRefs.add(ref);
        }

        m_refersTo.put(page, cleanedRefs);

        //
        // We know the page exists, since it's making references somewhere.
        // If an entry for it didn't exist previously in m_referredBy, make
        // sure one is added now.
        //
        if (!m_referredBy.containsKey(page)) {
            m_referredBy.put(page, new TreeSet());
        }

        //
        // Get all pages that used to be referred to by 'page' and
        // remove that reference.  (We don't want to try to figure out
        // which particular references were removed...)
        //
        cleanReferredBy(page, oldRefTo, cleanedRefs);

        //
        // Notify all referred pages of their referinesshoodicity.
        //
        Iterator it = cleanedRefs.iterator();
        while (it.hasNext()) {
            String referredPageName = (String) it.next();
            updateReferredBy(getFinalPageName(referredPageName), page);
        }
    }

    /**
     * Returns the refers-to list.  For debugging.
     */
    protected Map getRefersTo() {
        return m_refersTo;
    }

    /**
     * Returns the referred-by list.  For debugging.
     */
    protected Map getReferredBy() {
        return m_referredBy;
    }

    /**
     * Cleans the 'referred by' list, removing references by 'referrer' to
     * any other page.  Called when the reference list of 'referrer' has been
     * replaced with a new one.
     */
    private void cleanReferredBy(String referrer, Collection oldReferred, Collection newReferred) {
        // Two ways to go about this.  One is to look up all pages previously
        // referred to by referrer and remove referrer from their lists, and let
        // the update put them back in (except possibly removed ones).
        // The other is to get the old referred-to list, compare to the new,
        // and tell the ones missing in the latter to remove referrer from
        // their list.  Hm.  We'll just try the first for now.  Need to come
        // back and optimize this a bit.

        if (oldReferred == null)
            return;

        Iterator it = oldReferred.iterator();
        while (it.hasNext()) {
            String referredPage = (String) it.next();
            Set oldRefBy = (Set) m_referredBy.get(referredPage);
            if (oldRefBy != null) {
                oldRefBy.remove(referrer);
            }

            // If the page is referred to by no one AND it doesn't even
            // exist, we might just as well forget about this entry.
            // It will be added again elsewhere if new references appear.
            if (((oldRefBy == null) || (oldRefBy.isEmpty()))
                && (m_engine.pageExists(referredPage) == false)) {
                m_referredBy.remove(referredPage);
            }
        }
    }

    /**
     * When initially building a ReferenceManager from scratch, call this method
     * BEFORE calling updateReferences() with a full list of existing page names.
     * It builds the refersTo and referredBy key lists, thus enabling
     * updateReferences() to function correctly.
     * <P>
     * This method should NEVER be called after initialization.  It clears all mappings
     * from the reference tables.
     *
     * @param pages a Collection containing WikiPage objects.
     */
    private synchronized void buildKeyLists(Collection pages) {
        m_refersTo.clear();
        m_referredBy.clear();

        if (pages == null)
            return;

        Iterator it = pages.iterator();
        try {
            while (it.hasNext()) {
                WikiPage page = (WikiPage) it.next();
                // We add a non-null entry to referredBy to indicate the referred page exists
                m_referredBy.put(page.getName(), new TreeSet());
                // Just add a key to refersTo; the keys need to be in sync with referredBy.
                m_refersTo.put(page.getName(), null);
            }
        } catch (ClassCastException e) {
            log.fatal("Invalid collection entry in ReferenceManager.buildKeyLists().", e);
        }
    }

    /**
     * Marks the page as referred to by the referrer.  If no entry for the page
     * exists yet, one is created; a page may be referenced even before it has
     * been created.  (This means that some page, somewhere, has a link to a
     * page that does not yet exist.)
     * <P>
     * This method is NOT synchronized.  It should only be called from
     * within a synchronized method, or it should be made synced if necessary.
     */
    private void updateReferredBy(String page, String referrer) {
        // We're not really interested in first-level self-references.
        if (page.equals(referrer)) {
            return;
        }

        // Neither are we interested if plural forms refer to each other.
        if (m_matchEnglishPlurals) {
            String p2 = page.endsWith("s") ? page.substring(0, page.length() - 1) : page + "s";

            if (referrer.equals(p2)) {
                return;
            }
        }

        Set referrers = (Set) m_referredBy.get(page);

        // Even if 'page' has not been created yet, it can still be referenced.
        // This requires we don't use m_referredBy keys when looking up missing
        // pages, of course.
        if (referrers == null) {
            referrers = new TreeSet();
            m_referredBy.put(page, referrers);
        }
        referrers.add(referrer);
    }

    /**
     * Clears the references to a certain page so it's no longer in the map.
     *
     * @param pagename Name of the page to clear references for.
     */
    public synchronized void clearPageEntries(String pagename) {
        pagename = getFinalPageName(pagename);

        //
        // Remove this item from the referredBy list of any page
        // which this item refers to.
        //
        Collection c = (Collection) m_refersTo.get(pagename);

        if (c != null) {
            for (Iterator i = c.iterator(); i.hasNext();) {
                Collection dref = (Collection) m_referredBy.get(i.next());

                // The referredBy entry may be missing if the maps have gone
                // out of sync; guard against a NullPointerException.
                if (dref != null) {
                    dref.remove(pagename);
                }
            }
        }

        //
        // Finally, remove direct references.
        //
        m_referredBy.remove(pagename);
        m_refersTo.remove(pagename);
    }

    /**
     * Finds all unreferenced pages.  This requires a linear scan through
     * m_referredBy to locate keys with null or empty values.
     */
    public synchronized Collection findUnreferenced() {
        ArrayList unref = new ArrayList();

        Set keys = m_referredBy.keySet();
        Iterator it = keys.iterator();

        while (it.hasNext()) {
            String key = (String) it.next();
            Set refs = getReferenceList(m_referredBy, key);
            if (refs == null || refs.isEmpty()) {
                unref.add(key);
            }
        }

        return unref;
    }

    /**
     * Finds all references to non-existent pages.  This requires a linear
     * scan through m_refersTo values; each value must have a corresponding
     * key entry in the reference Maps, otherwise such a page has never
     * been created.
     * <P>
     * Returns a Collection containing Strings of uncreated page names.
     * Each non-existent page name is shown only once - we don't return information
     * on who referred to it.
     */
    public synchronized Collection findUncreated() {
        TreeSet uncreated = new TreeSet();

        // Go through m_refersTo values and check which of the referenced pages
        // do not actually exist in the engine.

        Collection allReferences = m_refersTo.values();
        Iterator it = allReferences.iterator();

        while (it.hasNext()) {
            Collection refs = (Collection) it.next();

            if (refs != null) {
                Iterator rit = refs.iterator();

                while (rit.hasNext()) {
                    String aReference = (String) rit.next();

                    if (m_engine.pageExists(aReference) == false) {
                        uncreated.add(aReference);
                    }
                }
            }
        }

        return uncreated;
    }

    /**
     * Searches for the given page in the given Map.  If m_matchEnglishPlurals
     * is set, the references stored under the singular/plural variant of the
     * name (e.g. "PageAlias" and "PageAliases") are merged into the result.
     */
    private Set getReferenceList(Map coll, String pagename) {
        Set refs = (Set) coll.get(pagename);

        if (m_matchEnglishPlurals) {
            //
            // We'll also add matches from the "other" page.
            //
            Set refs2;

            if (pagename.endsWith("s")) {
                refs2 = (Set) coll.get(pagename.substring(0, pagename.length() - 1));
            } else {
                refs2 = (Set) coll.get(pagename + "s");
            }

            if (refs2 != null) {
                if (refs != null)
                    refs.addAll(refs2);
                else
                    refs = refs2;
            }
        }
        return refs;
    }

    /**
     * Find all pages that refer to this page.  Returns null if the page
     * does not exist or is not referenced at all, otherwise returns a
     * collection containing page names (String) that refer to this one.
     * <p>
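     * A sketch of a typical check (the page name here is purely illustrative):
     * <pre>
     *   Collection referrers = referenceManager.findReferrers( "SandBox" );
     *   if( referrers == null ) {
     *       // nothing refers to "SandBox"
     *   }
     * </pre>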
     * @param pagename The page to find referrers for.
     * @return A Collection of Strings.  (The returned object is actually a Set,
     *         and the declared return type is likely to change to Set at some
     *         point.)  May return null, if the page does not exist, or if it
     *         has no references.
     */
    // FIXME: Return a Set instead of a Collection.
    public synchronized Collection findReferrers(String pagename) {
        Set refs = getReferenceList(m_referredBy, pagename);

        if (refs == null || refs.isEmpty()) {
            return null;
        }

        return refs;
    }

    /**
     * Returns all pages that refer to this page.  Note that the returned Set
     * is backed by the live reference data, which may change abruptly; any
     * access through an iterator may result in a ConcurrentModificationException.
     * <p>
     * The advantage of using this method over findReferrers() is that
     * it is very fast, as it does not create a new object.  The disadvantages
     * are that it does not do any mapping between plural names, and you
     * may end up getting a ConcurrentModificationException.
     *
     * @param pageName Page name to query.
     * @return A Set of Strings containing the names of all the pages that refer
     *         to this page.  May return null, if the page does not exist or
     *         has not been indexed yet.
     * @since 2.2.33
     */
    public Set findReferredBy(String pageName) {
        return (Set) m_unmutableReferredBy.get(getFinalPageName(pageName));
    }

    /**
     * Returns all pages that this page refers to.  You can use this as a quick
     * way of getting the links from a page, but note that it does not include
     * InterWiki, image, or external links.  It does contain attachments, though.
     * <p>
     * The returned Collection should be treated as read-only.  It reflects
     * the current state and thus is a live object, so if you are using any
     * kind of an iterator on it, be prepared for ConcurrentModificationExceptions.
     * <p>
     * The declared return type is a Collection; the current implementation,
     * however, stores the references in a Set, so each referenced page appears
     * only once.
     *
     * @param pageName Page name to query
     * @return A Collection of Strings containing the names of the pages that this page
     *         refers to.  May return null, if the page does not exist or has not
     *         been indexed yet.
     * @since 2.2.33
     */
    public Collection findRefersTo(String pageName) {
        return (Collection) m_unmutableRefersTo.get(getFinalPageName(pageName));
    }

    /**
     * This 'deepHashCode' can be used to determine if there were any
     * modifications made to the underlying to and by maps of the
     * ReferenceManager.  The maps of the ReferenceManager are not
     * synchronized, so someone could add/remove entries in them while the
     * hashCode is being computed.
     *
     * @return XOR of the hashCodes for the to and by maps of the
     *         ReferenceManager
     * @since 2.3.24
     */
    /*
       This method traps and retries if a concurrent
       modification occurs.
       TODO: It is unnecessary to recalculate the hashcode on every call; it should
       be recalculated only when the hashmaps change.  This is slow.
    */
    public int deepHashCode() {
        boolean failed = true;
        int signature = 0;

        while (failed) {
            signature = 0;
            try {
                signature ^= m_referredBy.hashCode();
                signature ^= m_refersTo.hashCode();
                failed = false;
            } catch (ConcurrentModificationException e) {
                Thread.yield();
            }
        }

        return signature;
    }

    /**
     * Returns a list of all pages that the ReferenceManager knows about.
     * This should be roughly equivalent to PageManager.getAllPages(), but without
     * the potential disk access overhead.  Note that this method is not guaranteed
     * to return a Set of really all pages (especially during startup), but it is
     * very fast.
     *
     * @return A Set of all defined page names that ReferenceManager knows about.
     * @since 2.3.24
     */
    public Set findCreated() {
        return new HashSet(m_refersTo.keySet());
    }

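    /**
     * Resolves a page name through the WikiEngine (so that e.g. plural forms
     * map to the actual page); if the lookup fails or returns null, the
     * original name is returned.
     */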
    private String getFinalPageName(String orig) {
        try {
            String s = m_engine.getFinalPageName(orig);

            if (s == null)
                s = orig;

            return s;
        } catch (ProviderException e) {
            log.error("Error while trying to fetch a page name; trying to cope with the situation.", e);

            return orig;
        }
    }

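    /**
     * Listens for WikiPageEvent.PAGE_DELETED events (for which this manager
     * registers itself in initialize()) and removes the deleted page from the
     * reference maps and from the attribute cache on disk.
     */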
    public void actionPerformed(WikiEvent event) {
        if ((event instanceof WikiPageEvent)
            && (event.getType() == WikiPageEvent.PAGE_DELETED)) {
            String pageName = ((WikiPageEvent) event).getPageName();

            if (pageName != null) {
                pageRemoved(pageName);
            }
        }
    }
}