001: /*
002: * Copyright 2004 Outerthought bvba and Schaubroeck nv
003: *
004: * Licensed under the Apache License, Version 2.0 (the "License");
005: * you may not use this file except in compliance with the License.
006: * You may obtain a copy of the License at
007: *
008: * http://www.apache.org/licenses/LICENSE-2.0
009: *
010: * Unless required by applicable law or agreed to in writing, software
011: * distributed under the License is distributed on an "AS IS" BASIS,
012: * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
013: * See the License for the specific language governing permissions and
014: * limitations under the License.
015: */
016: package org.outerj.daisy.repository.serverimpl.linkextraction;
017:
018: import org.outerj.daisy.repository.*;
019: import org.outerj.daisy.repository.serverimpl.LocalRepositoryManager;
020: import org.outerj.daisy.repository.schema.PartType;
021: import org.outerj.daisy.repository.commonimpl.schema.CommonRepositorySchema;
022: import org.outerj.daisy.repository.commonimpl.AuthenticatedUser;
023: import org.outerj.daisy.repository.commonimpl.CommonRepository;
024: import org.outerj.daisy.repository.commonimpl.DocId;
025: import org.outerj.daisy.linkextraction.LinkType;
026: import org.outerj.daisy.linkextraction.LinkExtractor;
027:
028: import java.util.Collection;
029: import java.io.InputStream;
030:
031: public class LinkExtractorHelper {
032: private final Document document;
033: private final String documentBranch;
034: private final String documentLanguage;
035: private final DocId docId;
036: private final long liveVersion;
037: private final long lastVersion;
038: private final boolean documentContainsLastVersion;
039: private LinkCollectorImpl linkCollector;
040: private final CommonRepositorySchema repositorySchema;
041: private final CommonRepository repository;
042: private final AuthenticatedUser systemUser;
043: private final LocalRepositoryManager.Context context;
044:
045: /**
046: * Constructor.
047: *
048: * <p>The boolean documentContainsLastVersion indicates whether the data currently in the document object
049: * is the latest version. This is needed because the linkextractor is used during the storage of the
050: * document, when the new version does not yet really exist (likewise, that's why we need documentId
051: * and liveVersion explicitely).
052: */
053: public LinkExtractorHelper(Document document, DocId docId,
054: long liveVersion, long lastVersion,
055: boolean documentContainsLastVersion,
056: AuthenticatedUser systemUser,
057: LocalRepositoryManager.Context context) {
058: this .document = document;
059: this .documentBranch = String.valueOf(document.getBranchId());
060: this .documentLanguage = String
061: .valueOf(document.getLanguageId());
062: this .docId = docId;
063: this .liveVersion = liveVersion;
064: this .lastVersion = lastVersion;
065: this .documentContainsLastVersion = documentContainsLastVersion;
066: this .repository = context.getCommonRepository();
067: this .repositorySchema = repository.getRepositorySchema();
068: this .systemUser = systemUser;
069: this .context = context;
070: }
071:
072: public Collection<LinkInfo> extract() throws Exception {
073: this .linkCollector = new LinkCollectorImpl(docId.getSeqId(),
074: docId.getNsId(), document.getBranchId(), document
075: .getLanguageId(), repository, systemUser);
076:
077: Field[] fields;
078: Part[] parts;
079: Link[] links;
080: if (documentContainsLastVersion) {
081: fields = document.getFields().getArray();
082: parts = document.getParts().getArray();
083: links = document.getLinks().getArray();
084: } else {
085: Version version = document.getVersion(document
086: .getLastVersionId());
087: fields = document.getFields().getArray();
088: parts = version.getParts().getArray();
089: links = version.getLinks().getArray();
090: }
091:
092: if (lastVersion == liveVersion) {
093: extractLinks(fields, true, true);
094: extractLinks(parts, true, true);
095: extractLinks(links, true, true);
096: } else {
097: extractLinks(fields, true, false);
098: extractLinks(parts, true, false);
099: extractLinks(links, true, false);
100: if (liveVersion != -1) {
101: Version version = document.getVersion(liveVersion);
102: extractLinks(version.getFields().getArray(), false,
103: true);
104: extractLinks(version.getParts().getArray(), false, true);
105: extractLinks(version.getLinks().getArray(), false, true);
106: }
107: }
108:
109: return linkCollector.getLinks();
110: }
111:
112: private void extractLinks(Part[] parts, boolean isLastVersion,
113: boolean isLiveVersion) throws Exception {
114: for (Part part : parts) {
115: this .linkCollector.changeTo(part.getTypeId(),
116: isLastVersion, isLiveVersion);
117: PartType partType = repositorySchema.getPartTypeById(part
118: .getTypeId(), false, systemUser);
119:
120: if (partType.getLinkExtractor() != null) {
121: LinkExtractor linkExtractor = context
122: .getLinkExtractor(partType.getLinkExtractor());
123: if (linkExtractor != null) {
124: InputStream is = null;
125: try {
126: is = part.getDataStream();
127: linkExtractor.extractLinks(is, linkCollector,
128: documentBranch, documentLanguage);
129: } catch (Throwable e) {
130: String whichDoc = document.getId() != null ? document
131: .getVariantKey().toString()
132: : " (new document) ";
133: context.getLogger().error(
134: "Error calling link extractor for "
135: + whichDoc + " part "
136: + partType.getName(), e);
137: } finally {
138: if (is != null)
139: is.close();
140: }
141: }
142: }
143: }
144: }
145:
146: private void extractLinks(Link[] links, boolean isLastVersion,
147: boolean isLiveVersion) {
148: this .linkCollector.changeTo(-1, isLastVersion, isLiveVersion);
149: for (Link link : links) {
150: linkCollector.addLink(LinkType.OUT_OF_LINE, link
151: .getTarget());
152: }
153: }
154:
155: private void extractLinks(Field[] fields, boolean isLastVersion,
156: boolean isLiveVersion) {
157: this .linkCollector.changeTo(-1, isLastVersion, isLiveVersion);
158: for (Field field : fields) {
159: if (field.getValueType() == ValueType.LINK) {
160: Object value = field.getValue();
161:
162: Object[] values;
163: if (!(value instanceof Object[]))
164: values = new Object[] { value };
165: else
166: values = (Object[]) value;
167:
168: if (field.isHierarchical()) {
169: for (Object hierarchyValue : values) {
170: extractLinkFieldLinks(((HierarchyPath) hierarchyValue)
171: .getElements());
172: }
173: } else {
174: extractLinkFieldLinks(values);
175: }
176: }
177: }
178: }
179:
180: private void extractLinkFieldLinks(Object[] values) {
181: for (Object value : values) {
182: VariantKey variantKey = (VariantKey) value;
183: String targetBranch = variantKey.getBranchId() != -1 ? String
184: .valueOf(variantKey.getBranchId())
185: : documentBranch;
186: String targetLanguage = variantKey.getLanguageId() != -1 ? String
187: .valueOf(variantKey.getLanguageId())
188: : documentLanguage;
189: linkCollector.addLink(LinkType.FIELD, variantKey
190: .getDocumentId(), targetBranch, targetLanguage, -1);
191: }
192: }
193: }
|