Source Code Cross Referenced for HtmlConverter.java in » Portal » Open-Portal » com » sun » portal » wireless » htmlconversion » Java Source Code / Java Documentation | Java Source Code and Java Documentation

Java Source Code / Java Documentation

1.	6.0 JDK Core
2.	6.0 JDK Modules
3.	6.0 JDK Modules com.sun
4.	6.0 JDK Modules com.sun.java
5.	6.0 JDK Modules sun
6.	6.0 JDK Platform
7.	Ajax
8.	Apache Harmony Java SE
9.	Aspect oriented
10.	Authentication Authorization
11.	Blogger System
12.	Build
13.	Byte Code
14.	Cache
15.	Chart
16.	Chat
17.	Code Analyzer
18.	Collaboration
19.	Content Management System
20.	Database Client
21.	Database DBMS
22.	Database JDBC Connection Pool
23.	Database ORM
24.	Development
25.	EJB Server geronimo
26.	EJB Server GlassFish
27.	EJB Server JBoss 4.2.1
28.	EJB Server resin 3.1.5
29.	ERP CRM Financial
30.	ESB
31.	Forum
32.	GIS
33.	Graphic Library
34.	Groupware
35.	HTML Parser
36.	IDE
37.	IDE Eclipse
38.	IDE Netbeans
39.	Installer
40.	Internationalization Localization
41.	Inversion of Control
42.	Issue Tracking
43.	J2EE
44.	JBoss
45.	JMS
46.	JMX
47.	Library
48.	Mail Clients
49.	Net
50.	Parser
51.	PDF
52.	Portal
53.	Profiler
54.	Project Management
55.	Report
56.	RSS RDF
57.	Rule Engine
58.	Science
59.	Scripting
60.	Search Engine
61.	Security
62.	Servlet Container
63.	Source Control
64.	Swing Library
65.	Template Engine
66.	Test Coverage
67.	Testing
68.	UML
69.	Web Crawler
70.	Web Framework
71.	Web Mail
72.	Web Server
73.	Web Services
74.	Web Services apache cxf 2.0.1
75.	Web Services AXIS2
76.	Wiki Engine
77.	Workflow Engines
78.	XML
79.	XML UI

Java

Java Tutorial

Illustrator Tutorials

GIMP Tutorials

C# / C Sharp

C# / CSharp Tutorial

C# / CSharp Open Source

SQL Server / T-SQL Tutorial

Oracle PL / SQL

Oracle PL/SQL Tutorial

Flash / Flex / ActionScript

VBA / Excel / Access / Word

XML

XML Tutorial

Microsoft Office PowerPoint 2007 Tutorial

Microsoft Office Excel 2007 Tutorial

Microsoft Office Word 2007 Tutorial

Java Source Code / Java Documentation » Portal » Open Portal » com.sun.portal.wireless.htmlconversion

Source Cross Referenced Class Diagram Java Document (Java Doc)

001:        /*
002:         * Created on Feb 9, 2005
003:         *
004:         */
005:        package com.sun.portal.wireless.htmlconversion;
006:
007:        import java.io.IOException;
008:        import java.io.StringReader;
009:        import java.io.StringWriter;
010:
011:        import javax.servlet.http.HttpServletRequest;
012:        import javax.servlet.http.HttpServletResponse;
013:        import javax.swing.text.html.parser.ParserDelegator;
014:        import javax.xml.parsers.DocumentBuilder;
015:        import javax.xml.parsers.DocumentBuilderFactory;
016:        import javax.xml.transform.Transformer;
017:        import javax.xml.transform.TransformerConfigurationException;
018:        import javax.xml.transform.TransformerException;
019:        import javax.xml.transform.TransformerFactory;
020:        import javax.xml.transform.dom.DOMSource;
021:        import javax.xml.transform.stream.StreamResult;
022:        import org.w3c.dom.Document;
023:        import org.xml.sax.InputSource;
024:        import org.xml.sax.SAXException;
025:        import org.xml.sax.SAXNotRecognizedException;
026:        import org.xml.sax.SAXNotSupportedException;
027:        import org.xml.sax.XMLReader;
028:        import org.xml.sax.helpers.XMLReaderFactory;
029:
030:        import com.sun.portal.wireless.htmlconversion.servlet.URLTranscoder;
031:        import com.sun.portal.log.common.PortalLogger;
032:        import java.util.logging.Level;
033:        import java.util.logging.Logger;
034:
035:        /**
036:         * Public API for this package that converts HTML input to AML output.
037:         * 
038:         * @author ashwin.mathew@sun.com
039:         */
040:        public class HtmlConverter {
041:
042:            /**
043:             * Unknown whether the document is HTML or XHTML, the API will try to
044:             * determine the document type.
045:             */
046:            public static final int DOCUMENT_TYPE_UNKNOWN = 0;
047:
048:            /**
049:             * Force transformation of the document as HTML.
050:             */
051:            public static final int DOCUMENT_TYPE_HTML = 1;
052:
053:            /**
054:             * Force transformation of the document as XHTML.
055:             */
056:            public static final int DOCUMENT_TYPE_XHTML = 2;
057:
058:            // The input HTML
059:            private String input;
060:
061:            // The output AML
062:            private Document output;
063:
064:            private URLTranscoder encoder;
065:
066:            private boolean isFragment;
067:
068:            private int documentType;
069:
070:            private boolean isTransformed = false;
071:
072:            private static final String AML_PAGE_START_TAG = "<AmlPage>";
073:
074:            private static final String AML_PAGE_END_TAG = "</AmlPage>";
075:
076:            private static final int AML_PAGE_START_TAG_LENGTH = AML_PAGE_START_TAG
077:                    .length();
078:
079:            private static final String DTD_START = "<!DOCTYPE";
080:
081:            private static final char DTD_END = '>';
082:
083:            private static final String XHTML_UPPER = "XHTML";
084:
085:            private static final String XHTML_LOWER = "xhtml";
086:
087:            private static final String FEATURE_VALIDATION = "http://xml.org/sax/features/validation";
088:
089:            private static final String FEATURE_LOAD_EXTERNAL_DTD = "http://apache.org/xml/features/nonvalidating/load-external-dtd";
090:
091:            private static final Logger logger = PortalLogger
092:                    .getLogger("com.sun.portal.wireless.htmlconversion");
093:
094:            /**
095:             * Constructs a new HtmlConverter which tries to determine the document type
096:             * itself.
097:             * 
098:             * @param input
099:             *            The input HTML content to be transformed.
100:             * @param isFragment
101:             *            Whether or not the output AML content is a whole AML page
102:             *            (with AmlDocument and AmlPage tags) or is just a fragment of
103:             *            AML to be embedded on a larger AML page.
104:             */
105:            public HtmlConverter(String input, boolean isFragment)
106:                    throws HtmlConversionException {
107:                this (input, isFragment, DOCUMENT_TYPE_UNKNOWN);
108:            }
109:
110:            /**
111:             * Constructs a new HtmlConverter for the specified document type.
112:             * 
113:             * @param input
114:             *            The input HTML content to be transformed.
115:             * @param isFragment
116:             *            Whether or not the output AML content is a whole AML page
117:             *            (with AmlDocument and AmlPage tags) or is just a fragment of
118:             *            AML to be embedded on a larger AML page.
119:             * @param documentType
120:             *            The type of the document, HTML, XHTML or unknown, must be one
121:             *            of the DOCUMENT_TYPE_* constants defined on this class.
122:             */
123:            public HtmlConverter(String input, boolean isFragment,
124:                    int documentType) throws HtmlConversionException {
125:                this .input = input;
126:                this .isFragment = isFragment;
127:                this .documentType = documentType;
128:
129:                if (logger.isLoggable(Level.FINEST)) {
130:                    logger.finest("Transforming HTML [" + input + "]");
131:                }
132:
133:                try {
134:                    DocumentBuilderFactory factory = DocumentBuilderFactory
135:                            .newInstance();
136:                    DocumentBuilder builder = factory.newDocumentBuilder();
137:                    output = builder.newDocument();
138:                } catch (Exception ex) {
139:                    // ex.printStackTrace();
140:                    logger.log(Level.SEVERE, "Error converting HTML", ex);
141:                    throw new HtmlConversionException(
142:                            HtmlConversionException.XML_ERROR, ex);
143:                }
144:
145:                if (documentType == DOCUMENT_TYPE_UNKNOWN) {
146:                    checkDocumentType();
147:                }
148:            }
149:
150:            /**
151:             * Creates and sets the URLEncoder.
152:             * 
153:             * @param request
154:             * @param response
155:             */
156:            public void setEncoder(HttpServletRequest request,
157:                    HttpServletResponse response) {
158:                encoder = new URLTranscoder(request, response);
159:            }
160:
161:            // Determines whether the input document is HTML or XHTML
162:            // The current mechanism only checks for the presence of
163:            // the string "XHTML" or "xhtml" in the opening DTD specification.
164:            // This may have to be reimplemented later to be a little more
165:            // sophisticated, for example, by checking whether or not image
166:            // and input tags in the document have a closing "/>" instead of
167:            // just ">".
168:            private void checkDocumentType() {
169:                documentType = DOCUMENT_TYPE_HTML;
170:
171:                if (input.startsWith(DTD_START)) {
172:                    int endIndex = input.indexOf(DTD_END);
173:                    String dtd = input.substring(0, endIndex);
174:
175:                    if (dtd.indexOf(XHTML_UPPER) != -1
176:                            || dtd.indexOf(XHTML_LOWER) != -1) {
177:                        documentType = DOCUMENT_TYPE_XHTML;
178:                    }
179:                }
180:            }
181:
182:            /**
183:             * Returns the transformed AML output.
184:             */
185:            public String toAML() throws HtmlConversionException {
186:                transform();
187:
188:                TransformerFactory tFactory = TransformerFactory.newInstance();
189:
190:                Transformer transformer = null;
191:                try {
192:                    transformer = tFactory.newTransformer();
193:                } catch (TransformerConfigurationException tce) {
194:                    // tce.printStackTrace();
195:                    logger.log(Level.SEVERE, "Error converting HTML", tce);
196:                    throw new HtmlConversionException(
197:                            HtmlConversionException.XML_ERROR, tce);
198:                }
199:
200:                DOMSource source = new DOMSource(output);
201:
202:                StringWriter amlDoc = new StringWriter();
203:                StreamResult result = new StreamResult(amlDoc);
204:
205:                try {
206:                    transformer.transform(source, result);
207:                } catch (TransformerException te) {
208:                    // te.printStackTrace();
209:                    logger.log(Level.SEVERE, "Error converting HTML", te);
210:                    throw new HtmlConversionException(
211:                            HtmlConversionException.XML_ERROR, te);
212:                }
213:
214:                String amlOutput = amlDoc.toString();
215:
216:                if (isFragment) {
217:                    // Rip off the AmlDocument and AmlPage tags
218:                    int amlPageStartIndex = amlOutput
219:                            .indexOf(AML_PAGE_START_TAG);
220:                    if (amlPageStartIndex != -1) {
221:                        amlOutput = amlOutput.substring(amlPageStartIndex
222:                                + AML_PAGE_START_TAG_LENGTH);
223:
224:                        int amlPageEndIndex = amlOutput
225:                                .lastIndexOf(AML_PAGE_END_TAG);
226:                        amlOutput = amlOutput.substring(0, amlPageEndIndex);
227:                    }
228:                }
229:
230:                if (logger.isLoggable(Level.FINEST)) {
231:                    logger
232:                            .finest("Transformed HTML to AML [" + amlOutput
233:                                    + "]");
234:                }
235:
236:                return amlOutput;
237:            }
238:
239:            private void transform() throws HtmlConversionException {
240:                if (isTransformed) {
241:                    return;
242:                }
243:
244:                // Assume regular HTML parser for now
245:                // Will add XHTML handling in later
246:
247:                ParserState state = new ParserState(output, encoder);
248:                GenericHtmlParserCallback genericCallback = new GenericHtmlParserCallback(
249:                        state);
250:
251:                // Check documentType and proceed.
252:                if (documentType == DOCUMENT_TYPE_HTML) {
253:                    StringReader inputReader = new StringReader(input);
254:
255:                    HtmlParserCallback callback = new HtmlParserCallback(
256:                            genericCallback);
257:
258:                    try {
259:                        new ParserDelegator()
260:                                .parse(inputReader, callback, true);
261:                    } catch (Exception e) {
262:                        // e.printStackTrace();
263:                        logger.log(Level.SEVERE, "Error converting HTML", e);
264:                        throw new HtmlConversionException(
265:                                HtmlConversionException.TRANSFORMATION_ERROR, e);
266:                    }
267:                } else // documentType == DOCUMENT_TYPE_XHTML
268:                {
269:                    XhtmlParserCallback callback = new XhtmlParserCallback(
270:                            genericCallback);
271:
272:                    XMLReader parser = null;
273:
274:                    try {
275:                        parser = XMLReaderFactory.createXMLReader();
276:                    } catch (SAXException saxEx) {
277:                        // saxEx.printStackTrace();
278:                        logger
279:                                .log(Level.SEVERE, "Error converting HTML",
280:                                        saxEx);
281:                        throw new HtmlConversionException(
282:                                HtmlConversionException.XML_ERROR, saxEx);
283:                    }
284:
285:                    parser.setContentHandler(callback);
286:                    parser.setDTDHandler(callback);
287:                    parser.setEntityResolver(callback);
288:                    parser.setErrorHandler(callback);
289:
290:                    try {
291:                        parser.setFeature(FEATURE_VALIDATION, false);
292:                        parser.setFeature(FEATURE_LOAD_EXTERNAL_DTD, false);
293:                    } catch (SAXNotRecognizedException saxEx) {
294:                        // Ignore these exceptions, and attempt to
295:                        // continue processing
296:                        // saxEx.printStackTrace();
297:                        logger.log(Level.WARNING, "Error converting HTML",
298:                                saxEx);
299:                    } catch (SAXNotSupportedException saxEx) {
300:                        // Ignore these exceptions, and attempt to
301:                        // continue processing
302:                        // saxEx.printStackTrace();
303:                        logger.log(Level.WARNING, "Error converting HTML",
304:                                saxEx);
305:                    }
306:
307:                    InputSource inputSource = new InputSource(new StringReader(
308:                            input));
309:
310:                    try {
311:                        parser.parse(inputSource);
312:                    } catch (SAXException saxEx) {
313:                        // saxEx.printStackTrace();
314:                        logger
315:                                .log(Level.SEVERE, "Error converting HTML",
316:                                        saxEx);
317:                        throw new HtmlConversionException(
318:                                HtmlConversionException.TRANSFORMATION_ERROR,
319:                                saxEx);
320:                    } catch (IOException ioEx) {
321:                        // ioEx.printStackTrace();
322:                        logger.log(Level.SEVERE, "Error converting HTML", ioEx);
323:                        throw new HtmlConversionException(
324:                                HtmlConversionException.TRANSFORMATION_ERROR,
325:                                ioEx);
326:                    }
327:                }
328:
329:                // Now flatten the tables and reform document structure
330:                state.getLayoutManager().reformLayout();
331:
332:                isTransformed = true;
333:            }
334:
335:        }

www.java2java.com | Contact Us

All other trademarks are property of their respective owners.