TIKA-131: Lazy XHTML prefix generation
[tika.git] / src / main / java / org / apache / tika / sax / XHTMLContentHandler.java
blob48c9e793db58b903d70c986185cb4605642dead8
/**
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
17 package org.apache.tika.sax;
19 import org.apache.tika.metadata.Metadata;
20 import org.xml.sax.Attributes;
21 import org.xml.sax.ContentHandler;
22 import org.xml.sax.SAXException;
23 import org.xml.sax.helpers.AttributesImpl;
25 /**
26 * Content handler decorator that simplifies the task of producing XHTML
27 * events for Tika content parsers.
29 public class XHTMLContentHandler extends ContentHandlerDecorator {
31 /**
32 * The XHTML namespace URI
34 public static final String XHTML = "http://www.w3.org/1999/xhtml";
36 /**
37 * Metadata associated with the document. Used to fill in the
38 * <head/> section.
40 private final Metadata metadata;
42 /**
43 * Flag to indicate whether the document element has been started.
45 private boolean started = false;
47 public XHTMLContentHandler(ContentHandler handler, Metadata metadata) {
48 super(handler);
49 this.metadata = metadata;
52 /**
53 * Starts an XHTML document by setting up the namespace mappings.
54 * The standard XHTML prefix is generated lazily when the first
55 * element is started.
57 @Override
58 public void startDocument() throws SAXException {
59 super.startDocument();
60 startPrefixMapping("", XHTML);
63 /**
64 * Generates the following XHTML prefix when called for the first time:
65 * <pre>
66 * &lt;html&gt;
67 * &lt;head&gt;
68 * &lt;title&gt;...&lt;/title&gt;
69 * &lt;/head&gt;
70 * &lt;body&gt;
71 * </pre>
73 private void lazyStartDocument() throws SAXException {
74 if (!started) {
75 started = true;
76 startElement("html");
77 startElement("head");
78 startElement("title");
79 String title = metadata.get(Metadata.TITLE);
80 if (title != null && title.length() > 0) {
81 characters(title);
83 endElement("title");
84 endElement("head");
85 startElement("body");
89 /**
90 * Ends the XHTML document by writing the following footer and
91 * clearing the namespace mappings:
92 * <pre>
93 * &lt;/body&gt;
94 * &lt;/html&gt;
95 * </pre>
97 @Override
98 public void endDocument() throws SAXException {
99 lazyStartDocument();
100 endElement("body");
101 endElement("html");
102 endPrefixMapping("");
103 super.endDocument();
106 @Override
107 public void startElement(
108 String uri, String local, String name, Attributes attributes)
109 throws SAXException {
110 lazyStartDocument();
111 super.startElement(uri, local, name, attributes);
114 public void startElement(String name) throws SAXException {
115 startElement(XHTML, name, name, new AttributesImpl());
118 public void startElement(String name, String attribute, String value)
119 throws SAXException {
120 AttributesImpl attributes = new AttributesImpl();
121 attributes.addAttribute(XHTML, attribute, attribute, "CDATA", value);
122 startElement(XHTML, name, name, attributes);
125 public void endElement(String name) throws SAXException {
126 endElement(XHTML, name, name);
129 public void characters(String characters) throws SAXException {
130 characters(characters.toCharArray(), 0, characters.length());
133 public void element(String name, String value) throws SAXException {
134 startElement(name);
135 characters(value);
136 endElement(name);