2 * Licensed to the Apache Software Foundation (ASF) under one or more
3 * contributor license agreements. See the NOTICE file distributed with
4 * this work for additional information regarding copyright ownership.
5 * The ASF licenses this file to You under the Apache License, Version 2.0
6 * (the "License"); you may not use this file except in compliance with
7 * the License. You may obtain a copy of the License at
9 * http://www.apache.org/licenses/LICENSE-2.0
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
17 package org
.apache
.tika
.sax
;
19 import org
.apache
.tika
.metadata
.Metadata
;
20 import org
.xml
.sax
.Attributes
;
21 import org
.xml
.sax
.ContentHandler
;
22 import org
.xml
.sax
.SAXException
;
23 import org
.xml
.sax
.helpers
.AttributesImpl
;
26 * Content handler decorator that simplifies the task of producing XHTML
27 * events for Tika content parsers.
29 public class XHTMLContentHandler
extends ContentHandlerDecorator
{
32 * The XHTML namespace URI
34 public static final String XHTML
= "http://www.w3.org/1999/xhtml";
37 * Metadata associated with the document. Used to fill in the
38 * <head/> section.
40 private final Metadata metadata
;
43 * Flag to indicate whether the document element has been started.
45 private boolean started
= false;
47 public XHTMLContentHandler(ContentHandler handler
, Metadata metadata
) {
49 this.metadata
= metadata
;
53 * Starts an XHTML document by setting up the namespace mappings.
54 * The standard XHTML prefix is generated lazily when the first
58 public void startDocument() throws SAXException
{
59 super.startDocument();
60 startPrefixMapping("", XHTML
);
64 * Generates the following XHTML prefix when called for the first time:
68 * <title>...</title>
73 private void lazyStartDocument() throws SAXException
{
78 startElement("title");
79 String title
= metadata
.get(Metadata
.TITLE
);
80 if (title
!= null && title
.length() > 0) {
90 * Ends the XHTML document by writing the following footer and
91 * clearing the namespace mappings:
98 public void endDocument() throws SAXException
{
102 endPrefixMapping("");
107 public void startElement(
108 String uri
, String local
, String name
, Attributes attributes
)
109 throws SAXException
{
111 super.startElement(uri
, local
, name
, attributes
);
114 public void startElement(String name
) throws SAXException
{
115 startElement(XHTML
, name
, name
, new AttributesImpl());
118 public void startElement(String name
, String attribute
, String value
)
119 throws SAXException
{
120 AttributesImpl attributes
= new AttributesImpl();
121 attributes
.addAttribute(XHTML
, attribute
, attribute
, "CDATA", value
);
122 startElement(XHTML
, name
, name
, attributes
);
125 public void endElement(String name
) throws SAXException
{
126 endElement(XHTML
, name
, name
);
129 public void characters(String characters
) throws SAXException
{
130 characters(characters
.toCharArray(), 0, characters
.length());
133 public void element(String name
, String value
) throws SAXException
{