2 * Licensed to the Apache Software Foundation (ASF) under one or more
3 * contributor license agreements. See the NOTICE file distributed with
4 * this work for additional information regarding copyright ownership.
5 * The ASF licenses this file to You under the Apache License, Version 2.0
6 * (the "License"); you may not use this file except in compliance with
7 * the License. You may obtain a copy of the License at
9 * http://www.apache.org/licenses/LICENSE-2.0
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
17 package org
.apache
.tika
.config
;
import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.net.URL;
import java.util.HashMap;
import java.util.Map;

import org.apache.tika.mime.MimeTypes;
import org.apache.tika.mime.MimeUtils;
import org.apache.tika.parser.Parser;
import org.apache.tika.parser.ParserPostProcessor;
import org.apache.tika.utils.Utils;

import org.jdom.Document;
import org.jdom.Element;
import org.jdom.JDOMException;
import org.jdom.input.SAXBuilder;
import org.jdom.xpath.XPath;
42 * Parse xml config file.
44 public class TikaConfig
{
46 public static final String DEFAULT_CONFIG_LOCATION
=
47 "/org/apache/tika/tika-config.xml";
49 private final Map
<String
, Parser
> parsers
= new HashMap
<String
, Parser
>();
51 private static MimeUtils mimeTypeRepo
;
53 public TikaConfig(String file
) throws JDOMException
, IOException
{
57 public TikaConfig(File file
) throws JDOMException
, IOException
{
58 this(new SAXBuilder().build(file
));
61 public TikaConfig(URL url
) throws JDOMException
, IOException
{
62 this(new SAXBuilder().build(url
));
65 public TikaConfig(InputStream stream
) throws JDOMException
, IOException
{
66 this(new SAXBuilder().build(stream
));
69 public TikaConfig(Document document
) throws JDOMException
{
70 this(document
.getRootElement());
73 public TikaConfig(Element element
) throws JDOMException
{
74 Element mtr
= element
.getChild("mimeTypeRepository");
75 String mimeTypeRepoResource
= mtr
.getAttributeValue("resource");
76 mimeTypeRepo
= new MimeUtils(mimeTypeRepoResource
);
78 for (Object node
: XPath
.selectNodes(element
, "//parser")) {
79 String className
= ((Element
) node
).getAttributeValue("class");
81 Parser parser
= new ParserPostProcessor(
82 (Parser
) Class
.forName(className
).newInstance());
83 for (Object child
: ((Element
) node
).getChildren("mime")) {
84 parsers
.put(((Element
) child
).getTextTrim(), parser
);
86 } catch (Exception e
) {
87 throw new JDOMException(
88 "Invalid parser configuration: " + className
, e
);
94 * Returns the parser instance configured for the given MIME type.
95 * Returns <code>null</code> if the given MIME type is unknown.
97 * @param mimeType MIME type
98 * @return configured Parser instance, or <code>null</code>
100 public Parser
getParser(String mimeType
) {
101 return parsers
.get(mimeType
);
104 public MimeTypes
getMimeRepository(){
105 return mimeTypeRepo
.getRepository();
109 * Provides a default configuration (TikaConfig). Currently creates a
110 * new instance each time it's called; we may be able to have it
111 * return a shared instance once it is completely immutable.
114 * @throws IOException
115 * @throws JDOMException
117 public static TikaConfig
getDefaultConfig()
118 throws IOException
, JDOMException
{
120 return new TikaConfig(
121 Utils
.class.getResourceAsStream(DEFAULT_CONFIG_LOCATION
));