TIKA-71 - Remove ParserConfig and ParserFactory
[tika.git] / src / main / java / org / apache / tika / config / TikaConfig.java
blob47f1e993bdab301c41cb2456a4c9a949fedc2175
1 /**
2 * Licensed to the Apache Software Foundation (ASF) under one or more
3 * contributor license agreements. See the NOTICE file distributed with
4 * this work for additional information regarding copyright ownership.
5 * The ASF licenses this file to You under the Apache License, Version 2.0
6 * (the "License"); you may not use this file except in compliance with
7 * the License. You may obtain a copy of the License at
9 * http://www.apache.org/licenses/LICENSE-2.0
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
17 package org.apache.tika.config;
19 //JDK imports
20 import java.io.File;
21 import java.io.IOException;
22 import java.io.InputStream;
23 import java.net.URL;
24 import java.util.HashMap;
25 import java.util.Map;
27 //TIKA imports
28 import org.apache.tika.mime.MimeTypes;
29 import org.apache.tika.mime.MimeUtils;
30 import org.apache.tika.parser.Parser;
31 import org.apache.tika.parser.ParserPostProcessor;
32 import org.apache.tika.utils.Utils;
34 //JDOM imports
35 import org.jdom.Document;
36 import org.jdom.Element;
37 import org.jdom.JDOMException;
38 import org.jdom.input.SAXBuilder;
39 import org.jdom.xpath.XPath;
41 /**
42 * Parses the Tika XML configuration file and exposes the configured parsers and MIME type repository.
44 public class TikaConfig {
46 public static final String DEFAULT_CONFIG_LOCATION =
47 "/org/apache/tika/tika-config.xml";
49 private final Map<String, Parser> parsers = new HashMap<String, Parser>();
51 private static MimeUtils mimeTypeRepo;
/**
 * Loads the configuration from the XML file at the given path.
 * Delegates to the {@link File} based constructor.
 *
 * @param file path of the configuration file
 * @throws JDOMException propagated from building or interpreting the
 *         configuration document
 * @throws IOException propagated from reading the file
 */
public TikaConfig(String file) throws JDOMException, IOException {
    this(new File(file));
}
/**
 * Loads the configuration from the given XML file.
 * The file is parsed with a fresh {@link SAXBuilder} and the resulting
 * document is handed to the {@link Document} based constructor.
 *
 * @param file configuration file
 * @throws JDOMException propagated from building or interpreting the
 *         configuration document
 * @throws IOException propagated from reading the file
 */
public TikaConfig(File file) throws JDOMException, IOException {
    this(new SAXBuilder().build(file));
}
/**
 * Loads the configuration from the XML document at the given URL.
 * The document is parsed with a fresh {@link SAXBuilder} and handed to
 * the {@link Document} based constructor.
 *
 * @param url location of the configuration document
 * @throws JDOMException propagated from building or interpreting the
 *         configuration document
 * @throws IOException propagated from reading the URL
 */
public TikaConfig(URL url) throws JDOMException, IOException {
    this(new SAXBuilder().build(url));
}
/**
 * Loads the configuration from an XML input stream.
 * The stream is parsed with a fresh {@link SAXBuilder} and the resulting
 * document is handed to the {@link Document} based constructor. This
 * constructor does not itself close the stream.
 *
 * @param stream stream containing the configuration document
 * @throws JDOMException propagated from building or interpreting the
 *         configuration document
 * @throws IOException propagated from reading the stream
 */
public TikaConfig(InputStream stream) throws JDOMException, IOException {
    this(new SAXBuilder().build(stream));
}
/**
 * Builds the configuration from an already-parsed XML document.
 * Delegates to the {@link Element} based constructor using the
 * document's root element.
 *
 * @param document parsed configuration document
 * @throws JDOMException propagated from interpreting the configuration
 */
public TikaConfig(Document document) throws JDOMException {
    this(document.getRootElement());
}
73 public TikaConfig(Element element) throws JDOMException {
74 Element mtr = element.getChild("mimeTypeRepository");
75 String mimeTypeRepoResource = mtr.getAttributeValue("resource");
76 mimeTypeRepo = new MimeUtils(mimeTypeRepoResource);
78 for (Object node : XPath.selectNodes(element, "//parser")) {
79 String className = ((Element) node).getAttributeValue("class");
80 try {
81 Parser parser = new ParserPostProcessor(
82 (Parser) Class.forName(className).newInstance());
83 for (Object child : ((Element) node).getChildren("mime")) {
84 parsers.put(((Element) child).getTextTrim(), parser);
86 } catch (Exception e) {
87 throw new JDOMException(
88 "Invalid parser configuration: " + className, e);
93 /**
94 * Returns the parser instance configured for the given MIME type.
95 * Returns <code>null</code> if the given MIME type is unknown.
97 * @param mimeType MIME type
98 * @return configured Parser instance, or <code>null</code>
100 public Parser getParser(String mimeType) {
101 return parsers.get(mimeType);
104 public MimeTypes getMimeRepository(){
105 return mimeTypeRepo.getRepository();
109 * Provides a default configuration (TikaConfig). Currently creates a
110 * new instance each time it's called; we may be able to have it
111 * return a shared instance once it is completely immutable.
113 * @return
114 * @throws IOException
115 * @throws JDOMException
117 public static TikaConfig getDefaultConfig()
118 throws IOException, JDOMException {
120 return new TikaConfig(
121 Utils.class.getResourceAsStream(DEFAULT_CONFIG_LOCATION));