TIKA-87 - MimeTypes should allow modification of MIME types
[tika.git] / src / main / java / org / apache / tika / mime / MimeUtils.java
blobc131dd7873b8406faa690cee9d0b032df3e8e3a2
1 /**
2 * Licensed to the Apache Software Foundation (ASF) under one or more
3 * contributor license agreements. See the NOTICE file distributed with
4 * this work for additional information regarding copyright ownership.
5 * The ASF licenses this file to You under the Apache License, Version 2.0
6 * (the "License"); you may not use this file except in compliance with
7 * the License. You may obtain a copy of the License at
9 * http://www.apache.org/licenses/LICENSE-2.0
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
17 package org.apache.tika.mime;
19 // JDK imports
20 import java.io.InputStream;
21 import java.io.IOException;
22 import java.net.URL;
24 import org.apache.tika.metadata.TikaMimeKeys;
25 import org.jdom.JDOMException;
27 // Tika imports
28 import org.apache.tika.config.TikaConfig;
29 import org.apache.tika.exception.TikaException;
/**
 * External wrapper around a {@link MimeTypes} repository that resolves
 * MIME type names from a declared type name, a URL and document content.
 */
37 public class MimeUtils implements TikaMimeKeys {
39 /** The MimeTypes repository instance */
40 private MimeTypes repository = new MimeTypes();
42 /** Creates a new instance of MimeUtils */
43 public MimeUtils(String resPath) {
44 new MimeTypesReader(repository).read(resPath);
48 /** Creates a new instance of MimeUtils */
49 public MimeUtils() throws TikaException {
50 try {
51 repository = TikaConfig.getDefaultConfig().getMimeRepository();
52 } catch (IOException e) {
53 throw new TikaException(
54 "Unable to load default MIME type repository.", e);
55 } catch (JDOMException e) {
56 throw new TikaException(
57 "Unable to load default MIME type repository.", e);
61 public final MimeTypes getRepository() {
62 return repository;
65 public String getType(String typeName, String url, byte[] data) {
66 MimeType type = null;
67 try {
68 typeName = MimeType.clean(typeName);
69 type = typeName == null ? null : repository.forName(typeName);
70 } catch (MimeTypeException mte) {
71 // Seems to be a malformed mime type name...
74 if (typeName == null || type == null || !type.matches(url)) {
75 // If no mime-type header, or cannot find a corresponding registered
76 // mime-type, or the one found doesn't match the url pattern
77 // it shouldbe, then guess a mime-type from the url pattern
78 type = repository.getMimeType(url);
79 typeName = type == null ? typeName : type.getName();
81 // if (typeName == null || type == null ||
82 // (this.magic && type.hasMagic() && !type.matches(data))) {
83 // If no mime-type already found, or the one found doesn't match
84 // the magic bytes it should be, then, guess a mime-type from the
85 // document content (magic bytes)
86 type = repository.getMimeType(data);
87 typeName = type == null ? typeName : type.getName();
88 // }
89 return typeName;
93 /**
94 * Determines the MIME type of the resource pointed to by the specified URL.
95 * Examines the file's header, and if it cannot determine the MIME type
96 * from the header, guesses the MIME type from the URL extension
97 * (e.g. "pdf).
99 * @param url
100 * @return
101 * @throws IOException
103 public String getType(URL url) throws IOException {
104 InputStream stream = url.openStream();
105 try {
106 return getType(null, url.toString(), getHeader(stream));
107 } finally {
108 stream.close();
113 * Read the resource's header for use in determination of the MIME type.
115 private byte[] getHeader(InputStream stream) throws IOException {
116 byte[] bytes = new byte[repository.getMinLength()];
117 int totalRead = 0;
118 int lastRead = stream.read(bytes);
119 while (lastRead != -1) {
120 totalRead += lastRead;
121 if (totalRead == bytes.length) {
122 return bytes;
124 lastRead = stream.read(bytes, totalRead, bytes.length - totalRead);
126 byte[] shorter = new byte[totalRead];
127 System.arraycopy(bytes, 0, shorter, 0, totalRead);
128 return shorter;