2 * Licensed to the Apache Software Foundation (ASF) under one or more
3 * contributor license agreements. See the NOTICE file distributed with
4 * this work for additional information regarding copyright ownership.
5 * The ASF licenses this file to You under the Apache License, Version 2.0
6 * (the "License"); you may not use this file except in compliance with
7 * the License. You may obtain a copy of the License at
9 * http://www.apache.org/licenses/LICENSE-2.0
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
17 package org
.apache
.tika
.mime
;
20 import java
.io
.InputStream
;
21 import java
.io
.IOException
;
24 import org
.apache
.tika
.metadata
.TikaMimeKeys
;
25 import org
.jdom
.JDOMException
;
28 import org
.apache
.tika
.config
.TikaConfig
;
29 import org
.apache
.tika
.exception
.TikaException
;
35 * Wrapper external interface around a {@link MimeTypes} repository.
37 public class MimeUtils
implements TikaMimeKeys
{
39 /** The MimeTypes repository instance */
40 private MimeTypes repository
= new MimeTypes();
/**
 * Creates a new instance of MimeUtils, handing this instance's repository
 * to a {@link MimeTypesReader} and asking it to read the given resource.
 *
 * @param resPath path passed unchanged to the reader's {@code read} method
 */
public MimeUtils(String resPath) {
    MimeTypesReader reader = new MimeTypesReader(repository);
    reader.read(resPath);
}
48 /** Creates a new instance of MimeUtils */
49 public MimeUtils() throws TikaException
{
51 repository
= TikaConfig
.getDefaultConfig().getMimeRepository();
52 } catch (IOException e
) {
53 throw new TikaException(
54 "Unable to load default MIME type repository.", e
);
55 } catch (JDOMException e
) {
56 throw new TikaException(
57 "Unable to load default MIME type repository.", e
);
61 public final MimeTypes
getRepository() {
65 public String
getType(String typeName
, String url
, byte[] data
) {
68 typeName
= MimeType
.clean(typeName
);
69 type
= typeName
== null ?
null : repository
.forName(typeName
);
70 } catch (MimeTypeException mte
) {
71 // Seems to be a malformed mime type name...
74 if (typeName
== null || type
== null || !type
.matches(url
)) {
75 // If no mime-type header, or cannot find a corresponding registered
76 // mime-type, or the one found doesn't match the url pattern
77 // it shouldbe, then guess a mime-type from the url pattern
78 type
= repository
.getMimeType(url
);
79 typeName
= type
== null ? typeName
: type
.getName();
81 // if (typeName == null || type == null ||
82 // (this.magic && type.hasMagic() && !type.matches(data))) {
83 // If no mime-type already found, or the one found doesn't match
84 // the magic bytes it should be, then, guess a mime-type from the
85 // document content (magic bytes)
86 type
= repository
.getMimeType(data
);
87 typeName
= type
== null ? typeName
: type
.getName();
94 * Determines the MIME type of the resource pointed to by the specified URL.
95 * Examines the file's header, and if it cannot determine the MIME type
96 * from the header, guesses the MIME type from the URL extension
101 * @throws IOException
103 public String
getType(URL url
) throws IOException
{
104 InputStream stream
= url
.openStream();
106 return getType(null, url
.toString(), getHeader(stream
));
113 * Read the resource's header for use in determination of the MIME type.
115 private byte[] getHeader(InputStream stream
) throws IOException
{
116 byte[] bytes
= new byte[repository
.getMinLength()];
118 int lastRead
= stream
.read(bytes
);
119 while (lastRead
!= -1) {
120 totalRead
+= lastRead
;
121 if (totalRead
== bytes
.length
) {
124 lastRead
= stream
.read(bytes
, totalRead
, bytes
.length
- totalRead
);
126 byte[] shorter
= new byte[totalRead
];
127 System
.arraycopy(bytes
, 0, shorter
, 0, totalRead
);