2 * Licensed to the Apache Software Foundation (ASF) under one or more
3 * contributor license agreements. See the NOTICE file distributed with
4 * this work for additional information regarding copyright ownership.
5 * The ASF licenses this file to You under the Apache License, Version 2.0
6 * (the "License"); you may not use this file except in compliance with
7 * the License. You may obtain a copy of the License at
9 * http://www.apache.org/licenses/LICENSE-2.0
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
17 package org
.apache
.tika
.parser
.microsoft
;
19 import java
.io
.InputStream
;
21 import org
.apache
.tika
.metadata
.Metadata
;
22 import org
.apache
.tika
.sax
.BodyContentHandler
;
23 import org
.xml
.sax
.ContentHandler
;
25 import junit
.framework
.TestCase
;
27 public class WordParserTest
extends TestCase
{
29 public void testWordParser() throws Exception
{
30 InputStream input
= WordParserTest
.class.getResourceAsStream(
31 "/test-documents/testWORD.doc");
33 ContentHandler handler
= new BodyContentHandler();
34 Metadata metadata
= new Metadata();
35 new OfficeParser().parse(input
, handler
, metadata
);
39 metadata
.get(Metadata
.CONTENT_TYPE
));
40 assertEquals("Sample Word Document", metadata
.get(Metadata
.TITLE
));
41 assertEquals("Keith Bennett", metadata
.get(Metadata
.AUTHOR
));
42 assertTrue(handler
.toString().contains("Sample Word Document"));