TIKA-113: Metadata (such as title) should not be part of content
commit c998cc90be8ec8921abb00938d4a37727275b805
author Jukka Lauri Zitting <jukka@apache.org>
Thu, 10 Apr 2008 10:52:06 +0000 (10 10:52 +0000)
committer Jukka Lauri Zitting <jukka@apache.org>
Thu, 10 Apr 2008 10:52:06 +0000 (10 10:52 +0000)
tree 38e6b80f5601af28c9a69792aa451b41abfc35f0
parent 3207b49b1f4f17560190027f9ba1de0e17b2099c
TIKA-113: Metadata (such as title) should not be part of content
    - Added BodyContentHandler that only processes XHTML body events
    - Added utility constructors for WriteOutContentHandler and BodyContentHandler
    - Updated test cases and related code to use BodyContentHandler where appropriate
    - Removed AppendableAdaptor class as it's not used anymore

git-svn-id: https://svn.eu.apache.org/repos/asf/incubator/tika/trunk@646748 13f79535-47bb-0310-9956-ffa450edef68
19 files changed:
CHANGES.txt
src/main/java/org/apache/tika/cli/TikaCLI.java
src/main/java/org/apache/tika/gui/TikaGUI.java
src/main/java/org/apache/tika/parser/ParserPostProcessor.java
src/main/java/org/apache/tika/parser/html/HtmlParser.java
src/main/java/org/apache/tika/sax/AppendableAdaptor.java [deleted file]
src/main/java/org/apache/tika/sax/BodyContentHandler.java [new file with mode: 0644]
src/main/java/org/apache/tika/sax/ContentHandlerDecorator.java
src/main/java/org/apache/tika/sax/WriteOutContentHandler.java
src/main/java/org/apache/tika/utils/ParseUtils.java
src/test/java/org/apache/tika/parser/AutoDetectParserTest.java
src/test/java/org/apache/tika/parser/html/HtmlParserTest.java
src/test/java/org/apache/tika/parser/microsoft/ExcelParserTest.java
src/test/java/org/apache/tika/parser/microsoft/PowerPointParserTest.java
src/test/java/org/apache/tika/parser/microsoft/WordParserTest.java
src/test/java/org/apache/tika/parser/opendocument/OpenOfficeParserTest.java
src/test/java/org/apache/tika/parser/txt/TXTParserTest.java
src/test/java/org/apache/tika/parser/xml/DcXMLParserTest.java
src/test/java/org/apache/tika/sax/AppendableAdaptorTest.java [deleted file]