Finish off eventusermodel based Excel Extractor, and update the xls to csv converter...
[poi.git] / src / testcases / org / apache / poi / hssf / extractor / TestExcelExtractor.java
blob63d67ee7716b21b499a85bede9e3e7480fe9a701
1 /* ====================================================================
2 Licensed to the Apache Software Foundation (ASF) under one or more
3 contributor license agreements. See the NOTICE file distributed with
4 this work for additional information regarding copyright ownership.
5 The ASF licenses this file to You under the Apache License, Version 2.0
6 (the "License"); you may not use this file except in compliance with
7 the License. You may obtain a copy of the License at
9 http://www.apache.org/licenses/LICENSE-2.0
11 Unless required by applicable law or agreed to in writing, software
12 distributed under the License is distributed on an "AS IS" BASIS,
13 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 See the License for the specific language governing permissions and
15 limitations under the License.
16 ==================================================================== */
18 package org.apache.poi.hssf.extractor;
20 import java.io.FileInputStream;
21 import java.io.IOException;
22 import java.io.InputStream;
24 import junit.framework.TestCase;
26 import org.apache.poi.hssf.HSSFTestDataSamples;
27 import org.apache.poi.hssf.usermodel.HSSFWorkbook;
28 import org.apache.poi.poifs.filesystem.DirectoryNode;
29 import org.apache.poi.poifs.filesystem.POIFSFileSystem;
30 /**
33 public final class TestExcelExtractor extends TestCase {
35 private static final ExcelExtractor createExtractor(String sampleFileName) {
37 InputStream is = HSSFTestDataSamples.openSampleFileStream(sampleFileName);
39 try {
40 return new ExcelExtractor(new POIFSFileSystem(is));
41 } catch (IOException e) {
42 throw new RuntimeException(e);
47 public void testSimple() {
49 ExcelExtractor extractor = createExtractor("Simple.xls");
51 assertEquals("Sheet1\nreplaceMe\nSheet2\nSheet3\n", extractor.getText());
53 // Now turn off sheet names
54 extractor.setIncludeSheetNames(false);
55 assertEquals("replaceMe\n", extractor.getText());
58 public void testNumericFormula() {
60 ExcelExtractor extractor = createExtractor("sumifformula.xls");
62 assertEquals(
63 "Sheet1\n" +
64 "1000.0\t1.0\t5.0\n" +
65 "2000.0\t2.0\t\n" +
66 "3000.0\t3.0\t\n" +
67 "4000.0\t4.0\t\n" +
68 "5000.0\t5.0\t\n" +
69 "Sheet2\nSheet3\n",
70 extractor.getText()
73 extractor.setFormulasNotResults(true);
75 assertEquals(
76 "Sheet1\n" +
77 "1000.0\t1.0\tSUMIF(A1:A5,\">4000\",B1:B5)\n" +
78 "2000.0\t2.0\t\n" +
79 "3000.0\t3.0\t\n" +
80 "4000.0\t4.0\t\n" +
81 "5000.0\t5.0\t\n" +
82 "Sheet2\nSheet3\n",
83 extractor.getText()
87 public void testwithContinueRecords() {
89 ExcelExtractor extractor = createExtractor("StringContinueRecords.xls");
91 extractor.getText();
93 // Has masses of text
94 // Until we fixed bug #41064, this would've
95 // failed by now
96 assertTrue(extractor.getText().length() > 40960);
99 public void testStringConcat() {
101 ExcelExtractor extractor = createExtractor("SimpleWithFormula.xls");
103 // Comes out as NaN if treated as a number
104 // And as XYZ if treated as a string
105 assertEquals("Sheet1\nreplaceme\nreplaceme\nreplacemereplaceme\nSheet2\nSheet3\n", extractor.getText());
107 extractor.setFormulasNotResults(true);
109 assertEquals("Sheet1\nreplaceme\nreplaceme\nCONCATENATE(A1,A2)\nSheet2\nSheet3\n", extractor.getText());
112 public void testStringFormula() {
114 ExcelExtractor extractor = createExtractor("StringFormulas.xls");
116 // Comes out as NaN if treated as a number
117 // And as XYZ if treated as a string
118 assertEquals("Sheet1\nXYZ\nSheet2\nSheet3\n", extractor.getText());
120 extractor.setFormulasNotResults(true);
122 assertEquals("Sheet1\nUPPER(\"xyz\")\nSheet2\nSheet3\n", extractor.getText());
126 public void testEventExtractor() throws Exception {
127 EventBasedExcelExtractor extractor;
129 // First up, a simple file with string
130 // based formulas in it
131 extractor = new EventBasedExcelExtractor(
132 new POIFSFileSystem(
133 HSSFTestDataSamples.openSampleFileStream("SimpleWithFormula.xls")
136 extractor.setIncludeSheetNames(true);
138 String text = extractor.getText();
139 assertEquals("Sheet1\nreplaceme\nreplaceme\nreplacemereplaceme\nSheet2\nSheet3\n", text);
141 extractor.setIncludeSheetNames(false);
142 extractor.setFormulasNotResults(true);
144 text = extractor.getText();
145 assertEquals("replaceme\nreplaceme\nCONCATENATE(A1,A2)\n", text);
148 // Now, a slightly longer file with numeric formulas
149 extractor = new EventBasedExcelExtractor(
150 new POIFSFileSystem(
151 HSSFTestDataSamples.openSampleFileStream("sumifformula.xls")
154 extractor.setIncludeSheetNames(false);
155 extractor.setFormulasNotResults(true);
157 text = extractor.getText();
158 assertEquals(
159 "1000.0\t1.0\tSUMIF(A1:A5,\">4000\",B1:B5)\n" +
160 "2000.0\t2.0\n" +
161 "3000.0\t3.0\n" +
162 "4000.0\t4.0\n" +
163 "5000.0\t5.0\n",
164 text
170 * Embded in a non-excel file
172 public void testWithEmbeded() throws Exception {
173 String pdirname = System.getProperty("POIFS.testdata.path");
174 String filename = pdirname + "/word_with_embeded.doc";
175 POIFSFileSystem fs = new POIFSFileSystem(
176 new FileInputStream(filename)
179 DirectoryNode objPool = (DirectoryNode)
180 fs.getRoot().getEntry("ObjectPool");
181 DirectoryNode dirA = (DirectoryNode)
182 objPool.getEntry("_1269427460");
183 DirectoryNode dirB = (DirectoryNode)
184 objPool.getEntry("_1269427461");
186 HSSFWorkbook wbA = new HSSFWorkbook(dirA, fs, true);
187 HSSFWorkbook wbB = new HSSFWorkbook(dirB, fs, true);
189 ExcelExtractor exA = new ExcelExtractor(wbA);
190 ExcelExtractor exB = new ExcelExtractor(wbB);
192 assertEquals("Sheet1\nTest excel file\nThis is the first file\nSheet2\nSheet3\n",
193 exA.getText());
194 assertEquals("Sample Excel", exA.getSummaryInformation().getTitle());
196 assertEquals("Sheet1\nAnother excel file\nThis is the second file\nSheet2\nSheet3\n",
197 exB.getText());
198 assertEquals("Sample Excel 2", exB.getSummaryInformation().getTitle());
202 * Excel embeded in excel
204 public void testWithEmbededInOwn() throws Exception {
205 String pdirname = System.getProperty("POIFS.testdata.path");
206 String filename = pdirname + "/excel_with_embeded.xls";
207 POIFSFileSystem fs = new POIFSFileSystem(
208 new FileInputStream(filename)
211 DirectoryNode dirA = (DirectoryNode)
212 fs.getRoot().getEntry("MBD0000A3B5");
213 DirectoryNode dirB = (DirectoryNode)
214 fs.getRoot().getEntry("MBD0000A3B4");
216 HSSFWorkbook wbA = new HSSFWorkbook(dirA, fs, true);
217 HSSFWorkbook wbB = new HSSFWorkbook(dirB, fs, true);
219 ExcelExtractor exA = new ExcelExtractor(wbA);
220 ExcelExtractor exB = new ExcelExtractor(wbB);
222 assertEquals("Sheet1\nTest excel file\nThis is the first file\nSheet2\nSheet3\n",
223 exA.getText());
224 assertEquals("Sample Excel", exA.getSummaryInformation().getTitle());
226 assertEquals("Sheet1\nAnother excel file\nThis is the second file\nSheet2\nSheet3\n",
227 exB.getText());
228 assertEquals("Sample Excel 2", exB.getSummaryInformation().getTitle());
230 // And the base file too
231 ExcelExtractor ex = new ExcelExtractor(fs);
232 assertEquals("Sheet1\nI have lots of embeded files in me\nSheet2\nSheet3\n",
233 ex.getText());
234 assertEquals("Excel With Embeded", ex.getSummaryInformation().getTitle());