From 25be5d24b0722aa2da07628ff0fa2b2c30b02654 Mon Sep 17 00:00:00 2001 From: Jukka Lauri Zitting Date: Wed, 26 Mar 2008 18:58:16 +0000 Subject: [PATCH] TIKA-132: Refactor Excel extractor to parse per sheet and add hyperlink support - Further refactoring to simplify cell value handling git-svn-id: https://svn.eu.apache.org/repos/asf/incubator/tika/trunk@641482 13f79535-47bb-0310-9956-ffa450edef68 --- .../tika/parser/microsoft/ExcelExtractor.java | 52 +++++++++++++++------- 1 file changed, 37 insertions(+), 15 deletions(-) diff --git a/src/main/java/org/apache/tika/parser/microsoft/ExcelExtractor.java b/src/main/java/org/apache/tika/parser/microsoft/ExcelExtractor.java index 350805d..4d69cda 100644 --- a/src/main/java/org/apache/tika/parser/microsoft/ExcelExtractor.java +++ b/src/main/java/org/apache/tika/parser/microsoft/ExcelExtractor.java @@ -270,12 +270,7 @@ public class ExcelExtractor { && record instanceof CellValueRecordInterface) { CellValueRecordInterface value = (CellValueRecordInterface) record; - Cell cell = getCellValue(record.getSid(), value); - if (cell != null) { - Point point = - new Point(value.getColumn(), value.getRow()); - currentSheet.put(point, cell); - } + addCell(record, getCellValue(record.getSid(), value)); } break; } @@ -290,8 +285,6 @@ public class ExcelExtractor { private Cell getCellValue( short sid, CellValueRecordInterface record) throws SAXException { - - String text = null; switch (sid) { /* FormulaRecord: Cell value from a formula */ case FormulaRecord.sid: @@ -299,15 +292,13 @@ public class ExcelExtractor { /* LabelRecord: strings stored directly in the cell */ case LabelRecord.sid: - text = ((LabelRecord)record).getValue(); - break; + return getTextCell(((LabelRecord) record).getValue()); /* LabelSSTRecord: Ref. 
a string in the shared string table */ case LabelSSTRecord.sid: LabelSSTRecord labelSSTRecord = (LabelSSTRecord) record; int sstIndex = labelSSTRecord.getSSTIndex(); - text = sstRecord.getString(sstIndex).getString(); - break; + return getTextCell(sstRecord.getString(sstIndex).getString()); /* NumberRecord: Contains a numeric cell value */ case NumberRecord.sid: @@ -317,11 +308,42 @@ public class ExcelExtractor { case RKRecord.sid: return new NumberCell(((RKRecord)record).getRKNumber()); } + return null; + } + + /** + * Adds the given cell (unless null) to the current + * worksheet (if any) at the position (if any) of the given record. + * + * @param record record that holds the cell value + * @param cell cell value (or null) + */ + private void addCell(Record record, Cell cell) { + if (!insideWorksheet) { + // Ignore cells outside sheets + } else if (cell == null) { + // Ignore empty cells + } else if (record instanceof CellValueRecordInterface) { + CellValueRecordInterface value = + (CellValueRecordInterface) record; + Point point = new Point(value.getColumn(), value.getRow()); + currentSheet.put(point, cell); + } + } + + /** + * Returns a text cell with the given text content. The given text + * is trimmed, and ignored if null or empty. + * + * @param text text content, may be null + * @return text cell, or null + */ + private Cell getTextCell(String text) { if (text != null) { text = text.trim(); - } - if (text != null && text.length() > 0) { - return new TextCell(text); + if (text.length() > 0) { + return new TextCell(text); + } } return null; } -- 2.11.4.GIT