Fix @file doc comments which don't match filename
[xapian.git] / xapian-applications / omega / xlsxparse.cc
blobc5060acaf7292d7eb61a8ce517de83d7c92bb070
1 /** @file xlsxparse.cc
2 * @brief Extract fields from XLSX sheet*.xml.
3 */
4 /* Copyright (C) 2012,2013 Olly Betts
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License, or
9 * (at your option) any later version.
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
21 #include <config.h>
23 #include "xlsxparse.h"
25 #include <cstdlib>
26 #include <cstring>
27 #include <ctime>
29 using namespace std;
31 bool
32 XlsxParser::opening_tag(const string &tag)
34 if (tag == "c") {
35 // We need to distinguish <v> tags which are inside <c t="s">, as these
36 // are numeric references to shared strings.
37 string type;
38 if (get_parameter("t", type) && type == "s") {
39 mode = MODE_C_STRING;
40 } else {
41 mode = MODE_C_LITERAL;
42 if (get_parameter("s", type)) {
43 unsigned long style_id = strtoul(type.c_str(), NULL, 10);
44 if (date_style.find(style_id) != date_style.end()) {
45 mode = MODE_C_DATE;
49 } else if (tag == "v") {
50 if (mode == MODE_C_LITERAL) {
51 mode = MODE_V_LITERAL;
52 } else if (mode == MODE_C_STRING) {
53 mode = MODE_V_STRING;
54 } else if (mode == MODE_C_DATE) {
55 mode = MODE_V_DATE;
57 } else if (tag == "si") {
58 mode = MODE_SI;
59 } else if (tag == "sst") {
60 string unique_count;
61 if (get_parameter("uniquecount", unique_count)) {
62 unsigned long c = strtoul(unique_count.c_str(), NULL, 10);
63 // This reserving is just a performance tweak, so don't go
64 // reserving ludicrous amounts of space just because an XML
65 // attribute told us to.
66 sst.reserve(std::min(c, 1000000ul));
68 } else if (tag == "workbookpr") {
69 string v;
70 if (get_parameter("date1904", v)) {
71 date1904 = (v == "true" || v == "1");
73 } else if (tag == "numfmt") {
74 string formatcode;
75 if (get_parameter("formatcode", formatcode)) {
76 // Heuristic for "date format" (FIXME: implement properly)
77 if (strchr(formatcode.c_str(), 'd') &&
78 strchr(formatcode.c_str(), 'm') &&
79 strchr(formatcode.c_str(), 'y')) {
80 string v;
81 if (get_parameter("numfmtid", v)) {
82 unsigned long id = strtoul(v.c_str(), NULL, 10);
83 date_format.insert(id);
87 } else if (tag == "cellxfs") {
88 mode = MODE_CELLXFS;
89 } else if (tag == "xf") {
90 if (mode == MODE_CELLXFS) {
91 string v;
92 if (get_parameter("applynumberformat", v)) {
93 if (v == "true" || v == "1") {
94 if (get_parameter("numfmtid", v)) {
95 unsigned long id = strtoul(v.c_str(), NULL, 10);
96 if ((id >= 14 && id <= 17) ||
97 date_format.find(id) != date_format.end()) {
98 date_style.insert(style_index);
103 ++style_index;
106 return true;
109 void
110 XlsxParser::process_text(const string &text)
112 switch (mode) {
113 case MODE_V_DATE: {
114 // Date field.
115 unsigned long c = strtoul(text.c_str(), NULL, 10);
116 if (date1904) {
117 c -= 24107;
118 } else {
119 // The spec insists we treat 1900 as a leap year!
120 if (c > 60) --c;
121 c -= 25568;
123 time_t t = c * 86400 + 43200;
124 struct tm * tm = gmtime(&t);
125 if (tm) {
126 char buf[32];
127 size_t res = strftime(buf, sizeof(buf), "%Y-%m-%d", tm);
128 if (res)
129 append_field(string(buf, res));
131 mode = MODE_NONE;
132 return;
134 case MODE_V_STRING: {
135 // Shared string use.
136 unsigned long c = strtoul(text.c_str(), NULL, 10);
137 if (c < sst.size()) {
138 append_field(sst[c]);
140 mode = MODE_NONE;
141 return;
143 case MODE_V_LITERAL:
144 // Literal (possibly calculated) field value.
145 append_field(text);
146 mode = MODE_NONE;
147 return;
148 case MODE_SI:
149 // Shared string definition.
150 sst.push_back(text);
151 mode = MODE_NONE;
152 return;
153 default:
154 return;