Don't use WIN32 API to parse/unparse UUIDs
[xapian.git] / xapian-applications / omega / metaxmlparse.cc
blob94b10fd44385a238e5bed8efb32df95787c53966
1 /* metaxmlparse.cc: subclass of HtmlParser for parsing OpenDocument's meta.xml.
3 * Copyright (C) 2006,2009,2010,2011,2013,2015 Olly Betts
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 2 of the License, or
8 * (at your option) any later version.
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
15 * You should have received a copy of the GNU General Public License
16 * along with this program; if not, write to the Free Software
17 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
20 #include <config.h>
22 #include "metaxmlparse.h"
24 #include "datetime.h"
26 using namespace std;
28 void
29 MetaXmlParser::process_text(const string &text)
31 switch (field) {
32 case KEYWORDS:
33 if (!keywords.empty()) keywords += ' ';
34 keywords += text;
35 break;
36 case TITLE:
37 if (!title.empty()) title += ' ';
38 title += text;
39 break;
40 case SAMPLE:
41 if (!sample.empty()) sample += ' ';
42 sample += text;
43 break;
44 case AUTHOR:
45 if (!author.empty()) author += ' ';
46 author += text;
47 break;
48 case TOPIC:
49 if (!topic.empty()) topic += ' ';
50 topic += text;
51 break;
52 case CREATED: {
53 // E.g. 2013-03-04T22:57:00
54 created = parse_datetime(text);
55 break;
57 case NONE:
58 // Ignore other fields.
59 break;
63 bool
64 MetaXmlParser::opening_tag(const string &tag)
66 if (tag.size() < 8) return true;
67 if (tag[0] == 'd' && tag[1] == 'c') {
68 if (tag == "dc:subject") {
69 // dc:subject is "Subject and Keywords":
70 // "Typically, Subject will be expressed as keywords, key phrases
71 // or classification codes that describe a topic of the resource."
72 // OpenOffice uses meta:keywords for keywords - dc:subject
73 // comes from a text field labelled "Subject". Let's just treat
74 // it as more keywords.
75 field = KEYWORDS;
76 } else if (tag == "dc:title") {
77 field = TITLE;
78 } else if (tag == "dc:description") {
79 field = SAMPLE;
80 } else if (tag == "dc:creator") {
81 field = AUTHOR;
82 } else if (tag == "dc:subject") {
83 field = TOPIC;
85 } else if (tag[0] == 'm') {
86 if (tag == "meta:keyword") {
87 // e.g.:
88 // <meta:keywords>
89 // <meta:keyword>information retrieval</meta:keyword>
90 // </meta:keywords>
91 field = KEYWORDS;
92 } else if (tag == "meta:creation-date") {
93 field = CREATED;
96 return true;
99 bool
100 MetaXmlParser::closing_tag(const string &)
102 field = NONE;
103 return true;