[ci] Fix netbsd job to upgrade existing packages
[xapian.git] / xapian-applications / omega / xmlparser.h
blobe99813a72807ffb3c89fc1ad095b92be6f76bc84
1 /** @file
2 * @brief XML (and HTML) parser
3 */
/* Copyright 1999,2000,2001 BrightStation PLC
 * Copyright 2002,2006,2008,2009,2011,2016,2020,2023 Olly Betts
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License as
 * published by the Free Software Foundation; either version 2 of the
 * License, or (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
 * USA
 */
23 #ifndef OMEGA_INCLUDED_XMLPARSER_H
24 #define OMEGA_INCLUDED_XMLPARSER_H
26 #include <string>
27 #include <string_view>
/** Base class for an XML (and HTML) parser driven by overridable handlers.
 *
 *  parse() is handed the document text.  Subclasses receive the document's
 *  structure by overriding opening_tag(), closing_tag() and
 *  process_content(); the default implementations ignore their argument.
 *
 *  Objects of this class cannot be copied.
 */
class XmlParser {
    // NOTE(review): presumably the raw attribute text of the tag currently
    // being handled, as read by get_attribute() - the code which sets these
    // members is not in this header, so confirm against the implementation.
    //
    // Both members are given defined initial values so that an object never
    // holds an indeterminate pointer/length pair.
    const char* attribute_data = nullptr;

    // Declared mutable, so it can be modified from const member functions
    // (get_attribute() is the only const member declared here).
    mutable size_t attribute_len = 0;

  protected:
    /** Control HTML-specific handling.
     *
     *  Defaults to XML, which means no HTML-specific handling.
     *
     *  The HtmlParser subclass overrides this to HTML at construction time,
     *  and then it can change to HTML_IN_SCRIPT and back to HTML as we
     *  move in and out of parsing script elements.
     */
    enum { XML, HTML, HTML_IN_SCRIPT } state = XML;

    // NOTE(review): presumably the character set of the document being
    // parsed - nothing in this header reads or writes it, so confirm.
    std::string charset;

    /** Protected constructor for HtmlParser subclass.
     *
     *  The bool parameter is unnamed and its value is ignored; it only
     *  selects this overload, which starts the parser in the HTML state.
     */
    explicit XmlParser(bool) : state(HTML) { }

    /** Decode entities in @a s.
     *
     *  NOTE(review): @a s is taken by non-const reference, so presumably it
     *  is modified in place - confirm against the implementation.
     */
    static void decode_entities(std::string& s);

    /** Look up attribute @a name.
     *
     *  NOTE(review): presumably stores the attribute's value in @a value and
     *  returns true if the attribute is present - confirm against the
     *  implementation.
     */
    bool get_attribute(const std::string& name, std::string& value) const;

    /** Process an opening tag.
     *
     *  The default implementation ignores @a tag.
     *
     *  Return false to stop parsing of the rest of the document.
     */
    virtual bool opening_tag(const std::string& tag) {
        (void)tag;
        return true;
    }

    /** Process a closing tag.
     *
     *  The default implementation ignores @a tag.
     *
     *  Return false to stop parsing of the rest of the document.
     */
    virtual bool closing_tag(const std::string& tag) {
        (void)tag;
        return true;
    }

    /** Process text between tags.
     *
     *  The default implementation ignores @a content.
     */
    virtual void process_content(const std::string& content) {
        (void)content;
    }

  public:
    /// Construct a parser in the default XML state.
    XmlParser() { }

    /// Copying is not permitted.
    XmlParser(const XmlParser&) = delete;

    /// Assignment is not permitted.
    XmlParser& operator=(const XmlParser&) = delete;

    /// Virtual destructor, as this class is designed to be subclassed.
    virtual ~XmlParser() { }

    /** Parse @a text.
     *
     *  @a text is a non-owning view of the document text.
     */
    void parse(std::string_view text);
};
88 #endif // OMEGA_INCLUDED_XMLPARSER_H