Add README
[xapian-trec.git] / htmlparse.h
blob66a1a69f28016c9e087788cc6f8888387fa15bb2
1 /* htmlparse.h: simple HTML parser for omega indexer
3 * ----START-LICENCE----
4 * Copyright 1999,2000,2001 BrightStation PLC
5 * Copyright 2002 Olly Betts
7 * This program is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License as
9 * published by the Free Software Foundation; either version 2 of the
10 * License, or (at your option) any later version.
12 * This program is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
17 * You should have received a copy of the GNU General Public License
18 * along with this program; if not, write to the Free Software
19 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
20 * USA
21 * -----END-LICENCE-----
24 #include <string>
25 #include <map>
27 using std::string;
28 using std::map;
30 class HtmlParser {
31 protected:
32 void decode_entities(string &s);
33 static map<string, unsigned int> named_ents;
34 public:
// Overridable hook that receives a string of text. The default
// implementation is empty, and the parameter name is commented out
// because it is unused here.
// NOTE(review): presumably called by parse_html() for text found between
// tags -- confirm against the implementation file.
35 virtual void process_text(const string &/*text*/) { }
// Overridable hook that receives a tag name and a string-to-string map.
// The default implementation is empty; both parameter names are commented
// out because they are unused here.
// NOTE(review): the map presumably carries the tag's attribute name/value
// pairs -- confirm against parse_html() in the implementation file.
36 virtual void opening_tag(const string &/*tag*/,
37 const map<string,string> &/*p*/) { }
// Overridable hook that receives a tag name. The default implementation is
// empty, and the parameter name is commented out because it is unused here.
// NOTE(review): presumably called by parse_html() when a closing tag is
// encountered -- confirm against the implementation file.
38 virtual void closing_tag(const string &/*tag*/) { }
39 virtual void parse_html(const string &text);
40 HtmlParser();
// Virtual destructor with an empty body; the class declares virtual member
// functions intended to be overridden by derived classes.
41 virtual ~HtmlParser() { }