5 This file is part of KIllustrator.
6 Copyright (C) 1998 Kai-Uwe Sattler (kus@iti.cs.uni-magdeburg.de)
8 modified for kvoctrain by Ewald Arnold kvoctrain@ewald-arnold.de in April ´99
10 -----------------------------------------------------------------------
12 -----------------------------------------------------------------------
14 This program is free software; you can redistribute it and/or modify
15 it under the terms of the GNU Library General Public License as published by
17 the Free Software Foundation; either version 2 of the License, or
18 (at your option) any later version.
20 This program is distributed in the hope that it will be useful,
21 but WITHOUT ANY WARRANTY; without even the implied warranty of
22 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
23 GNU General Public License for more details.
25 You should have received a copy of the GNU Library General Public License
26 along with this program; if not, write to the Free Software
27 Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
33 #include "XmlReader.h"
// Constructs a reader from the input stream `is` (taken by reference).
// NOTE(review): the initializer list and body are not visible in this view;
// presumably `is` is handed to the `tokenizer` member used below -- confirm.
35 XmlReader::XmlReader (KOXML_ISTREAM
& is
)
// Destructor. NOTE(review): the body and closing brace are not visible in this view.
40 XmlReader::~XmlReader () {
// Validates the document prologue by pulling tokens from `tokenizer` one at a
// time and comparing each against the expected token kind / text.
//
// Expected token sequence, in order:
//   1. XML declaration:  Tok_Lt, Tok_QSign, symbol "xml", symbol "version",
//      Tok_Eq, string "1.0", Tok_QSign, Tok_Gt
//   2. Doctype:          Tok_Lt, optional Tok_Exclam, symbol "doctype",
//      a symbol (saved in s_dtype), symbol "system", a string (saved in s_dtd),
//      Tok_Gt
// "doctype" and "system" are compared after lower(), i.e. case-insensitively;
// "xml", "version" and "1.0" are compared exactly.
//
// Returns bool.
// NOTE(review): the statements executed when a check fails (presumably
// `return false;`) and the final return are missing from this view -- confirm
// against the complete file.
43 bool XmlReader::validHeader () {
44 // Check for: <? xml version="1.0" ?>
45 if (tokenizer
.nextToken () != XmlTokenizer::Tok_Lt
)
47 if (tokenizer
.nextToken () != XmlTokenizer::Tok_QSign
)
50 if (tokenizer
.nextToken () != XmlTokenizer::Tok_Symbol
)
52 else if (tokenizer
.element () != "xml")
55 if (tokenizer
.nextToken () != XmlTokenizer::Tok_Symbol
)
57 else if (tokenizer
.element () != "version")
59 if (tokenizer
.nextToken () != XmlTokenizer::Tok_Eq
)
61 if (tokenizer
.nextToken () != XmlTokenizer::Tok_String
)
63 else if (tokenizer
.element () != "1.0")
66 if (tokenizer
.nextToken () != XmlTokenizer::Tok_QSign
)
68 if (tokenizer
.nextToken () != XmlTokenizer::Tok_Gt
)
71 // Check for: <(!)doctype symbol system string>
73 if (tokenizer
.nextToken () != XmlTokenizer::Tok_Lt
)
// The '!' in front of the doctype keyword is optional: if present it is
// skipped by fetching the following token.
76 XmlTokenizer::Token token
= tokenizer
.nextToken ();
77 if (token
== XmlTokenizer::Tok_Exclam
)
78 token
= tokenizer
.nextToken ();
80 if (token
!= XmlTokenizer::Tok_Symbol
)
82 else if (tokenizer
.element().lower() != "doctype")
85 if (tokenizer
.nextToken () != XmlTokenizer::Tok_Symbol
)
// Keep the symbol that follows the doctype keyword.
87 s_dtype
= tokenizer
.element ();
89 if (tokenizer
.nextToken () != XmlTokenizer::Tok_Symbol
)
91 else if (tokenizer
.element().lower() != "system")
94 if (tokenizer
.nextToken () != XmlTokenizer::Tok_String
)
// Keep the quoted string that follows the "system" keyword.
96 s_dtd
= tokenizer
.element ();
98 if (tokenizer
.nextToken () != XmlTokenizer::Tok_Gt
)
// Const accessor returning a KOXML_STRING by const reference.
// NOTE(review): body not visible; presumably returns s_dtype as stored by
// validHeader() -- confirm.
104 const KOXML_STRING
& XmlReader::doctype () const {
// Const accessor returning a KOXML_STRING by const reference.
// NOTE(review): body not visible; presumably returns s_dtd as stored by
// validHeader() -- confirm.
108 const KOXML_STRING
& XmlReader::dtd () const {
// Accessor returning a KOXML_STRING by const reference (not a const member).
// NOTE(review): body not visible; presumably returns the `text` value that
// readElement() stores for text tokens -- confirm.
112 const KOXML_STRING
& XmlReader::getText () {
// Reads the next item from the token stream and describes it in `elem`.
//
// Dispatch on the first token:
//   - Tok_Lt: a second token is read and decides the case:
//       Tok_Comment -> comment (handling not visible in this view)
//       Tok_Slash   -> end tag, delegated to parseEndElement(elem)
//       Tok_Symbol  -> start tag, delegated to parseElement(<symbol>, elem)
//       Tok_EOF     -> end of input (handling not visible in this view)
//   - Tok_Text: elem.tagId is set to "#PCDATA" and the token text is stored
//     in `text`.
//
// @param elem  output element filled in by this call / its helpers
// @return bool
// NOTE(review): the declaration of `result`, the bodies of the comment/EOF
// branches and the return statement are missing from this view -- confirm
// against the complete file.
116 bool XmlReader::readElement (XmlElement
& elem
) {
117 XmlTokenizer::Token tok
;
124 tok
= tokenizer
.nextToken ();
125 if (tok
== XmlTokenizer::Tok_Lt
) {
126 tok
= tokenizer
.nextToken ();
127 if (tok
== XmlTokenizer::Tok_Comment
) {
131 if (tok
== XmlTokenizer::Tok_Slash
)
132 result
= parseEndElement (elem
);
133 else if (tok
== XmlTokenizer::Tok_Symbol
)
134 result
= parseElement (tokenizer
.element (), elem
);
135 else if (tok
== XmlTokenizer::Tok_EOF
)
140 else if (tok
== XmlTokenizer::Tok_Text
) {
// Character data is reported under the pseudo tag "#PCDATA"; the text itself
// is kept in `text` rather than in `elem`.
141 elem
.tagId
= "#PCDATA";
142 text
= tokenizer
.element ();
// Parses the remainder of an end tag (called by readElement() after it has
// seen Tok_Lt followed by Tok_Slash).
//
// Expects a Tok_Symbol (the tag name, copied into `tag`) followed by Tok_Gt.
//
// @param elem  output element
// @return bool
// NOTE(review): what is written to `elem` on success, the failure paths and
// the return statement are missing from this view -- confirm.
152 bool XmlReader::parseEndElement (XmlElement
& elem
) {
155 XmlTokenizer::Token tok
= tokenizer
.nextToken ();
156 if (tok
== XmlTokenizer::Tok_Symbol
) {
157 KOXML_STRING tag
= tokenizer
.element ();
158 tok
= tokenizer
.nextToken ();
159 if (tok
== XmlTokenizer::Tok_Gt
) {
// Parses the remainder of a start tag whose name has already been read.
//
// Tokens are consumed until Tok_Gt. Inside the loop:
//   - Tok_Slash:  handled in a branch whose body is not visible here
//   - Tok_Symbol: first tests `!attrib_list.empty() || closed` (consequence
//     not visible), then calls readAttributes(attrib_list)
// After the loop, `closed` and the collected attributes are copied into
// `elem`.
//
// @param id    tag name read by the caller; copied into local `tag`
// @param elem  output element; `closed` and `attribs` are assigned here
// @return bool
// NOTE(review): the declaration of `closed`, the branch bodies, the
// assignment of the tag name to `elem` and the return statement are missing
// from this view -- confirm against the complete file.
169 bool XmlReader::parseElement (const KOXML_STRING
& id
, XmlElement
& elem
) {
170 KOXML_STRING tag
= id
;
172 std::list
<XmlAttribute
> attrib_list
;
174 XmlTokenizer::Token tok
= tokenizer
.nextToken ();
175 while (tok
!= XmlTokenizer::Tok_Gt
) {
176 if (tok
== XmlTokenizer::Tok_Slash
) {
182 else if (tok
== XmlTokenizer::Tok_Symbol
) {
// Guard evaluated before attributes are read: true when attributes were
// already collected or the tag was already marked closed.
183 if (! attrib_list
.empty () || closed
)
186 if (! readAttributes (attrib_list
))
192 tok
= tokenizer
.nextToken ();
196 elem
.closed
= closed
;
197 elem
.attribs
= attrib_list
;
// Reads `name = "value"` attribute pairs from the token stream and appends
// them to `attrib_list`, looping until Tok_Gt is seen.
//
// For each Tok_Symbol the name is taken from the tokenizer, then Tok_Eq and
// Tok_String are required. The string value is un-escaped (see the loops
// below) before an XmlAttribute(id, val) is pushed onto the list.
// Diagnostics written to cerr:
//   "invalid attribute value"   - no Tok_String after '='
//   "missing '='"               - no Tok_Eq after the name
//   "invalid attribute name: N" - unexpected token kind (printed as int)
// Tok_Slash has its own branch (body not visible here).
//
// @param attrib_list  output list; attributes are appended
// @return bool
// NOTE(review): the declaration/resets of `pos`, the else branches, the
// return statements and the closing braces are missing from this view.
// NOTE(review): every `>= 0` loop test below only terminates if find()
// returns a signed "not found" value (or `pos` is a signed type); neither is
// visible here -- confirm for the KOXML_STRING type in use.
201 bool XmlReader::readAttributes (std::list
<XmlAttribute
>& attrib_list
) {
202 XmlTokenizer::Token tok
= tokenizer
.nextToken ();
204 while (tok
!= XmlTokenizer::Tok_Gt
) {
205 if (tok
== XmlTokenizer::Tok_Symbol
) {
206 KOXML_STRING id
= tokenizer
.element ();
207 if (tokenizer
.nextToken () == XmlTokenizer::Tok_Eq
) {
208 if (tokenizer
.nextToken () == XmlTokenizer::Tok_String
) {
210 // un-escape dangerous characters in reverted order
211 KOXML_STRING val
= tokenizer
.element();
// Replace an escaped double quote by a literal '"'.
// NOTE(review): the search literal on the next statement is not a valid
// string as written; the 6-character removal and the inserted "\"" suggest
// it was the six-character quote entity ("&quot;") before this text was
// entity-decoded -- confirm against the pristine file.
213 while ((pos
= val
.find(""", pos
)) >= 0) {
214 KOXML_STRING_REMOVE( val
, pos
, 6);
215 KOXML_STRING_INSERT( val
, pos
, "\"");
// Replace the 4-character escape "&lf;" by a carriage return ("\r").
219 while ((pos
= val
.find("&lf;", pos
)) >= 0) {
220 KOXML_STRING_REMOVE( val
, pos
, 4);
221 KOXML_STRING_INSERT( val
, pos
, "\r");
// Replace the 4-character escape "&nl;" by a newline ("\n").
225 while ((pos
= val
.find("&nl;", pos
)) >= 0) {
226 KOXML_STRING_REMOVE( val
, pos
, 4);
227 KOXML_STRING_INSERT( val
, pos
, "\n");
// Replace an escaped '<' by a literal '<'.
// NOTE(review): the search literal is a single '<' here, yet 4 characters are
// removed; it was presumably the four-character less-than entity ("&lt;")
// before entity-decoding -- confirm against the pristine file.
231 while ((pos
= val
.find("<", pos
)) >= 0) {
232 KOXML_STRING_REMOVE( val
, pos
, 4);
233 KOXML_STRING_INSERT( val
, pos
, "<");
// Collapse an escaped ampersand: the leading '&' is kept and the 4 characters
// after it are removed (note the pos+1).
// NOTE(review): the search literal is a bare '&' here; removing 4 trailing
// characters only makes sense for the five-character ampersand entity
// ("&amp;"), so the literal was presumably entity-decoded -- confirm.
237 while ((pos
= val
.find ("&", pos
)) >= 0) {
238 KOXML_STRING_REMOVE( val
, pos
+1, 4);
242 XmlAttribute
attrib (id
, val
);
243 attrib_list
.push_back (attrib
);
246 cerr
<< "invalid attribute value" << endl
;
251 cerr
<< "missing '='" << endl
;
255 else if (tok
== XmlTokenizer::Tok_Slash
) {
259 cerr
<< "invalid attribute name: " << (int) tok
<< endl
;
262 tok
= tokenizer
.nextToken ();