3 * Copyright (C) 2008-2009 Jürg Billeter
4 * Copyright (C) 2011 Florian Brosch
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2.1 of the License, or (at your option) any later version.
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with this library; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
21 * Jürg Billeter <j@bitron.ch>
26 * Simple reader for a subset of XML.
28 public class Valadoc
.MarkupReader
: Object
{
// Name handed to the constructors; also used as the "file:line" prefix of error messages.
// NOTE(review): the accessor declarations of this property are not visible in this excerpt.
29 public string filename
{
// Assigned by read_token () with the text it reads up to the next '<'.
// NOTE(review): the accessor declarations of this property are not visible in this excerpt.
39 public string content
{
// File mapping created by the file-based constructor.
44 private MappedFile mapped_file
;
// The input split on "\n"; served by get_line_content ().
46 private string[] lines
;
// Read cursor into the input buffer; moved forward by read_name () and text ().
48 private char* current
;
// Attribute name -> value pairs stored by read_token () and read by
// get_attribute () / get_attributes ().
54 private Vala
.Map
<string, string> attributes
= new Vala
.HashMap
<string, string> (str_hash
, str_equal
);
// Set to true by read_token () when a '/' follows the attributes of a start tag.
55 private bool empty_element
;
// Receives error messages (file cannot be mapped, invalid UTF-8).
57 private ErrorReporter reporter
;
/**
 * Creates a reader over markup that is already held in a string.
 *
 * @param filename name stored in the reader and used in error messages
 * @param content markup text to read
 * @param reporter receiver for error messages
 */
59 public MarkupReader
.from_string (string filename
, string content
, ErrorReporter reporter
) {
60 this
.filename
= filename
;
61 this
.reporter
= reporter
;
// Keep the individual lines available for get_line_content ().
63 lines
= content
.split ("\n");
// `end` is the first byte past the text (content.length is a byte count).
// NOTE(review): `begin` is read here, but no assignment to it is visible in
// this excerpt - confirm it points at `content` before this line runs.
65 end
= begin
+ content
.length
;
/**
 * Creates a reader over the contents of a file that is mapped into memory.
 *
 * A FileError raised while mapping is passed to the reporter instead of
 * being propagated to the caller.
 *
 * @param filename path of the file to map; also used in error messages
 * @param reporter receiver for error messages
 */
72 public MarkupReader (string filename
, ErrorReporter reporter
) {
73 this
.filename
= filename
;
74 this
.reporter
= reporter
;
// Second argument false: the mapping is not writable.
77 mapped_file
= new
MappedFile (filename
, false);
78 begin
= mapped_file
.get_contents ();
// NOTE(review): the cast reads the mapping as a NUL-terminated string, but
// mapped contents are not guaranteed to be NUL-terminated - confirm this is
// safe for every input file.
79 lines
= ((string) begin
).split ("\n");
// `end` is the first byte past the mapping.
80 end
= begin
+ mapped_file
.get_length ();
// NOTE(review): the `try` this handler belongs to is not visible in this excerpt.
86 } catch (FileError e
) {
// Reported without a source location (first argument is null).
87 reporter
.simple_error (null, "Unable to map file '%s': %s", filename
, e
.message
);
/**
 * Returns one line of the input.
 *
 * @param line_nr zero-based index into the input split on "\n"
 * @return the text of that line, or null when line_nr lies outside the input
 */
public string? get_line_content (int line_nr) {
	// Vala does not bounds-check array accesses, so a negative index has to be
	// rejected here as well as an index past the last line.
	if (line_nr < 0 || line_nr >= this.lines.length) {
		return null;
	}

	return this.lines[line_nr];
}
/**
 * Looks up the value stored for an attribute name in the reader's attribute map.
 *
 * @param attr attribute name to look up
 * @return the value held by the map for attr; null if the map has no entry for it
 */
public string? get_attribute (string attr) {
	string? value = attributes.get (attr);
	return value;
}
/**
 * Returns a copy of the current attributes.
 *
 * The result is a newly created map, so changing it leaves the reader's own
 * attribute map untouched.
 *
 * @return map of current attributes
 */
public Vala.Map<string,string> get_attributes () {
	Vala.Map<string,string> copy = new Vala.HashMap<string,string> (str_hash, str_equal);

	// Transfer every entry of the reader's map into the fresh one.
	foreach (string name in attributes.get_keys ()) {
		copy[name] = attributes[name];
	}

	return copy;
}
/**
 * Reads a name starting at the cursor.
 *
 * The scan stops at a space, tab, newline, '>', '/', '=' or the end of the input.
 *
 * @return the bytes between the position where the scan started and the
 *         position where it stopped
 */
116 private string read_name () {
// Start of the name. This local hides the reader's own `begin` inside this method.
117 char* begin
= current
;
118 while (current
< end
) {
// Characters that end a name.
119 if (current
[0] == ' ' || current
[0] == '\t' || current
[0] == '>'
120 || current
[0] == '/' || current
[0] == '=' || current
[0] == '\n') {
// Decode the next character, looking no further than the end of the input.
// NOTE(review): get_char_validated can also return (unichar) (-2) for a
// sequence cut off by the length limit; only (unichar) (-1) is tested below.
123 unichar u
= ((string) current
).get_char_validated ((long) (end
- current
));
124 if (u
!= (unichar
) (-1)) {
// Step over the character by its encoded length in bytes.
125 current
+= u
.to_utf8 (null);
// NOTE(review): no statement advancing `current` is visible next to this
// error report - confirm the loop still makes progress on invalid input.
127 reporter
.simple_error ("%s:%d".printf (filename
, line
),
128 "invalid UTF-8 character");
// Nothing was consumed, so there is no name at this position.
131 if (current
== begin
) {
132 // syntax error: invalid name
// The name is the byte range [begin, current).
134 return ((string) begin
).substring (0, (int) (current
- begin
));
/**
 * Reads the next token from the input.
 *
 * The visible branches produce EOF, START_ELEMENT, END_ELEMENT and TEXT.
 * Comments are skipped by calling read_token () again. Attributes of a start
 * tag are stored in `attributes`; text is stored in `content`.
 *
 * NOTE(review): several statements of this method (cursor advances, error
 * handling, the final return) are not visible in this excerpt.
 *
 * @param token_begin receives the location where the token starts
 * @param token_end receives the location where the token ends
 * @return the type of the token that was read
 */
137 public MarkupTokenType
read_token (out MarkupSourceLocation token_begin
, out MarkupSourceLocation token_end
) {
// A pending self-closing element is reported as END_ELEMENT.
// NOTE(review): the condition guarding these statements is not visible here;
// presumably it tests empty_element. The local `begin` is only declared
// further down, so `begin` here presumably means the reader's member.
141 empty_element
= false;
142 token_begin
= MarkupSourceLocation (begin
, line
, column
);
143 token_end
= MarkupSourceLocation (begin
, line
, column
);
144 return MarkupTokenType
.END_ELEMENT
;
// Placeholder type until the input has been classified.
152 MarkupTokenType type
= MarkupTokenType
.NONE
;
// Start of this token; hides the reader's own `begin` from here on.
153 char* begin
= current
;
154 token_begin
= MarkupSourceLocation (begin
, line
, column
);
// Nothing left to read.
156 if (current
>= end
) {
157 type
= MarkupTokenType
.EOF
;
// '<' introduces a tag, a comment or a processing instruction.
158 } else if (current
[0] == '<') {
// NOTE(review): this end check only makes sense if the cursor moved past
// '<' on a line that is not visible here.
160 if (current
>= end
) {
162 } else if (current
[0] == '?') {
163 // processing instruction
164 } else if (current
[0] == '!') {
165 // comment or doctype
// Two dashes at the cursor: this is a comment. `end - 1` keeps the
// look-ahead at current[1] inside the buffer.
167 if (current
< end
- 1 && current
[0] == '-' && current
[1] == '-') {
// Search for the closing "-->"; `end - 2` keeps current[2] inside the buffer.
170 while (current
< end
- 2) {
171 if (current
[0] == '-' && current
[1] == '-' && current
[2] == '>') {
// A line break inside the comment; the statements of this branch are not
// visible here (presumably line/column bookkeeping).
175 } else if (current
[0] == '\n') {
182 // ignore comment, read next token
183 return read_token (out token_begin
, out token_end
);
// '/' at the cursor: closing tag.
185 } else if (current
[0] == '/') {
186 type
= MarkupTokenType
.END_ELEMENT
;
// The closing tag has to end with '>'; what happens otherwise is not visible here.
189 if (current
>= end
|| current
[0] != '>') {
// Anything else is treated as the start of an element.
194 type
= MarkupTokenType
.START_ELEMENT
;
// Read name="value" pairs until the tag ends ('>') or a '/' is found.
197 while (current
< end
&& current
[0] != '>' && current
[0] != '/') {
198 string attr_name
= read_name ();
// The name has to be followed by '='.
199 if (current
>= end
|| current
[0] != '=') {
203 // FIXME allow single quotes
// The value has to start with a double quote.
204 if (current
>= end
|| current
[0] != '"') {
// Read up to the next double quote; trailing whitespace of the value is kept.
209 string attr_value
= text ('"', false);
// The value has to end with a double quote.
211 if (current
>= end
|| current
[0] != '"') {
215 attributes
.set (attr_name
, attr_value
);
// A '/' at this point sets empty_element (self-closing element).
// NOTE(review): current[0] is read without a `current >= end` check, unlike
// the neighbouring tests - confirm the cursor cannot be at `end` here.
218 if (current
[0] == '/') {
219 empty_element
= true;
223 empty_element
= false;
// The start tag has to end with '>'.
225 if (current
>= end
|| current
[0] != '>') {
// Not at a '<': read character data up to the next '<' and strip its
// trailing whitespace.
233 if (current
[0] != '<') {
234 content
= text ('<', true);
// NOTE(review): judging from its position this call is the alternative to
// reading text, i.e. it is taken when a '<' is at the cursor.
238 return read_token (out token_begin
, out token_end
);
241 type
= MarkupTokenType
.TEXT
;
// The end location uses column - 1; presumably `column` already points past
// the last character of the token.
244 token_end
= MarkupSourceLocation (current
, line
, column
- 1);
/**
 * Reads text from the cursor up to end_char or the end of the input,
 * whichever comes first.
 *
 * The entities "&amp;", "&quot;", "&apos;", "&lt;", "&gt;" and "&percnt;"
 * are replaced by the characters & " ' < > and %.
 *
 * NOTE(review): the statements that move the cursor past an entity, the
 * line/column bookkeeping and the return statement are not visible in this
 * excerpt; presumably the collected text is returned.
 *
 * @param end_char character that ends the text; the loop stops in front of it
 * @param rm_trailing_whitespace whether whitespace at the end of the
 *        collected text is removed
 */
249 private string text (char end_char
, bool rm_trailing_whitespace
) {
// Collects the decoded text. This local hides the `content` property inside this method.
250 StringBuilder content
= new
StringBuilder ();
// Start of the raw input that has not been copied into the builder yet.
251 char* text_begin
= current
;
// Position used for the column computation after the loop.
252 char* last_linebreak
= current
;
254 while (current
< end
&& current
[0] != end_char
) {
// Decode the next character, looking no further than the end of the input.
255 unichar u
= ((string) current
).get_char_validated ((long) (end
- current
));
// NOTE(review): this branch only reports the error; the original line numbers
// show nothing between the report and the next branch, so `current` does not
// move here - confirm the loop cannot spin forever on invalid UTF-8.
256 if (u
== (unichar
) (-1)) {
257 reporter
.simple_error ("%s:%d".printf (filename
, line
),
258 "invalid UTF-8 character");
259 } else if (u
== '&') {
// First byte after the '&'.
// NOTE(review): has_prefix is not limited by `end`; the checks below rely on
// the buffer being NUL-terminated.
260 char* next_pos
= current
+ u
.to_utf8 (null);
// Each entity branch copies the raw text seen so far, appends the decoded
// character and restarts the raw run at the cursor.
261 if (((string) next_pos
).has_prefix ("amp;")) {
262 content
.append (((string) text_begin
).substring (0, (int) (current
- text_begin
)));
263 content
.append_c ('&');
265 text_begin
= current
;
266 } else if (((string) next_pos
).has_prefix ("quot;")) {
267 content
.append (((string) text_begin
).substring (0, (int) (current
- text_begin
)));
268 content
.append_c ('"');
270 text_begin
= current
;
271 } else if (((string) next_pos
).has_prefix ("apos;")) {
272 content
.append (((string) text_begin
).substring (0, (int) (current
- text_begin
)));
273 content
.append_c ('\'');
275 text_begin
= current
;
276 } else if (((string) next_pos
).has_prefix ("lt;")) {
277 content
.append (((string) text_begin
).substring (0, (int) (current
- text_begin
)));
278 content
.append_c ('<');
280 text_begin
= current
;
281 } else if (((string) next_pos
).has_prefix ("gt;")) {
282 content
.append (((string) text_begin
).substring (0, (int) (current
- text_begin
)));
283 content
.append_c ('>');
285 text_begin
= current
;
286 } else if (((string) next_pos
).has_prefix ("percnt;")) {
287 content
.append (((string) text_begin
).substring (0, (int) (current
- text_begin
)));
288 content
.append_c ('%');
290 text_begin
= current
;
// Step over the character by its encoded length. Judging from its position
// after the entity chain, this handles a '&' that starts no known entity.
292 current
+= u
.to_utf8 (null);
// NOTE(review): the condition guarding this assignment is not visible here;
// presumably it runs when the character is a line break.
298 last_linebreak
= current
;
// Step over an ordinary character by its encoded length in bytes.
301 current
+= u
.to_utf8 (null);
// Copy whatever raw text is left since the last entity.
306 if (text_begin
!= current
) {
307 content
.append (((string) text_begin
).substring (0, (int) (current
- text_begin
)));
// NOTE(review): this adds a byte distance, so multi-byte characters advance
// `column` by more than one.
310 column
+= (int) (current
- last_linebreak
);
312 // Removes trailing whitespace
313 if (rm_trailing_whitespace
) {
// Start one past the last byte of the collected text.
314 char* str_pos
= ((char*)content
.str
) + content
.len
;
// Walk backwards over whitespace. The loop stops before testing the byte at
// index 0, so text that is entirely whitespace keeps its first character.
315 for (str_pos
--; str_pos
> ((char*)content
.str
) && str_pos
[0].isspace(); str_pos
--);
// Cut everything after the byte the loop stopped on (-1: up to the end).
316 content
.erase ((ssize_t
) (str_pos
-((char*) content
.str
) + 1), -1);
322 private void space () {
323 while (current
< end
&& current
[0].isspace ()) {
324 if (current
[0] == '\n') {