tests: Add "assignment" operator parser tests to increase coverage
[vala-gnome.git] / libvaladoc / markupreader.vala
blobc2e10a4942bbb3b036f0f9a7b0c8262d88483c01
/* markupreader.vala
 *
 * Copyright (C) 2008-2009 Jürg Billeter
 * Copyright (C) 2011 Florian Brosch
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 *
 * Author:
 * 	Jürg Billeter <j@bitron.ch>
 */
/**
 * Simple reader for a subset of XML.
 *
 * The reader walks a byte buffer (either a memory-mapped file or a string)
 * and hands out one token per {@link read_token} call. After each call,
 * {@link name}, {@link content} and the attribute map describe the token
 * that was just read.
 */
public class Valadoc.MarkupReader : Object {
	/**
	 * Name of the source, used when reporting errors.
	 */
	public string filename {
		private set;
		get;
	}

	/**
	 * Element name of the last start or end element token, otherwise null.
	 */
	public string name {
		private set;
		get;
	}

	/**
	 * Text of the last text token, otherwise null.
	 */
	public string content {
		private set;
		get;
	}

	// Entities recognised by text (), each without the leading '&', and the
	// character each one stands for (same index in both tables).
	private const string[] ENTITY_NAMES = { "amp;", "quot;", "apos;", "lt;", "gt;", "percnt;" };
	private const char[] ENTITY_CHARS = { '&', '"', '\'', '<', '>', '%' };

	private MappedFile mapped_file;
	// Owned copy of the text passed to from_string (); keeps the memory that
	// begin/current/end point into alive for the lifetime of the reader.
	private string? source_text;

	private string[] lines;
	private char* begin;
	private char* current;
	private char* end;

	private int line;
	private int column;

	private Vala.Map<string, string> attributes = new Vala.HashMap<string, string> (str_hash, str_equal);
	private bool empty_element;

	private ErrorReporter reporter;

	/**
	 * Creates a reader over an in-memory string.
	 *
	 * @param filename name used in error messages
	 * @param content markup to read
	 * @param reporter receiver of error messages
	 */
	public MarkupReader.from_string (string filename, string content, ErrorReporter reporter) {
		this.filename = filename;
		this.reporter = reporter;

		// The parameter is only borrowed from the caller. The reader keeps raw
		// pointers into the text, so it has to hold its own copy.
		source_text = content;

		lines = source_text.split ("\n");
		begin = (char*) source_text;
		end = begin + source_text.length;
		current = begin;

		column = 1;
		line = 1;
	}

	/**
	 * Creates a reader over the contents of a file.
	 *
	 * If the file cannot be mapped, an error is sent to the reporter and the
	 * reader behaves as if the input were empty.
	 *
	 * @param filename path of the file to read
	 * @param reporter receiver of error messages
	 */
	public MarkupReader (string filename, ErrorReporter reporter) {
		this.filename = filename;
		this.reporter = reporter;

		// Initialise the position up front so it is well defined even when
		// mapping the file fails.
		line = 1;
		column = 1;
		lines = new string[0];

		try {
			mapped_file = new MappedFile (filename, false);
			size_t length = mapped_file.get_length ();
			begin = mapped_file.get_contents ();
			end = begin;

			if (begin != null) {
				// Mapped contents are not guaranteed to be NUL-terminated, so
				// bound the copy by the mapped length before splitting.
				lines = ((string) begin).substring (0, (long) length).split ("\n");
				end = begin + length;
			}

			current = begin;
		} catch (FileError e) {
			reporter.simple_error (null, "Unable to map file '%s': %s", filename, e.message);
		}
	}

	/**
	 * Returns one line of the input.
	 *
	 * @param line_nr zero-based index into the input split at "\n"
	 * @return the line, or null if //line_nr// is out of range
	 */
	public string? get_line_content (int line_nr) {
		if (line_nr < 0 || line_nr >= this.lines.length) {
			return null;
		}

		return this.lines[line_nr];
	}

	/**
	 * Looks up an attribute of the element read by the last {@link read_token} call.
	 *
	 * @param attr attribute name
	 * @return the value stored for //attr// in the attribute map
	 */
	public string? get_attribute (string attr) {
		return attributes[attr];
	}

	/**
	 * Returns a copy of the current attributes.
	 *
	 * @return map of current attributes
	 */
	public Vala.Map<string,string> get_attributes () {
		var result = new Vala.HashMap<string, string> (str_hash, str_equal);
		foreach (var key in attributes.get_keys ()) {
			result.set (key, attributes.get (key));
		}
		return result;
	}

	// True when get_char_validated () signalled an error: -1 is an invalid
	// sequence, -2 a sequence cut off by the length limit or containing NUL.
	private static bool is_invalid_char (unichar u) {
		return u == (unichar) (-1) || u == (unichar) (-2);
	}

	// Reads an element or attribute name starting at the current position.
	// The name ends at the first space, tab, newline, '>', '/' or '=' byte,
	// or at the end of the input.
	private string read_name () {
		char* name_start = current;

		while (current < end) {
			char c = current[0];
			if (c == ' ' || c == '\t' || c == '>' || c == '/' || c == '=' || c == '\n') {
				break;
			}

			unichar u = ((string) current).get_char_validated ((long) (end - current));
			if (is_invalid_char (u)) {
				reporter.simple_error ("%s:%d".printf (filename, line),
				                       "invalid UTF-8 character");
				// Step over the offending byte, otherwise this loop never
				// terminates. The raw byte stays part of the returned slice.
				current++;
			} else {
				current += u.to_utf8 (null);
			}
		}

		if (current == name_start) {
			// syntax error: invalid name
		}

		return ((string) name_start).substring (0, (int) (current - name_start));
	}

	/**
	 * Reads the next token from the input.
	 *
	 * Comments are skipped. An empty element (<foo/>) yields a start
	 * element token followed, on the next call, by an end element token.
	 * Malformed markup is tolerated silently; processing instructions and
	 * doctype declarations are not consumed and yield MarkupTokenType.NONE.
	 *
	 * @param token_begin location where the token starts
	 * @param token_end location where the token ends
	 * @return the type of the token that was read
	 */
	public MarkupTokenType read_token (out MarkupSourceLocation token_begin, out MarkupSourceLocation token_end) {
		attributes.clear ();

		if (empty_element) {
			// Second half of <foo/>: synthesize the end element at the
			// current position (not at the start of the buffer).
			empty_element = false;
			token_begin = MarkupSourceLocation (current, line, column);
			token_end = MarkupSourceLocation (current, line, column);
			return MarkupTokenType.END_ELEMENT;
		}

		content = null;
		name = null;

		space ();

		MarkupTokenType type = MarkupTokenType.NONE;
		char* token_start = current;
		token_begin = MarkupSourceLocation (token_start, line, column);

		if (current >= end) {
			type = MarkupTokenType.EOF;
		} else if (current[0] == '<') {
			current++;
			if (current >= end) {
				// error
			} else if (current[0] == '?') {
				// processing instruction
			} else if (current[0] == '!') {
				// comment or doctype
				current++;
				if (current < end - 1 && current[0] == '-' && current[1] == '-') {
					// comment
					current += 2;
					while (current < end - 2) {
						if (current[0] == '-' && current[1] == '-' && current[2] == '>') {
							// end of comment
							current += 3;
							break;
						} else if (current[0] == '\n') {
							line++;
							column = 0;
						}

						current++;
					}

					// ignore comment, read next token
					return read_token (out token_begin, out token_end);
				}
			} else if (current[0] == '/') {
				type = MarkupTokenType.END_ELEMENT;
				current++;
				name = read_name ();
				if (current >= end || current[0] != '>') {
					// error
				}
				current++;
			} else {
				type = MarkupTokenType.START_ELEMENT;
				name = read_name ();
				space ();
				while (current < end && current[0] != '>' && current[0] != '/') {
					string attr_name = read_name ();
					if (current >= end || current[0] != '=') {
						// error
					}
					current++;
					// FIXME allow single quotes
					if (current >= end || current[0] != '"') {
						// error
					}
					current++;

					string attr_value = text ('"', false);

					if (current >= end || current[0] != '"') {
						// error
					}
					current++;
					attributes.set (attr_name, attr_value);
					space ();
				}
				// The attribute loop may stop because the input ran out, so
				// check the bounds before looking at the next byte.
				if (current < end && current[0] == '/') {
					empty_element = true;
					current++;
					space ();
				} else {
					empty_element = false;
				}
				if (current >= end || current[0] != '>') {
					// error
				}
				current++;
			}
		} else {
			// Leading whitespace was skipped above and the current byte is
			// neither '<' nor past the end, so there is always text here.
			content = text ('<', true);
			type = MarkupTokenType.TEXT;
		}

		token_end = MarkupSourceLocation (current, line, column - 1);

		return type;
	}

	// Checks whether one of the known entities (without its leading '&')
	// starts at pos. Returns the number of bytes the entity name occupies
	// and stores the character it stands for in replacement; returns 0 if
	// nothing matches. Never reads past the end of the input.
	private int match_entity (char* pos, out char replacement) {
		replacement = '\0';
		long available = (long) (end - pos);

		for (int i = 0; i < ENTITY_NAMES.length; i++) {
			unowned string entity = ENTITY_NAMES[i];
			if (entity.length <= available && ((string) pos).has_prefix (entity)) {
				replacement = ENTITY_CHARS[i];
				return entity.length;
			}
		}

		return 0;
	}

	// Reads text up to (not including) end_char or the end of the input,
	// replacing the entities listed in ENTITY_NAMES. Updates line and column
	// while reading. When rm_trailing_whitespace is set, whitespace at the
	// end of the result is removed.
	private string text (char end_char, bool rm_trailing_whitespace) {
		StringBuilder content = new StringBuilder ();
		char* text_begin = current;
		char* last_linebreak = current;

		while (current < end && current[0] != end_char) {
			unichar u = ((string) current).get_char_validated ((long) (end - current));
			if (is_invalid_char (u)) {
				reporter.simple_error ("%s:%d".printf (filename, line),
				                       "invalid UTF-8 character");
				// Step over the offending byte, otherwise this loop never
				// terminates. The raw byte stays part of the pending slice.
				current++;
			} else if (u == '&') {
				char replacement;
				int entity_length = match_entity (current + 1, out replacement);
				if (entity_length > 0) {
					// Flush the literal text before the entity, then emit
					// the character the entity stands for.
					content.append (((string) text_begin).substring (0, (int) (current - text_begin)));
					content.append_c (replacement);
					current += 1 + entity_length;
					text_begin = current;
				} else {
					// Unknown entity: keep the '&' as literal text.
					current += u.to_utf8 (null);
				}
			} else {
				if (u == '\n') {
					line++;
					column = 0;
					last_linebreak = current;
				}

				current += u.to_utf8 (null);
				column++;
			}
		}

		if (text_begin != current) {
			content.append (((string) text_begin).substring (0, (int) (current - text_begin)));
		}

		column += (int) (current - last_linebreak);

		// Removes trailing whitespace
		if (rm_trailing_whitespace) {
			ssize_t length = content.len;
			while (length > 0 && content.str[length - 1].isspace ()) {
				length--;
			}
			content.truncate ((size_t) length);
		}

		return content.str;
	}

	// Skips whitespace at the current position, updating line and column.
	private void space () {
		while (current < end && current[0].isspace ()) {
			if (current[0] == '\n') {
				line++;
				column = 0;
			}
			current++;
			column++;
		}
	}
}