Intrinsicify SpanHelpers.IndexOf(char) (dotnet/coreclr#22505)
[mono-project.git] / mono / eglib / gmarkup.c
blobab23d491a235578db9578b4454f96fdf6855423a
1 /*
2 * gmarkup.c: Minimal XML markup reader.
4 * Unlike the GLib one, this can not be restarted with more text
5 * as the Mono use does not require it.
7 * Actually, with further thought, I think that this could be made
8 * to restart very easily. The pos == end condition would mean
9 * "return to caller" and only at end parse this would be a fatal
10 * error.
12 * Not that it matters to Mono, but it is very simple to change. There
13 * is one tricky situation, though: there are a few places where we check p+n
14 * in the source, and that would have to change to be progressive, instead
15 * of depending on the string to be complete at that point, so we would
16 * have to introduce extra states to cope with that.
18 * Author:
19 * Miguel de Icaza (miguel@novell.com)
21 * (C) 2006 Novell, Inc.
23 * Permission is hereby granted, free of charge, to any person obtaining
24 * a copy of this software and associated documentation files (the
25 * "Software"), to deal in the Software without restriction, including
26 * without limitation the rights to use, copy, modify, merge, publish,
27 * distribute, sublicense, and/or sell copies of the Software, and to
28 * permit persons to whom the Software is furnished to do so, subject to
29 * the following conditions:
31 * The above copyright notice and this permission notice shall be
32 * included in all copies or substantial portions of the Software.
34 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
35 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
36 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
37 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
38 * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
39 * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
40 * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
42 #include "config.h"
43 #include <stdio.h>
44 #include <ctype.h>
45 #include <glib.h>
/*
 * set_error:
 * Stores a newly created GError in *gerror when a non-NULL `gerror'
 * location is available, and does nothing otherwise.  The macro expects a
 * variable named `gerror' (a GError **) to be in scope where it is used.
 *
 * The definition deliberately does NOT end with a semicolon: the caller's
 * own `;' completes the do/while, so `set_error (...);' is exactly one
 * statement and stays safe inside an unbraced if/else.
 */
#define set_error(msg, ...) do { if (gerror != NULL) *gerror = g_error_new (GINT_TO_POINTER (1), 1, msg, __VA_ARGS__); } while (0)
/*
 * States of the parser's state machine.  The numeric values are the ones the
 * compiler would assign implicitly; START is 0, which is also the state of a
 * zero-initialized context.
 */
typedef enum {
	START = 0,			/* skipping leading bytes until the first '<' */
	START_ELEMENT = 1,		/* right after '<': element name and attributes */
	TEXT = 2,			/* collecting character data until the next '<' */
	FLUSH_TEXT = 3,			/* first byte after a '<' seen in TEXT */
	CLOSING_ELEMENT = 4,		/* after "</": pop the open element, skip to '>' */
	COMMENT = 5,			/* inside a comment, waiting for its terminator */
	SKIP_XML_DECLARATION = 6	/* after "<?": parsed like an element, no callbacks */
} MonoGMarkupParseState;
59 struct _GMarkupParseContext {
60 GMarkupParser parser;
61 gpointer user_data;
62 GDestroyNotify user_data_dnotify;
63 MonoGMarkupParseState state;
65 /* Stores the name of the current element, so we can issue the end_element */
66 GSList *level;
68 GString *text;
71 GMarkupParseContext *
72 g_markup_parse_context_new (const GMarkupParser *parser,
73 GMarkupParseFlags flags,
74 gpointer user_data,
75 GDestroyNotify user_data_dnotify)
77 GMarkupParseContext *context = g_new0 (GMarkupParseContext, 1);
79 context->parser = *parser;
80 context->user_data = user_data;
81 context->user_data_dnotify = user_data_dnotify;
83 return context;
86 void
87 g_markup_parse_context_free (GMarkupParseContext *context)
89 GSList *l;
91 g_return_if_fail (context != NULL);
93 if (context->user_data_dnotify != NULL)
94 (context->user_data_dnotify) (context->user_data);
96 if (context->text != NULL)
97 g_string_free (context->text, TRUE);
98 for (l = context->level; l; l = l->next)
99 g_free (l->data);
100 g_slist_free (context->level);
101 g_free (context);
104 static gboolean
105 my_isspace (char c)
107 if (c == ' ' || c == '\t' || c == '\r' || c == '\n' || c == '\v')
108 return TRUE;
109 return FALSE;
112 static gboolean
113 my_isalnum (char c)
115 if ((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z'))
116 return TRUE;
117 if (c >= '0' && c <= '9')
118 return TRUE;
120 return FALSE;
123 static gboolean
124 my_isalpha (char c)
126 if ((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z'))
127 return TRUE;
128 return FALSE;
131 static gboolean
132 my_isnamestartchar (char c)
134 /* NameStartChar from https://www.w3.org/TR/xml/#sec-common-syn excluding non-ASCII and ':' */
135 if (my_isalpha (c) || c == '_')
136 return TRUE;
137 return FALSE;
140 static gboolean
141 my_isnamechar (char c)
143 /* NameChar from https://www.w3.org/TR/xml/#sec-common-syn excluding non-ASCII and ':' */
144 if (my_isalnum (c) || c == '_' || c == '-' || c == '.')
145 return TRUE;
146 return FALSE;
/*
 * Returns the first position in [p, end) that does not hold whitespace
 * (as defined by my_isspace), or @end when only whitespace is left.
 */
static const char *
skip_space (const char *p, const char *end)
{
	while (p < end && my_isspace (*p))
		p++;

	return p;
}
157 static const char *
158 parse_value (const char *p, const char *end, char **value, GError **gerror)
160 const char *start;
161 int l;
163 if (*p != '"'){
164 set_error ("%s", "Expected the attribute value to start with a quote");
165 return end;
167 start = ++p;
168 for (; p < end && *p != '"'; p++)
170 if (p == end)
171 return end;
172 l = (int)(p - start);
173 p++;
174 *value = g_malloc (l + 1);
175 if (*value == NULL)
176 return end;
177 strncpy (*value, start, l);
178 (*value) [l] = 0;
179 return p;
/*
 * parse_name:
 * @p: position where the name is expected
 * @end: one past the last readable byte
 * @value: receives a newly allocated, NUL-terminated copy of the name; the
 *         caller releases it with g_free ()
 *
 * Consumes a name-start character followed by any number of name
 * characters.  When @p does not point at a name-start character nothing is
 * consumed and the copy is the empty string.
 *
 * Returns: the position after the name, or @end (with *@value untouched)
 * when the input ran out.
 */
static const char *
parse_name (const char *p, const char *end, char **value)
{
	const char *name_begin = p;
	int name_len;

	if (p < end && my_isnamestartchar (*p)){
		while (p < end && my_isnamechar (*p))
			p++;
	}

	if (p == end)
		return end;

	name_len = (int)(p - name_begin);
	*value = g_malloc (name_len + 1);
	if (*value == NULL)
		return end;
	strncpy (*value, name_begin, name_len);
	(*value) [name_len] = 0;
	return p;
}
203 static const char *
204 parse_attributes (const char *p, const char *end, char ***names, char ***values, GError **gerror, int *full_stop, int state)
206 int nnames = 0;
208 while (TRUE){
209 p = skip_space (p, end);
210 if (p == end)
211 return end;
213 if (*p == '>'){
214 *full_stop = 0;
215 return p;
217 if (state == SKIP_XML_DECLARATION && *p == '?' && ((p+1) < end) && *(p+1) == '>'){
218 *full_stop = 0;
219 return p+1;
222 if (*p == '/' && ((p+1) < end && *(p+1) == '>')){
223 *full_stop = 1;
224 return p+1;
225 } else {
226 char *name, *value;
228 p = parse_name (p, end, &name);
229 if (p == end)
230 return p;
232 p = skip_space (p, end);
233 if (p == end){
234 g_free (name);
235 return p;
237 if (*p != '='){
238 set_error ("Expected an = after the attribute name `%s'", name);
239 g_free (name);
240 return end;
242 p++;
243 p = skip_space (p, end);
244 if (p == end){
245 g_free (name);
246 return end;
249 p = parse_value (p, end, &value, gerror);
250 if (p == end){
251 g_free (name);
252 return p;
255 ++nnames;
256 *names = g_realloc (*names, sizeof (char **) * (nnames+1));
257 *values = g_realloc (*values, sizeof (char **) * (nnames+1));
258 (*names) [nnames-1] = name;
259 (*values) [nnames-1] = value;
260 (*names) [nnames] = NULL;
261 (*values) [nnames] = NULL;
266 static void
267 destroy_parse_state (GMarkupParseContext *context)
269 GSList *p;
271 for (p = context->level; p != NULL; p = p->next)
272 g_free (p->data);
274 g_slist_free (context->level);
275 if (context->text != NULL)
276 g_string_free (context->text, TRUE);
277 context->text = NULL;
278 context->level = NULL;
281 gboolean
282 g_markup_parse_context_parse (GMarkupParseContext *context,
283 const gchar *text, gssize text_len,
284 GError **gerror)
286 const char *p, *end;
288 g_return_val_if_fail (context != NULL, FALSE);
289 g_return_val_if_fail (text != NULL, FALSE);
290 g_return_val_if_fail (text_len >= 0, FALSE);
292 end = text + text_len;
294 for (p = text; p < end; p++){
295 char c = *p;
297 switch (context->state){
298 case START:
299 if (c == ' ' || c == '\t' || c == '\f' || c == '\n' || (c & 0x80))
300 continue;
301 if (c == '<'){
302 if (p+1 < end && p [1] == '?'){
303 context->state = SKIP_XML_DECLARATION;
304 p++;
305 } else
306 context->state = START_ELEMENT;
307 continue;
309 set_error ("%s", "Expected < to start the document");
310 goto fail;
312 case SKIP_XML_DECLARATION:
313 case START_ELEMENT: {
314 const char *element_start = p, *element_end;
315 char *ename = NULL;
316 int full_stop = 0, l;
317 gchar **names = NULL, **values = NULL;
319 for (; p < end && my_isspace (*p); p++)
321 if (p == end){
322 set_error ("%s", "Unfinished element");
323 goto fail;
326 if (*p == '!' && (p+2 < end) && (p [1] == '-') && (p [2] == '-')){
327 context->state = COMMENT;
328 p += 2;
329 break;
332 if (!my_isnamestartchar (*p)){
333 set_error ("%s", "Expected an element name");
334 goto fail;
337 for (++p; p < end && my_isnamechar (*p); p++)
339 if (p == end){
340 set_error ("%s", "Expected an element");
341 goto fail;
343 element_end = p;
345 for (; p < end && my_isspace (*p); p++)
347 if (p == end){
348 set_error ("%s", "Unfinished element");
349 goto fail;
351 p = parse_attributes (p, end, &names, &values, gerror, &full_stop, context->state);
352 if (p == end){
353 if (names != NULL) {
354 g_strfreev (names);
355 g_strfreev (values);
357 /* Only set the error if parse_attributes did not */
358 if (gerror != NULL && *gerror == NULL)
359 set_error ("%s", "Unfinished sequence");
360 goto fail;
362 l = (int)(element_end - element_start);
363 ename = g_malloc (l + 1);
364 if (ename == NULL)
365 goto fail;
366 strncpy (ename, element_start, l);
367 ename [l] = 0;
369 if (context->state == START_ELEMENT)
370 if (context->parser.start_element != NULL)
371 context->parser.start_element (context, ename,
372 (const gchar **) names,
373 (const gchar **) values,
374 context->user_data, gerror);
376 if (names != NULL){
377 g_strfreev (names);
378 g_strfreev (values);
381 if (gerror != NULL && *gerror != NULL){
382 g_free (ename);
383 goto fail;
386 if (full_stop){
387 if (context->parser.end_element != NULL && context->state == START_ELEMENT){
388 context->parser.end_element (context, ename, context->user_data, gerror);
389 if (gerror != NULL && *gerror != NULL){
390 g_free (ename);
391 goto fail;
394 g_free (ename);
395 } else {
396 context->level = g_slist_prepend (context->level, ename);
399 context->state = TEXT;
400 break;
401 } /* case START_ELEMENT */
403 case TEXT: {
404 if (c == '<'){
405 context->state = FLUSH_TEXT;
406 break;
408 if (context->parser.text != NULL){
409 if (context->text == NULL)
410 context->text = g_string_new ("");
411 g_string_append_c (context->text, c);
413 break;
416 case COMMENT:
417 if (*p != '-')
418 break;
419 if (p+2 < end && (p [1] == '-') && (p [2] == '>')){
420 context->state = TEXT;
421 p += 2;
422 break;
424 break;
426 case FLUSH_TEXT:
427 if (context->parser.text != NULL && context->text != NULL){
428 context->parser.text (context, context->text->str, context->text->len,
429 context->user_data, gerror);
430 if (gerror != NULL && *gerror != NULL)
431 goto fail;
434 if (c == '/')
435 context->state = CLOSING_ELEMENT;
436 else {
437 p--;
438 context->state = START_ELEMENT;
440 break;
442 case CLOSING_ELEMENT: {
443 GSList *current = context->level;
444 char *text;
446 if (context->level == NULL){
447 set_error ("%s", "Too many closing tags, not enough open tags");
448 goto fail;
451 text = (char*)current->data;
452 if (context->parser.end_element != NULL){
453 context->parser.end_element (context, text, context->user_data, gerror);
454 if (gerror != NULL && *gerror != NULL){
455 g_free (text);
456 goto fail;
459 g_free (text);
461 while (p < end && *p != '>')
462 p++;
464 context->level = context->level->next;
465 g_slist_free_1 (current);
466 context->state = TEXT;
467 break;
468 } /* case CLOSING_ELEMENT */
470 } /* switch */
474 return TRUE;
475 fail:
476 if (context->parser.error && gerror != NULL && *gerror)
477 context->parser.error (context, *gerror, context->user_data);
479 destroy_parse_state (context);
480 return FALSE;
483 gboolean
484 g_markup_parse_context_end_parse (GMarkupParseContext *context, GError **gerror)
486 g_return_val_if_fail (context != NULL, FALSE);
489 * In our case, we always signal errors during parse, not at the end
490 * see the notes at the top of this file for details on how this
491 * could be moved here
493 return TRUE;