2 * gmarkup.c: Minimal XML markup reader.
4 * Unlike the GLib one, this parser cannot be restarted with more text,
5 * because Mono's use of it does not require that.
7 * Actually, with further thought, I think that this could be made
8 * to restart very easily. The pos == end condition would mean
9 * "return to caller", and only at end-of-parse would it be a fatal error.
12 * Not that it matters to Mono, but although this is very simple to change,
13 * there is one tricky situation: there are a few places where we check p+n
14 * in the source, and those checks would have to become progressive instead
15 * of depending on the string being complete at that point, so we would
16 * have to introduce extra states to cope with that.
19 * Miguel de Icaza (miguel@novell.com)
21 * (C) 2006 Novell, Inc.
23 * Permission is hereby granted, free of charge, to any person obtaining
24 * a copy of this software and associated documentation files (the
25 * "Software"), to deal in the Software without restriction, including
26 * without limitation the rights to use, copy, modify, merge, publish,
27 * distribute, sublicense, and/or sell copies of the Software, and to
28 * permit persons to whom the Software is furnished to do so, subject to
29 * the following conditions:
31 * The above copyright notice and this permission notice shall be
32 * included in all copies or substantial portions of the Software.
34 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
35 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
36 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
37 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
38 * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
39 * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
40 * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
/*
 * set_error: if the caller supplied a `gerror` out-pointer, store in it the
 * result of g_error_new () built from `msg` and the remaining arguments.
 *
 * Requirements at the point of use:
 *   - a variable named `gerror` must be in scope and be dereferenceable;
 *   - `msg` is used as a format string and at least one variadic argument
 *     must follow it, which is why fixed messages are written as
 *     set_error ("%s", "text").
 *
 * The expansion deliberately does NOT end in a semicolon: the caller writes
 * it. That makes the macro behave as exactly one statement, so it is safe in
 * an unbraced if/else (a trailing ';' inside the macro would terminate the
 * `if` early and orphan the `else`).
 */
#define set_error(msg, ...) do { if (gerror != NULL) *gerror = g_error_new (GINT_TO_POINTER (1), 1, msg, __VA_ARGS__); } while (0)
57 } MonoGMarkupParseState
;
59 struct _GMarkupParseContext
{
62 GDestroyNotify user_data_dnotify
;
63 MonoGMarkupParseState state
;
65 /* Stores the name of the current element, so we can issue the end_element */
72 g_markup_parse_context_new (const GMarkupParser
*parser
,
73 GMarkupParseFlags flags
,
75 GDestroyNotify user_data_dnotify
)
77 GMarkupParseContext
*context
= g_new0 (GMarkupParseContext
, 1);
79 context
->parser
= *parser
;
80 context
->user_data
= user_data
;
81 context
->user_data_dnotify
= user_data_dnotify
;
87 g_markup_parse_context_free (GMarkupParseContext
*context
)
91 g_return_if_fail (context
!= NULL
);
93 if (context
->user_data_dnotify
!= NULL
)
94 (context
->user_data_dnotify
) (context
->user_data
);
96 if (context
->text
!= NULL
)
97 g_string_free (context
->text
, TRUE
);
98 for (l
= context
->level
; l
; l
= l
->next
)
100 g_slist_free (context
->level
);
107 if (c
== ' ' || c
== '\t' || c
== '\r' || c
== '\n' || c
== '\v')
115 if ((c
>= 'a' && c
<= 'z') || (c
>= 'A' && c
<= 'Z'))
117 if (c
>= '0' && c
<= '9')
126 if ((c
>= 'a' && c
<= 'z') || (c
>= 'A' && c
<= 'Z'))
132 my_isnamestartchar (char c
)
134 /* NameStartChar from https://www.w3.org/TR/xml/#sec-common-syn excluding non-ASCII and ':' */
135 if (my_isalpha (c
) || c
== '_')
141 my_isnamechar (char c
)
143 /* NameChar from https://www.w3.org/TR/xml/#sec-common-syn excluding non-ASCII and ':' */
144 if (my_isalnum (c
) || c
== '_' || c
== '-' || c
== '.')
150 skip_space (const char *p
, const char *end
)
152 for (; p
< end
&& my_isspace (*p
); p
++)
158 parse_value (const char *p
, const char *end
, char **value
, GError
**gerror
)
164 set_error ("%s", "Expected the attribute value to start with a quote");
168 for (; p
< end
&& *p
!= '"'; p
++)
172 l
= (int)(p
- start
);
174 *value
= g_malloc (l
+ 1);
177 strncpy (*value
, start
, l
);
183 parse_name (const char *p
, const char *end
, char **value
)
185 const char *start
= p
;
188 if (p
< end
&& my_isnamestartchar (*p
))
189 for (; p
< end
&& my_isnamechar (*p
); p
++)
194 l
= (int)(p
- start
);
195 *value
= g_malloc (l
+ 1);
198 strncpy (*value
, start
, l
);
204 parse_attributes (const char *p
, const char *end
, char ***names
, char ***values
, GError
**gerror
, int *full_stop
, int state
)
209 p
= skip_space (p
, end
);
217 if (state
== SKIP_XML_DECLARATION
&& *p
== '?' && ((p
+1) < end
) && *(p
+1) == '>'){
222 if (*p
== '/' && ((p
+1) < end
&& *(p
+1) == '>')){
228 p
= parse_name (p
, end
, &name
);
232 p
= skip_space (p
, end
);
238 set_error ("Expected an = after the attribute name `%s'", name
);
243 p
= skip_space (p
, end
);
249 p
= parse_value (p
, end
, &value
, gerror
);
256 *names
= g_realloc (*names
, sizeof (char **) * (nnames
+1));
257 *values
= g_realloc (*values
, sizeof (char **) * (nnames
+1));
258 (*names
) [nnames
-1] = name
;
259 (*values
) [nnames
-1] = value
;
260 (*names
) [nnames
] = NULL
;
261 (*values
) [nnames
] = NULL
;
267 destroy_parse_state (GMarkupParseContext
*context
)
271 for (p
= context
->level
; p
!= NULL
; p
= p
->next
)
274 g_slist_free (context
->level
);
275 if (context
->text
!= NULL
)
276 g_string_free (context
->text
, TRUE
);
277 context
->text
= NULL
;
278 context
->level
= NULL
;
282 g_markup_parse_context_parse (GMarkupParseContext
*context
,
283 const gchar
*text
, gssize text_len
,
288 g_return_val_if_fail (context
!= NULL
, FALSE
);
289 g_return_val_if_fail (text
!= NULL
, FALSE
);
290 g_return_val_if_fail (text_len
>= 0, FALSE
);
292 end
= text
+ text_len
;
294 for (p
= text
; p
< end
; p
++){
297 switch (context
->state
){
299 if (c
== ' ' || c
== '\t' || c
== '\f' || c
== '\n' || (c
& 0x80))
302 if (p
+1 < end
&& p
[1] == '?'){
303 context
->state
= SKIP_XML_DECLARATION
;
306 context
->state
= START_ELEMENT
;
309 set_error ("%s", "Expected < to start the document");
312 case SKIP_XML_DECLARATION
:
313 case START_ELEMENT
: {
314 const char *element_start
= p
, *element_end
;
316 int full_stop
= 0, l
;
317 gchar
**names
= NULL
, **values
= NULL
;
319 for (; p
< end
&& my_isspace (*p
); p
++)
322 set_error ("%s", "Unfinished element");
326 if (*p
== '!' && (p
+2 < end
) && (p
[1] == '-') && (p
[2] == '-')){
327 context
->state
= COMMENT
;
332 if (!my_isnamestartchar (*p
)){
333 set_error ("%s", "Expected an element name");
337 for (++p
; p
< end
&& my_isnamechar (*p
); p
++)
340 set_error ("%s", "Expected an element");
345 for (; p
< end
&& my_isspace (*p
); p
++)
348 set_error ("%s", "Unfinished element");
351 p
= parse_attributes (p
, end
, &names
, &values
, gerror
, &full_stop
, context
->state
);
357 /* Only set the error if parse_attributes did not */
358 if (gerror
!= NULL
&& *gerror
== NULL
)
359 set_error ("%s", "Unfinished sequence");
362 l
= (int)(element_end
- element_start
);
363 ename
= g_malloc (l
+ 1);
366 strncpy (ename
, element_start
, l
);
369 if (context
->state
== START_ELEMENT
)
370 if (context
->parser
.start_element
!= NULL
)
371 context
->parser
.start_element (context
, ename
,
372 (const gchar
**) names
,
373 (const gchar
**) values
,
374 context
->user_data
, gerror
);
381 if (gerror
!= NULL
&& *gerror
!= NULL
){
387 if (context
->parser
.end_element
!= NULL
&& context
->state
== START_ELEMENT
){
388 context
->parser
.end_element (context
, ename
, context
->user_data
, gerror
);
389 if (gerror
!= NULL
&& *gerror
!= NULL
){
396 context
->level
= g_slist_prepend (context
->level
, ename
);
399 context
->state
= TEXT
;
401 } /* case START_ELEMENT */
405 context
->state
= FLUSH_TEXT
;
408 if (context
->parser
.text
!= NULL
){
409 if (context
->text
== NULL
)
410 context
->text
= g_string_new ("");
411 g_string_append_c (context
->text
, c
);
419 if (p
+2 < end
&& (p
[1] == '-') && (p
[2] == '>')){
420 context
->state
= TEXT
;
427 if (context
->parser
.text
!= NULL
&& context
->text
!= NULL
){
428 context
->parser
.text (context
, context
->text
->str
, context
->text
->len
,
429 context
->user_data
, gerror
);
430 if (gerror
!= NULL
&& *gerror
!= NULL
)
435 context
->state
= CLOSING_ELEMENT
;
438 context
->state
= START_ELEMENT
;
442 case CLOSING_ELEMENT
: {
443 GSList
*current
= context
->level
;
446 if (context
->level
== NULL
){
447 set_error ("%s", "Too many closing tags, not enough open tags");
451 text
= (char*)current
->data
;
452 if (context
->parser
.end_element
!= NULL
){
453 context
->parser
.end_element (context
, text
, context
->user_data
, gerror
);
454 if (gerror
!= NULL
&& *gerror
!= NULL
){
461 while (p
< end
&& *p
!= '>')
464 context
->level
= context
->level
->next
;
465 g_slist_free_1 (current
);
466 context
->state
= TEXT
;
468 } /* case CLOSING_ELEMENT */
476 if (context
->parser
.error
&& gerror
!= NULL
&& *gerror
)
477 context
->parser
.error (context
, *gerror
, context
->user_data
);
479 destroy_parse_state (context
);
484 g_markup_parse_context_end_parse (GMarkupParseContext
*context
, GError
**gerror
)
486 g_return_val_if_fail (context
!= NULL
, FALSE
);
489 * In our case, we always signal errors during parse, not at the end;
490 * see the notes at the top of this file for details on how this
491 * could be moved here.