 * gmarkup.c: Minimal XML markup reader.
 * Unlike the GLib one, this cannot be restarted with more text,
 * because Mono's use of it does not require that.
 * Actually, with further thought, I think that this could be made
 * to restart very easily. The pos == end condition would mean
 * "return to caller", and only at end parse would this be a fatal
 * error.
 * Not that it matters to Mono, but although this is very simple to change,
 * there is one tricky situation: there are a few places where we check p+n
 * in the source, and those checks would have to become progressive instead
 * of depending on the string being complete at that point, so we would
 * have to introduce extra states to cope with that.
19 * Miguel de Icaza (miguel@novell.com)
21 * (C) 2006 Novell, Inc.
23 * Permission is hereby granted, free of charge, to any person obtaining
24 * a copy of this software and associated documentation files (the
25 * "Software"), to deal in the Software without restriction, including
26 * without limitation the rights to use, copy, modify, merge, publish,
27 * distribute, sublicense, and/or sell copies of the Software, and to
28 * permit persons to whom the Software is furnished to do so, subject to
29 * the following conditions:
31 * The above copyright notice and this permission notice shall be
32 * included in all copies or substantial portions of the Software.
34 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
35 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
36 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
37 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
38 * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
39 * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
40 * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
/*
 * set_error:
 * @msg: printf-style format string for the error message.
 * @...: arguments consumed by @msg. At least one must be supplied, because
 *       the expansion places a comma before plain __VA_ARGS__.
 *
 * If the `error` variable in scope at the point of use is non-NULL, stores
 * in *error the value returned by g_error_new () for the given message;
 * otherwise does nothing. The macro relies on the caller having a variable
 * named `error` (a GError ** out-parameter in this file's functions).
 *
 * The expansion intentionally carries no trailing semicolon, so that
 * `set_error (...);` is exactly one statement and stays valid as the
 * unbraced body of an if that is followed by an else.
 */
#define set_error(msg, ...) do { if (error != NULL) *error = g_error_new (GINT_TO_POINTER (1), 1, msg, __VA_ARGS__); } while (0)
58 struct _GMarkupParseContext
{
61 GDestroyNotify user_data_dnotify
;
64 /* Stores the name of the current element, so we can issue the end_element */
71 g_markup_parse_context_new (const GMarkupParser
*parser
,
72 GMarkupParseFlags flags
,
74 GDestroyNotify user_data_dnotify
)
76 GMarkupParseContext
*context
= g_new0 (GMarkupParseContext
, 1);
78 context
->parser
= *parser
;
79 context
->user_data
= user_data
;
80 context
->user_data_dnotify
= user_data_dnotify
;
86 g_markup_parse_context_free (GMarkupParseContext
*context
)
90 g_return_if_fail (context
!= NULL
);
92 if (context
->user_data_dnotify
!= NULL
)
93 (context
->user_data_dnotify
) (context
->user_data
);
95 if (context
->text
!= NULL
)
96 g_string_free (context
->text
, TRUE
);
97 for (l
= context
->level
; l
; l
= l
->next
)
99 g_slist_free (context
->level
);
106 if (c
== ' ' || c
== '\t' || c
== '\r' || c
== '\n' || c
== '\v')
114 if ((c
>= 'a' && c
<= 'z') || (c
>= 'A' && c
<= 'Z'))
116 if (c
>= '0' && c
<= '9')
125 if ((c
>= 'a' && c
<= 'z') || (c
>= 'A' && c
<= 'Z'))
131 skip_space (const char *p
, const char *end
)
133 for (; p
< end
&& my_isspace (*p
); p
++)
139 parse_value (const char *p
, const char *end
, char **value
, GError
**error
)
145 set_error ("%s", "Expected the attribute value to start with a quote");
149 for (; p
< end
&& *p
!= '"'; p
++)
153 l
= (int)(p
- start
);
155 *value
= g_malloc (l
+ 1);
158 strncpy (*value
, start
, l
);
164 parse_name (const char *p
, const char *end
, char **value
)
166 const char *start
= p
;
169 for (; p
< end
&& my_isalnum (*p
); p
++)
174 l
= (int)(p
- start
);
175 *value
= g_malloc (l
+ 1);
178 strncpy (*value
, start
, l
);
184 parse_attributes (const char *p
, const char *end
, char ***names
, char ***values
, GError
**error
, int *full_stop
, int state
)
189 p
= skip_space (p
, end
);
197 if (state
== SKIP_XML_DECLARATION
&& *p
== '?' && ((p
+1) < end
) && *(p
+1) == '>'){
202 if (*p
== '/' && ((p
+1) < end
&& *(p
+1) == '>')){
208 p
= parse_name (p
, end
, &name
);
212 p
= skip_space (p
, end
);
218 set_error ("Expected an = after the attribute name `%s'", name
);
223 p
= skip_space (p
, end
);
229 p
= parse_value (p
, end
, &value
, error
);
236 *names
= g_realloc (*names
, sizeof (char **) * (nnames
+1));
237 *values
= g_realloc (*values
, sizeof (char **) * (nnames
+1));
238 (*names
) [nnames
-1] = name
;
239 (*values
) [nnames
-1] = value
;
240 (*names
) [nnames
] = NULL
;
241 (*values
) [nnames
] = NULL
;
247 destroy_parse_state (GMarkupParseContext
*context
)
251 for (p
= context
->level
; p
!= NULL
; p
= p
->next
)
254 g_slist_free (context
->level
);
255 if (context
->text
!= NULL
)
256 g_string_free (context
->text
, TRUE
);
257 context
->text
= NULL
;
258 context
->level
= NULL
;
262 g_markup_parse_context_parse (GMarkupParseContext
*context
,
263 const gchar
*text
, gssize text_len
,
268 g_return_val_if_fail (context
!= NULL
, FALSE
);
269 g_return_val_if_fail (text
!= NULL
, FALSE
);
270 g_return_val_if_fail (text_len
>= 0, FALSE
);
272 end
= text
+ text_len
;
274 for (p
= text
; p
< end
; p
++){
277 switch (context
->state
){
279 if (c
== ' ' || c
== '\t' || c
== '\f' || c
== '\n' || (c
& 0x80))
282 if (p
+1 < end
&& p
[1] == '?'){
283 context
->state
= SKIP_XML_DECLARATION
;
286 context
->state
= START_ELEMENT
;
289 set_error ("%s", "Expected < to start the document");
292 case SKIP_XML_DECLARATION
:
293 case START_ELEMENT
: {
294 const char *element_start
= p
, *element_end
;
296 int full_stop
= 0, l
;
297 gchar
**names
= NULL
, **values
= NULL
;
299 for (; p
< end
&& my_isspace (*p
); p
++)
302 set_error ("%s", "Unfinished element");
306 if (*p
== '!' && (p
+2 < end
) && (p
[1] == '-') && (p
[2] == '-')){
307 context
->state
= COMMENT
;
312 if (!my_isalpha (*p
)){
313 set_error ("%s", "Expected an element name");
317 for (++p
; p
< end
&& (my_isalnum (*p
) || (*p
== '.')); p
++)
320 set_error ("%s", "Expected an element");
325 for (; p
< end
&& my_isspace (*p
); p
++)
328 set_error ("%s", "Unfinished element");
331 p
= parse_attributes (p
, end
, &names
, &values
, error
, &full_stop
, context
->state
);
337 /* Only set the error if parse_attributes did not */
338 if (error
!= NULL
&& *error
== NULL
)
339 set_error ("%s", "Unfinished sequence");
342 l
= (int)(element_end
- element_start
);
343 ename
= g_malloc (l
+ 1);
346 strncpy (ename
, element_start
, l
);
349 if (context
->state
== START_ELEMENT
)
350 if (context
->parser
.start_element
!= NULL
)
351 context
->parser
.start_element (context
, ename
,
352 (const gchar
**) names
,
353 (const gchar
**) values
,
354 context
->user_data
, error
);
361 if (error
!= NULL
&& *error
!= NULL
){
367 if (context
->parser
.end_element
!= NULL
&& context
->state
== START_ELEMENT
){
368 context
->parser
.end_element (context
, ename
, context
->user_data
, error
);
369 if (error
!= NULL
&& *error
!= NULL
){
376 context
->level
= g_slist_prepend (context
->level
, ename
);
379 context
->state
= TEXT
;
381 } /* case START_ELEMENT */
385 context
->state
= FLUSH_TEXT
;
388 if (context
->parser
.text
!= NULL
){
389 if (context
->text
== NULL
)
390 context
->text
= g_string_new ("");
391 g_string_append_c (context
->text
, c
);
399 if (p
+2 < end
&& (p
[1] == '-') && (p
[2] == '>')){
400 context
->state
= TEXT
;
407 if (context
->parser
.text
!= NULL
&& context
->text
!= NULL
){
408 context
->parser
.text (context
, context
->text
->str
, context
->text
->len
,
409 context
->user_data
, error
);
410 if (error
!= NULL
&& *error
!= NULL
)
415 context
->state
= CLOSING_ELEMENT
;
418 context
->state
= START_ELEMENT
;
422 case CLOSING_ELEMENT
: {
423 GSList
*current
= context
->level
;
426 if (context
->level
== NULL
){
427 set_error ("%s", "Too many closing tags, not enough open tags");
431 text
= current
->data
;
432 if (context
->parser
.end_element
!= NULL
){
433 context
->parser
.end_element (context
, text
, context
->user_data
, error
);
434 if (error
!= NULL
&& *error
!= NULL
){
441 while (p
< end
&& *p
!= '>')
444 context
->level
= context
->level
->next
;
445 g_slist_free_1 (current
);
446 context
->state
= TEXT
;
448 } /* case CLOSING_ELEMENT */
456 if (context
->parser
.error
&& error
!= NULL
&& *error
)
457 context
->parser
.error (context
, *error
, context
->user_data
);
459 destroy_parse_state (context
);
464 g_markup_parse_context_end_parse (GMarkupParseContext
*context
, GError
**error
)
466 g_return_val_if_fail (context
!= NULL
, FALSE
);
	 * In our case, we always signal errors during parse, not at the end;
	 * see the notes at the top of this file for details on how this
	 * could be moved here.