/*
 * gmarkup.c: Minimal XML markup reader.
 *
 * Unlike the GLib one, this cannot be restarted with more text,
 * as the Mono use does not require it.
 *
 * Actually, with further thought, I think that this could be made
 * to restart very easily. The pos == end condition would mean
 * "return to caller" and only at end parse this would be a fatal
 * error.
 *
 * Not that it matters to Mono, but although it is very simple to change,
 * there is a tricky situation: there are a few places where we check p+n
 * in the source, and those would have to change to be progressive, instead
 * of depending on the string to be complete at that point, so we would
 * have to introduce extra states to cope with that.
 *
 * Miguel de Icaza (miguel@novell.com)
 *
 * (C) 2006 Novell, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining
 * a copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sublicense, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice shall be
 * included in all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
 * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
 * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
 * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 */
/*
 * set_error: store a newly created GError in *error, built from the format
 * string msg and the variadic arguments that follow it.  Does nothing when
 * error is NULL.
 *
 * Relies on a variable named `error' (a GError **) being in scope at the
 * point of use.  At least one argument must follow msg, which is why the
 * callers write set_error ("%s", "text") for fixed messages.
 *
 * The expansion deliberately carries no trailing semicolon: the caller's own
 * `;' terminates the do/while (0), so `set_error (...);' is exactly one
 * statement and remains valid as the body of an unbraced if/else.
 */
#define set_error(msg, ...) do { if (error != NULL) *error = g_error_new (GINT_TO_POINTER (1), 1, msg, __VA_ARGS__); } while (0)
57 struct _GMarkupParseContext
{
60 GDestroyNotify user_data_dnotify
;
63 /* Stores the name of the current element, so we can issue the end_element */
70 g_markup_parse_context_new (const GMarkupParser
*parser
,
71 GMarkupParseFlags flags
,
73 GDestroyNotify user_data_dnotify
)
75 GMarkupParseContext
*context
= g_new0 (GMarkupParseContext
, 1);
77 context
->parser
= *parser
;
78 context
->user_data
= user_data
;
79 context
->user_data_dnotify
= user_data_dnotify
;
85 g_markup_parse_context_free (GMarkupParseContext
*context
)
89 g_return_if_fail (context
!= NULL
);
91 if (context
->user_data_dnotify
!= NULL
)
92 (context
->user_data_dnotify
) (context
->user_data
);
94 if (context
->text
!= NULL
)
95 g_string_free (context
->text
, TRUE
);
96 for (l
= context
->level
; l
; l
= l
->next
)
98 g_slist_free (context
->level
);
103 skip_space (const char *p
, const char *end
)
105 for (; p
< end
&& isspace (*p
); p
++)
111 parse_value (const char *p
, const char *end
, char **value
, GError
**error
)
117 set_error ("%s", "Expected the attribute value to start with a quote");
121 for (; p
< end
&& *p
!= '"'; p
++)
125 l
= (int)(p
- start
);
127 *value
= malloc (l
+ 1);
130 strncpy (*value
, start
, l
);
136 parse_name (const char *p
, const char *end
, char **value
)
138 const char *start
= p
;
141 for (; p
< end
&& isalnum (*p
); p
++)
146 l
= (int)(p
- start
);
147 *value
= malloc (l
+ 1);
150 strncpy (*value
, start
, l
);
156 parse_attributes (const char *p
, const char *end
, char ***names
, char ***values
, GError
**error
, int *full_stop
, int state
)
161 p
= skip_space (p
, end
);
169 if (state
== SKIP_XML_DECLARATION
&& *p
== '?' && ((p
+1) < end
) && *(p
+1) == '>'){
174 if (*p
== '/' && ((p
+1) < end
&& *(p
+1) == '>')){
180 p
= parse_name (p
, end
, &name
);
184 p
= skip_space (p
, end
);
190 set_error ("Expected an = after the attribute name `%s'", name
);
195 p
= skip_space (p
, end
);
201 p
= parse_value (p
, end
, &value
, error
);
208 *names
= g_realloc (*names
, sizeof (char **) * (nnames
+1));
209 *values
= g_realloc (*values
, sizeof (char **) * (nnames
+1));
210 (*names
) [nnames
-1] = name
;
211 (*values
) [nnames
-1] = value
;
212 (*names
) [nnames
] = NULL
;
213 (*values
) [nnames
] = NULL
;
219 destroy_parse_state (GMarkupParseContext
*context
)
223 for (p
= context
->level
; p
!= NULL
; p
= p
->next
)
226 g_slist_free (context
->level
);
227 if (context
->text
!= NULL
)
228 g_string_free (context
->text
, TRUE
);
229 context
->text
= NULL
;
230 context
->level
= NULL
;
234 g_markup_parse_context_parse (GMarkupParseContext
*context
,
235 const gchar
*text
, gssize text_len
,
240 g_return_val_if_fail (context
!= NULL
, FALSE
);
241 g_return_val_if_fail (text
!= NULL
, FALSE
);
242 g_return_val_if_fail (text_len
>= 0, FALSE
);
244 end
= text
+ text_len
;
246 for (p
= text
; p
< end
; p
++){
249 switch (context
->state
){
251 if (c
== ' ' || c
== '\t' || c
== '\f' || c
== '\n')
254 if (p
+1 < end
&& p
[1] == '?'){
255 context
->state
= SKIP_XML_DECLARATION
;
258 context
->state
= START_ELEMENT
;
261 set_error ("%s", "Expected < to start the document");
264 case SKIP_XML_DECLARATION
:
265 case START_ELEMENT
: {
266 const char *element_start
= p
, *element_end
;
268 int full_stop
= 0, l
;
269 gchar
**names
= NULL
, **values
= NULL
;
271 for (; p
< end
&& isspace (*p
); p
++)
274 set_error ("%s", "Unfinished element");
278 if (*p
== '!' && (p
+2 < end
) && (p
[1] == '-') && (p
[2] == '-')){
279 context
->state
= COMMENT
;
284 if (!(isascii (*p
) && isalpha (*p
))){
285 set_error ("%s", "Expected an element name");
289 for (++p
; p
< end
&& (isalnum (*p
) || (*p
== '.')); p
++)
292 set_error ("%s", "Expected an element");
297 for (; p
< end
&& isspace (*p
); p
++)
300 set_error ("%s", "Unfinished element");
303 p
= parse_attributes (p
, end
, &names
, &values
, error
, &full_stop
, context
->state
);
309 /* Only set the error if parse_attributes did not */
310 if (error
!= NULL
&& *error
== NULL
)
311 set_error ("%s", "Unfinished sequence");
314 l
= (int)(element_end
- element_start
);
315 ename
= malloc (l
+ 1);
318 strncpy (ename
, element_start
, l
);
321 if (context
->state
== START_ELEMENT
)
322 if (context
->parser
.start_element
!= NULL
)
323 context
->parser
.start_element (context
, ename
,
324 (const gchar
**) names
,
325 (const gchar
**) values
,
326 context
->user_data
, error
);
333 if (error
!= NULL
&& *error
!= NULL
){
339 if (context
->parser
.end_element
!= NULL
&& context
->state
== START_ELEMENT
){
340 context
->parser
.end_element (context
, ename
, context
->user_data
, error
);
341 if (error
!= NULL
&& *error
!= NULL
){
348 context
->level
= g_slist_prepend (context
->level
, ename
);
351 context
->state
= TEXT
;
353 } /* case START_ELEMENT */
357 context
->state
= FLUSH_TEXT
;
360 if (context
->parser
.text
!= NULL
){
361 if (context
->text
== NULL
)
362 context
->text
= g_string_new ("");
363 g_string_append_c (context
->text
, c
);
371 if (p
+2 < end
&& (p
[1] == '-') && (p
[2] == '>')){
372 context
->state
= TEXT
;
379 if (context
->parser
.text
!= NULL
){
380 context
->parser
.text (context
, context
->text
->str
, context
->text
->len
,
381 context
->user_data
, error
);
382 if (error
!= NULL
&& *error
!= NULL
)
387 context
->state
= CLOSING_ELEMENT
;
390 context
->state
= START_ELEMENT
;
394 case CLOSING_ELEMENT
: {
395 GSList
*current
= context
->level
;
398 if (context
->level
== NULL
){
399 set_error ("%s", "Too many closing tags, not enough open tags");
403 text
= current
->data
;
404 if (context
->parser
.end_element
!= NULL
){
405 context
->parser
.end_element (context
, text
, context
->user_data
, error
);
406 if (error
!= NULL
&& *error
!= NULL
){
413 while (p
< end
&& *p
!= '>')
416 context
->level
= context
->level
->next
;
417 g_slist_free_1 (current
);
418 context
->state
= TEXT
;
420 } /* case CLOSING_ELEMENT */
428 if (context
->parser
.error
&& error
!= NULL
&& *error
)
429 context
->parser
.error (context
, *error
, context
->user_data
);
431 destroy_parse_state (context
);
436 g_markup_parse_context_end_parse (GMarkupParseContext
*context
, GError
**error
)
438 g_return_val_if_fail (context
!= NULL
, FALSE
);
441 * In our case, we always signal errors during parse, not at the end
442 * see the notes at the top of this file for details on how this
443 * could be moved here