1 /* Copyright (c) 2003, 2011, Oracle and/or its affiliates. All rights reserved.
3 This program is free software; you can redistribute it and/or modify
4 it under the terms of the GNU General Public License as published by
5 the Free Software Foundation; version 2 of the License.
7 This program is distributed in the hope that it will be useful,
8 but WITHOUT ANY WARRANTY; without even the implied warranty of
9 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10 GNU General Public License for more details.
12 You should have received a copy of the GNU General Public License
13 along with this program; if not, write to the Free Software
14 Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
17 #include "my_global.h"
/* Lexeme type codes: the values compared against the result of my_xml_scan() and named by lex2str(). */
22 #define MY_XML_UNKNOWN 'U'
23 #define MY_XML_EOF 'E'
24 #define MY_XML_STRING 'S'
25 #define MY_XML_IDENT 'I'
29 #define MY_XML_SLASH '/'
30 #define MY_XML_COMMENT 'C'
31 #define MY_XML_TEXT 'T'
32 #define MY_XML_QUESTION '?'
33 #define MY_XML_EXCLAM '!'
34 #define MY_XML_CDATA 'D'
36 typedef struct xml_attr_st
/* Character class bits stored in my_xml_ctype[] and tested by the my_xml_is_space / my_xml_is_id0 / my_xml_is_id1 macros. */
46 #define MY_XML_ID0 0x01 /* Identifier initial character */
47 #define MY_XML_ID1 0x02 /* Identifier medial character */
48 #define MY_XML_SPC 0x08 /* Spacing character */
52 http://www.w3.org/TR/REC-xml/
53 [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
54 CombiningChar | Extender
55 [5] Name ::= (Letter | '_' | ':') (NameChar)*
/*
  Character classification table, indexed by unsigned byte value.

  Each entry is a bit set:
    0x01  byte may start an identifier
    0x02  byte may continue an identifier
    0x08  byte is spacing (TAB, LF, CR, SPACE)
  Letters, '_' and ':' carry both identifier bits (3); digits, '-' and
  '.' carry only the continuation bit (2); every byte >= 0x80 is treated
  as an identifier character so multi-byte encoded names pass through.

  The table is only ever read, so it is declared const.
*/
static const char my_xml_ctype[256]=
{
  /*00*/ 0,0,0,0,0,0,0,0,0,8,8,0,0,8,0,0,
  /*10*/ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
  /*20*/ 8,0,0,0,0,0,0,0,0,0,0,0,0,2,2,0, /*  !"#$%&'()*+,-./ */
  /*30*/ 2,2,2,2,2,2,2,2,2,2,3,0,0,0,0,0, /* 0123456789:;<=>? */
  /*40*/ 0,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3, /* @ABCDEFGHIJKLMNO */
  /*50*/ 3,3,3,3,3,3,3,3,3,3,3,0,0,0,0,3, /* PQRSTUVWXYZ[\]^_ */
  /*60*/ 0,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3, /* `abcdefghijklmno */
  /*70*/ 3,3,3,3,3,3,3,3,3,3,3,0,0,0,0,0, /* pqrstuvwxyz{|}~  */
  /*80*/ 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,
  /*90*/ 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,
  /*A0*/ 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,
  /*B0*/ 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,
  /*C0*/ 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,
  /*D0*/ 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,
  /*E0*/ 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,
  /*F0*/ 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3
};
/*
  Character class predicates over my_xml_ctype[].
  The argument is cast to uchar before indexing so that a plain char
  holding a byte >= 0x80 cannot become a negative index.
  Each macro yields the matching flag bit (non-zero) or 0.
*/
#define my_xml_is_space(c)  (my_xml_ctype[(uchar) (c)] & MY_XML_SPC)
#define my_xml_is_id0(c)    (my_xml_ctype[(uchar) (c)] & MY_XML_ID0)
#define my_xml_is_id1(c)    (my_xml_ctype[(uchar) (c)] & MY_XML_ID1)
83 static const char *lex2str(int lex
)
87 case MY_XML_EOF
: return "END-OF-INPUT";
88 case MY_XML_STRING
: return "STRING";
89 case MY_XML_IDENT
: return "IDENT";
90 case MY_XML_CDATA
: return "CDATA";
91 case MY_XML_EQ
: return "'='";
92 case MY_XML_LT
: return "'<'";
93 case MY_XML_GT
: return "'>'";
94 case MY_XML_SLASH
: return "'/'";
95 case MY_XML_COMMENT
: return "COMMENT";
96 case MY_XML_TEXT
: return "TEXT";
97 case MY_XML_QUESTION
: return "'?'";
98 case MY_XML_EXCLAM
: return "'!'";
100 return "unknown token";
103 static void my_xml_norm_text(MY_XML_ATTR
*a
)
105 for ( ; (a
->beg
< a
->end
) && my_xml_is_space(a
->beg
[0]) ; a
->beg
++ );
106 for ( ; (a
->beg
< a
->end
) && my_xml_is_space(a
->end
[-1]) ; a
->end
-- );
110 static inline my_bool
111 my_xml_parser_prefix_cmp(MY_XML_PARSER
*p
, const char *s
, size_t slen
)
113 return (p
->cur
+ slen
> p
->end
) || memcmp(p
->cur
, s
, slen
);
/*
  Scan the next lexeme starting at p->cur; a receives the lexeme extent.
  Steps visible here, in order:
    - skip spacing characters, then test for end of input (p->cur >= p->end);
    - test for the comment opener and search forward for the comment closer;
    - test for the CDATA opener and search forward for the ]]> terminator;
    - treat any one of ? = / < > ! as a single-character lexeme;
    - for a single or double quote, advance until the same quote character
      (a->beg[0]) or end of input, then test
      MY_XML_FLAG_SKIP_TEXT_NORMALIZATION;
    - otherwise, when the byte satisfies my_xml_is_id0, consume bytes that
      satisfy my_xml_is_id1.
  Callers use the int result as a MY_XML_* lexeme code.
  NOTE(review): the assignments to a->beg and a->end, the lexeme code
  assignments and the return statements are not visible in this extract;
  confirm them against the complete source.
*/
117 static int my_xml_scan(MY_XML_PARSER
*p
,MY_XML_ATTR
*a
)
121 for (; ( p
->cur
< p
->end
) && my_xml_is_space(p
->cur
[0]) ; p
->cur
++);
123 if (p
->cur
>= p
->end
)
134 if (!my_xml_parser_prefix_cmp(p
, C_STRING_WITH_LEN("<!--")))
136 for (; p
->cur
< p
->end
; p
->cur
++)
138 if (!my_xml_parser_prefix_cmp(p
, C_STRING_WITH_LEN("-->")))
147 else if (!my_xml_parser_prefix_cmp(p
, C_STRING_WITH_LEN("<![CDATA[")))
150 for (; p
->cur
< p
->end
- 2 ; p
->cur
++)
152 if (p
->cur
[0] == ']' && p
->cur
[1] == ']' && p
->cur
[2] == '>')
161 else if (strchr("?=/<>!",p
->cur
[0]))
167 else if ( (p
->cur
[0] == '"') || (p
->cur
[0] == '\'') )
170 "string" or 'string' found.
171 Scan until the closing quote/doublequote, or until the END-OF-INPUT.
174 for (; ( p
->cur
< p
->end
) && (p
->cur
[0] != a
->beg
[0]); p
->cur
++)
177 if (p
->cur
< p
->end
) /* Closing quote or doublequote has been found */
180 if (!(p
->flags
& MY_XML_FLAG_SKIP_TEXT_NORMALIZATION
))
184 else if (my_xml_is_id0(p
->cur
[0]))
187 while (p
->cur
< p
->end
&& my_xml_is_id1(p
->cur
[0]))
197 printf("LEX=%s[%d]\n",lex2str(lex
),a
->end
-a
->beg
);
205 static int my_xml_value(MY_XML_PARSER
*st
, const char *str
, size_t len
)
207 return (st
->value
) ? (st
->value
)(st
,str
,len
) : MY_XML_OK
;
/*
  Enter a tag or attribute named by the len bytes at str.
  Visible behaviour:
    - the current path lives in st->attr up to st->attrend; if the path
      length plus len plus one exceeds sizeof(st->attr) an error text is
      written to st->errstr;
    - when the path is not empty (st->attrend > st->attr) an extra step is
      taken before the name is appended (presumably a path separator is
      added; that statement is not visible in this extract);
    - the name is copied to st->attrend with memcpy;
    - the enter handler, if set, is called with the bare name when
      MY_XML_FLAG_RELATIVE_NAMES is set, otherwise with the whole path;
      without a handler the result is MY_XML_OK.
  NOTE(review): the error text reads To deep XML; Too deep was likely
  intended, but the string is runtime output and is left untouched here.
*/
211 static int my_xml_enter(MY_XML_PARSER
*st
, const char *str
, size_t len
)
213 if ((size_t) (st
->attrend
-st
->attr
+len
+1) > sizeof(st
->attr
))
215 sprintf(st
->errstr
,"To deep XML");
218 if (st
->attrend
> st
->attr
)
223 memcpy(st
->attrend
,str
,len
);
226 if (st
->flags
& MY_XML_FLAG_RELATIVE_NAMES
)
227 return st
->enter
? st
->enter(st
, str
, len
) : MY_XML_OK
;
229 return st
->enter
? st
->enter(st
,st
->attr
,st
->attrend
-st
->attr
) : MY_XML_OK
;
/*
  Copy a length-delimited byte range into a buffer as a C string.

  @param s    Destination; must have room for min(l1, l2) + 1 bytes.
  @param src  Source bytes (need not be NUL-terminated).
  @param l1   Maximum number of bytes the destination can take, not
              counting the terminator (callers pass sizeof(buf) - 1).
  @param l2   Number of bytes available at src.

  Copies min(l1, l2) bytes and always NUL-terminates, so the result can
  be formatted with %s.
*/
static void mstr(char *s, const char *src, size_t l1, size_t l2)
{
  size_t n= (l1 < l2) ? l1 : l2;
  memcpy(s, src, n);
  s[n]= '\0';
}
/*
  Leave the innermost tag or attribute of the path kept in p->attr.
  Visible behaviour:
    - e walks back from p->attrend to the previous / or to p->attr, and
      glen becomes the length of the last path component;
    - when str is given and slen differs from glen, both names are copied
      with mstr() and an unexpected closing tag message is formatted into
      p->errstr (one form names the wanted tag, the other reports that
      END-OF-INPUT was wanted);
    - the leave_xml handler, if set, is called with str and slen when
      MY_XML_FLAG_RELATIVE_NAMES is set, otherwise with the whole path;
      without a handler rc is MY_XML_OK in the relative-names branch.
  NOTE(review): the visible mismatch test compares only the lengths, not
  the name bytes. Local declarations, the truncation of the path and the
  return statement are not visible in this extract.
*/
241 static int my_xml_leave(MY_XML_PARSER
*p
, const char *str
, size_t slen
)
249 /* Find previous '/' or beginning */
250 for (e
=p
->attrend
; (e
>p
->attr
) && (e
[0] != '/') ; e
--);
251 glen
= (size_t) ((e
[0] == '/') ? (p
->attrend
-e
-1) : p
->attrend
-e
);
253 if (str
&& (slen
!= glen
))
255 mstr(s
,str
,sizeof(s
)-1,slen
);
258 mstr(g
,e
+1,sizeof(g
)-1,glen
),
259 sprintf(p
->errstr
,"'</%s>' unexpected ('</%s>' wanted)",s
,g
);
262 sprintf(p
->errstr
,"'</%s>' unexpected (END-OF-INPUT wanted)", s
);
266 if (p
->flags
& MY_XML_FLAG_RELATIVE_NAMES
)
267 rc
= p
->leave_xml
? p
->leave_xml(p
, str
, slen
) : MY_XML_OK
;
269 rc
= (p
->leave_xml
? p
->leave_xml(p
,p
->attr
,p
->attrend
-p
->attr
) :
/*
  Parse the len bytes of XML text at str, reporting events through the
  handlers installed on p.
  Flow visible in this extract:
    - the main loop runs while p->cur < p->end;
    - when the current byte is <, lexemes are pulled with my_xml_scan():
      the lexeme is checked for MY_XML_COMMENT and MY_XML_CDATA (CDATA
      content goes to my_xml_value()); a / must be followed by an
      identifier, which is handed to my_xml_leave(); an ! or ? is
      followed by another scan;
    - an identifier sets MY_XML_NODE_TAG and calls my_xml_enter();
    - following identifiers (and strings while exclam is set) are
      attributes: name = value sets MY_XML_NODE_ATTR and calls enter,
      value and leave; a bare name calls enter and leave;
    - a / lexeme, and the branch that requires a ? lexeme, each call
      my_xml_leave() with a NULL name; the tag must then end with a >
      lexeme;
    - otherwise bytes up to the next < are collected, trimmed with
      my_xml_norm_text() unless MY_XML_FLAG_SKIP_TEXT_NORMALIZATION is
      set, and passed to my_xml_value().
  Error messages are formatted into p->errstr.
  NOTE(review): initialisation of the parser from str and len, local
  declarations, braces, jumps and return statements are not visible in
  this extract; confirm the exact control flow against the full source.
*/
279 int my_xml_parse(MY_XML_PARSER
*p
,const char *str
, size_t len
)
286 while ( p
->cur
< p
->end
)
289 if (p
->cur
[0] == '<')
295 lex
=my_xml_scan(p
,&a
);
297 if (MY_XML_COMMENT
== lex
)
300 if (lex
== MY_XML_CDATA
)
304 my_xml_value(p
, a
.beg
, (size_t) (a
.end
-a
.beg
));
308 lex
=my_xml_scan(p
,&a
);
310 if (MY_XML_SLASH
== lex
)
312 if (MY_XML_IDENT
!= (lex
=my_xml_scan(p
,&a
)))
314 sprintf(p
->errstr
,"%s unexpected (ident wanted)",lex2str(lex
));
317 if (MY_XML_OK
!= my_xml_leave(p
,a
.beg
,(size_t) (a
.end
-a
.beg
)))
319 lex
=my_xml_scan(p
,&a
);
323 if (MY_XML_EXCLAM
== lex
)
325 lex
=my_xml_scan(p
,&a
);
328 else if (MY_XML_QUESTION
== lex
)
330 lex
=my_xml_scan(p
,&a
);
334 if (MY_XML_IDENT
== lex
)
336 p
->current_node_type
= MY_XML_NODE_TAG
;
337 if (MY_XML_OK
!= my_xml_enter(p
,a
.beg
,(size_t) (a
.end
-a
.beg
)))
342 sprintf(p
->errstr
,"%s unexpected (ident or '/' wanted)",
347 while ((MY_XML_IDENT
== (lex
=my_xml_scan(p
,&a
))) ||
348 ((MY_XML_STRING
== lex
&& exclam
)))
351 if (MY_XML_EQ
== (lex
=my_xml_scan(p
,&b
)))
353 lex
=my_xml_scan(p
,&b
);
354 if ( (lex
== MY_XML_IDENT
) || (lex
== MY_XML_STRING
) )
356 p
->current_node_type
= MY_XML_NODE_ATTR
;
357 if ((MY_XML_OK
!= my_xml_enter(p
,a
.beg
,(size_t) (a
.end
-a
.beg
))) ||
358 (MY_XML_OK
!= my_xml_value(p
,b
.beg
,(size_t) (b
.end
-b
.beg
))) ||
359 (MY_XML_OK
!= my_xml_leave(p
,a
.beg
,(size_t) (a
.end
-a
.beg
))))
364 sprintf(p
->errstr
,"%s unexpected (ident or string wanted)",
369 else if (MY_XML_IDENT
== lex
)
371 p
->current_node_type
= MY_XML_NODE_ATTR
;
372 if ((MY_XML_OK
!= my_xml_enter(p
,a
.beg
,(size_t) (a
.end
-a
.beg
))) ||
373 (MY_XML_OK
!= my_xml_leave(p
,a
.beg
,(size_t) (a
.end
-a
.beg
))))
376 else if ((MY_XML_STRING
== lex
) && exclam
)
379 We are in <!DOCTYPE>, e.g.
380 <!DOCTYPE name SYSTEM "SystemLiteral">
381 <!DOCTYPE name PUBLIC "PublidLiteral" "SystemLiteral">
382 Just skip "SystemLiteral" and "PublicidLiteral"
389 if (lex
== MY_XML_SLASH
)
391 if (MY_XML_OK
!= my_xml_leave(p
,NULL
,0))
393 lex
=my_xml_scan(p
,&a
);
399 if (lex
!= MY_XML_QUESTION
)
401 sprintf(p
->errstr
,"%s unexpected ('?' wanted)",lex2str(lex
));
404 if (MY_XML_OK
!= my_xml_leave(p
,NULL
,0))
406 lex
=my_xml_scan(p
,&a
);
411 if (MY_XML_OK
!= my_xml_leave(p
,NULL
,0))
415 if (lex
!= MY_XML_GT
)
417 sprintf(p
->errstr
,"%s unexpected ('>' wanted)",lex2str(lex
));
424 for ( ; (p
->cur
< p
->end
) && (p
->cur
[0] != '<') ; p
->cur
++);
427 if (!(p
->flags
& MY_XML_FLAG_SKIP_TEXT_NORMALIZATION
))
428 my_xml_norm_text(&a
);
431 my_xml_value(p
,a
.beg
,(size_t) (a
.end
-a
.beg
));
438 sprintf(p
->errstr
,"unexpected END-OF-INPUT");
445 void my_xml_parser_create(MY_XML_PARSER
*p
)
447 bzero((void*)p
,sizeof(p
[0]));
451 void my_xml_parser_free(MY_XML_PARSER
*p
__attribute__((unused
)))
/*
  Install the callback used for values on parser p.
  action receives the parser and a pointer to the value bytes.
  NOTE(review): the rest of the callback signature and the function body
  are not visible in this extract; presumably action is stored in
  p->value, the member that my_xml_value() calls - confirm.
*/
456 void my_xml_set_value_handler(MY_XML_PARSER
*p
,
457 int (*action
)(MY_XML_PARSER
*p
, const char *s
,
/*
  Install the callback used when a tag or attribute is entered on parser p.
  action receives the parser and a pointer to the name bytes.
  NOTE(review): the rest of the callback signature and the function body
  are not visible in this extract; presumably action is stored in
  p->enter, the member that my_xml_enter() calls - confirm.
*/
463 void my_xml_set_enter_handler(MY_XML_PARSER
*p
,
464 int (*action
)(MY_XML_PARSER
*p
, const char *s
,
/*
  Install the callback used when a tag or attribute is left on parser p.
  action receives the parser and a pointer to the name bytes.
  NOTE(review): the rest of the callback signature and the function body
  are not visible in this extract; presumably action is stored in
  p->leave_xml, the member that my_xml_leave() calls - confirm.
*/
471 void my_xml_set_leave_handler(MY_XML_PARSER
*p
,
472 int (*action
)(MY_XML_PARSER
*p
, const char *s
,
479 void my_xml_set_user_data(MY_XML_PARSER
*p
, void *user_data
)
481 p
->user_data
=user_data
;
/*
  Return the error text associated with parser p.
  NOTE(review): the body is not visible in this extract; presumably it
  returns p->errstr, the buffer the parsing functions format their
  messages into - confirm.
*/
485 const char *my_xml_error_string(MY_XML_PARSER
*p
)
/*
  Return the distance in bytes from beg to the current parse position
  p->cur. beg starts at p->beg and the loop visits every byte from
  p->beg up to p->cur.
  NOTE(review): the loop body is not visible in this extract; presumably
  it moves beg forward at each line break so that the result is an offset
  within the current line - confirm.
*/
491 size_t my_xml_error_pos(MY_XML_PARSER
*p
)
493 const char *beg
=p
->beg
;
495 for ( s
=p
->beg
; s
<p
->cur
; s
++)
500 return (size_t) (p
->cur
-beg
);
503 uint
my_xml_error_lineno(MY_XML_PARSER
*p
)
507 for (s
=p
->beg
; s
<p
->cur
; s
++)