mySQL 5.0.11 sources for tomato
[tomato.git] / release / src / router / mysql / strings / xml.c
blob3ad955bbabdbe619eaf1429ef6d1cf2b209fb085
1 /* Copyright (c) 2003, 2011, Oracle and/or its affiliates. All rights reserved.
3 This program is free software; you can redistribute it and/or modify
4 it under the terms of the GNU General Public License as published by
5 the Free Software Foundation; version 2 of the License.
7 This program is distributed in the hope that it will be useful,
8 but WITHOUT ANY WARRANTY; without even the implied warranty of
9 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10 GNU General Public License for more details.
12 You should have received a copy of the GNU General Public License
13 along with this program; if not, write to the Free Software
14 Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
17 #include "my_global.h"
18 #include "m_string.h"
19 #include "my_xml.h"
/*
  Token codes produced by my_xml_scan().
  Single-character punctuation tokens use the character itself as the
  code, because the scanner returns the matched byte directly for them.
*/
#define MY_XML_UNKNOWN  'U'   /* Byte that starts no known token */
#define MY_XML_EOF      'E'   /* End of input reached */
#define MY_XML_STRING   'S'   /* Quoted string */
#define MY_XML_IDENT    'I'   /* Identifier (tag or attribute name) */
#define MY_XML_EQ       '='
#define MY_XML_LT       '<'
#define MY_XML_GT       '>'
#define MY_XML_SLASH    '/'
#define MY_XML_COMMENT  'C'   /* "<!--" comment */
#define MY_XML_TEXT     'T'
#define MY_XML_QUESTION '?'
#define MY_XML_EXCLAM   '!'
#define MY_XML_CDATA    'D'   /* "<![CDATA[" section */
/*
  A token's text as a byte range inside the parsed buffer:
  beg points at the first byte, end points one past the last byte,
  so the length is (end - beg). The range is not NUL-terminated.
*/
typedef struct xml_attr_st
{
  const char *beg;
  const char *end;
} MY_XML_ATTR;
/*
  XML ctype:
*/
#define MY_XML_ID0  0x01 /* Identifier initial character */
#define MY_XML_ID1  0x02 /* Identifier medial character  */
#define MY_XML_SPC  0x08 /* Spacing character            */

/*
  http://www.w3.org/TR/REC-xml/
  [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
                   CombiningChar | Extender
  [5] Name ::= (Letter | '_' | ':') (NameChar)*
*/

/*
  Per-byte classification table, indexed by the byte value (0..255).
  Each entry is a bit-or of the MY_XML_* flags above:
    8 = spacing, 2 = identifier medial only, 3 = identifier initial + medial.
  The table is only ever read, hence const.
*/
static const char my_xml_ctype[256]=
{
/*00*/  0,0,0,0,0,0,0,0,0,8,8,0,0,8,0,0,
/*10*/  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
/*20*/  8,0,0,0,0,0,0,0,0,0,0,0,0,2,2,0,  /*  !"#$%&'()*+,-./ */
/*30*/  2,2,2,2,2,2,2,2,2,2,3,0,0,0,0,0,  /* 0123456789:;<=>? */
/*40*/  0,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,  /* @ABCDEFGHIJKLMNO */
/*50*/  3,3,3,3,3,3,3,3,3,3,3,0,0,0,0,3,  /* PQRSTUVWXYZ[\]^_ */
/*60*/  0,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,  /* `abcdefghijklmno */
/*70*/  3,3,3,3,3,3,3,3,3,3,3,0,0,0,0,0,  /* pqrstuvwxyz{|}~  */
/*80*/  3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,
/*90*/  3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,
/*A0*/  3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,
/*B0*/  3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,
/*C0*/  3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,
/*D0*/  3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,
/*E0*/  3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,
/*F0*/  3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3
};

/*
  Classification helpers. The argument is converted to unsigned char so
  that bytes >= 0x80 index the upper half of the table instead of
  producing a negative index when plain char is signed.
*/
#define my_xml_is_space(c)  (my_xml_ctype[(unsigned char) (c)] & MY_XML_SPC)
#define my_xml_is_id0(c)    (my_xml_ctype[(unsigned char) (c)] & MY_XML_ID0)
#define my_xml_is_id1(c)    (my_xml_ctype[(unsigned char) (c)] & MY_XML_ID1)
83 static const char *lex2str(int lex)
85 switch(lex)
87 case MY_XML_EOF: return "END-OF-INPUT";
88 case MY_XML_STRING: return "STRING";
89 case MY_XML_IDENT: return "IDENT";
90 case MY_XML_CDATA: return "CDATA";
91 case MY_XML_EQ: return "'='";
92 case MY_XML_LT: return "'<'";
93 case MY_XML_GT: return "'>'";
94 case MY_XML_SLASH: return "'/'";
95 case MY_XML_COMMENT: return "COMMENT";
96 case MY_XML_TEXT: return "TEXT";
97 case MY_XML_QUESTION: return "'?'";
98 case MY_XML_EXCLAM: return "'!'";
100 return "unknown token";
103 static void my_xml_norm_text(MY_XML_ATTR *a)
105 for ( ; (a->beg < a->end) && my_xml_is_space(a->beg[0]) ; a->beg++ );
106 for ( ; (a->beg < a->end) && my_xml_is_space(a->end[-1]) ; a->end-- );
110 static inline my_bool
111 my_xml_parser_prefix_cmp(MY_XML_PARSER *p, const char *s, size_t slen)
113 return (p->cur + slen > p->end) || memcmp(p->cur, s, slen);
117 static int my_xml_scan(MY_XML_PARSER *p,MY_XML_ATTR *a)
119 int lex;
121 for (; ( p->cur < p->end) && my_xml_is_space(p->cur[0]) ; p->cur++);
123 if (p->cur >= p->end)
125 a->beg=p->end;
126 a->end=p->end;
127 lex=MY_XML_EOF;
128 goto ret;
131 a->beg=p->cur;
132 a->end=p->cur;
134 if (!my_xml_parser_prefix_cmp(p, C_STRING_WITH_LEN("<!--")))
136 for (; p->cur < p->end; p->cur++)
138 if (!my_xml_parser_prefix_cmp(p, C_STRING_WITH_LEN("-->")))
140 p->cur+= 3;
141 break;
144 a->end=p->cur;
145 lex=MY_XML_COMMENT;
147 else if (!my_xml_parser_prefix_cmp(p, C_STRING_WITH_LEN("<![CDATA[")))
149 p->cur+= 9;
150 for (; p->cur < p->end - 2 ; p->cur++)
152 if (p->cur[0] == ']' && p->cur[1] == ']' && p->cur[2] == '>')
154 p->cur+= 3;
155 a->end= p->cur;
156 break;
159 lex= MY_XML_CDATA;
161 else if (strchr("?=/<>!",p->cur[0]))
163 p->cur++;
164 a->end=p->cur;
165 lex=a->beg[0];
167 else if ( (p->cur[0] == '"') || (p->cur[0] == '\'') )
170 "string" or 'string' found.
171 Scan until the closing quote/doublequote, or until the END-OF-INPUT.
173 p->cur++;
174 for (; ( p->cur < p->end ) && (p->cur[0] != a->beg[0]); p->cur++)
176 a->end=p->cur;
177 if (p->cur < p->end) /* Closing quote or doublequote has been found */
178 p->cur++;
179 a->beg++;
180 if (!(p->flags & MY_XML_FLAG_SKIP_TEXT_NORMALIZATION))
181 my_xml_norm_text(a);
182 lex=MY_XML_STRING;
184 else if (my_xml_is_id0(p->cur[0]))
186 p->cur++;
187 while (p->cur < p->end && my_xml_is_id1(p->cur[0]))
188 p->cur++;
189 a->end=p->cur;
190 my_xml_norm_text(a);
191 lex=MY_XML_IDENT;
193 else
194 lex= MY_XML_UNKNOWN;
196 #if 0
197 printf("LEX=%s[%d]\n",lex2str(lex),a->end-a->beg);
198 #endif
200 ret:
201 return lex;
205 static int my_xml_value(MY_XML_PARSER *st, const char *str, size_t len)
207 return (st->value) ? (st->value)(st,str,len) : MY_XML_OK;
211 static int my_xml_enter(MY_XML_PARSER *st, const char *str, size_t len)
213 if ((size_t) (st->attrend-st->attr+len+1) > sizeof(st->attr))
215 sprintf(st->errstr,"To deep XML");
216 return MY_XML_ERROR;
218 if (st->attrend > st->attr)
220 st->attrend[0]= '/';
221 st->attrend++;
223 memcpy(st->attrend,str,len);
224 st->attrend+=len;
225 st->attrend[0]='\0';
226 if (st->flags & MY_XML_FLAG_RELATIVE_NAMES)
227 return st->enter ? st->enter(st, str, len) : MY_XML_OK;
228 else
229 return st->enter ? st->enter(st,st->attr,st->attrend-st->attr) : MY_XML_OK;
/*
  Copy at most l1 bytes of a length-delimited string and NUL-terminate it.

  @param s    Destination; must have room for min(l1, l2) + 1 bytes.
  @param src  Source bytes; need not be NUL-terminated.
  @param l1   Maximum number of bytes to copy (destination capacity minus
              the terminator).
  @param l2   Number of bytes available at src.
*/
static void mstr(char *s, const char *src, size_t l1, size_t l2)
{
  size_t n= (l1 < l2) ? l1 : l2;

  memcpy(s, src, n);
  s[n]= '\0';
}
241 static int my_xml_leave(MY_XML_PARSER *p, const char *str, size_t slen)
243 char *e;
244 size_t glen;
245 char s[32];
246 char g[32];
247 int rc;
249 /* Find previous '/' or beginning */
250 for (e=p->attrend; (e>p->attr) && (e[0] != '/') ; e--);
251 glen = (size_t) ((e[0] == '/') ? (p->attrend-e-1) : p->attrend-e);
253 if (str && (slen != glen))
255 mstr(s,str,sizeof(s)-1,slen);
256 if (glen)
258 mstr(g,e+1,sizeof(g)-1,glen),
259 sprintf(p->errstr,"'</%s>' unexpected ('</%s>' wanted)",s,g);
261 else
262 sprintf(p->errstr,"'</%s>' unexpected (END-OF-INPUT wanted)", s);
263 return MY_XML_ERROR;
266 if (p->flags & MY_XML_FLAG_RELATIVE_NAMES)
267 rc= p->leave_xml ? p->leave_xml(p, str, slen) : MY_XML_OK;
268 else
269 rc= (p->leave_xml ? p->leave_xml(p,p->attr,p->attrend-p->attr) :
270 MY_XML_OK);
272 *e='\0';
273 p->attrend=e;
275 return rc;
279 int my_xml_parse(MY_XML_PARSER *p,const char *str, size_t len)
281 p->attrend=p->attr;
282 p->beg=str;
283 p->cur=str;
284 p->end=str+len;
286 while ( p->cur < p->end )
288 MY_XML_ATTR a;
289 if (p->cur[0] == '<')
291 int lex;
292 int question=0;
293 int exclam=0;
295 lex=my_xml_scan(p,&a);
297 if (MY_XML_COMMENT == lex)
298 continue;
300 if (lex == MY_XML_CDATA)
302 a.beg+= 9;
303 a.end-= 3;
304 my_xml_value(p, a.beg, (size_t) (a.end-a.beg));
305 continue;
308 lex=my_xml_scan(p,&a);
310 if (MY_XML_SLASH == lex)
312 if (MY_XML_IDENT != (lex=my_xml_scan(p,&a)))
314 sprintf(p->errstr,"%s unexpected (ident wanted)",lex2str(lex));
315 return MY_XML_ERROR;
317 if (MY_XML_OK != my_xml_leave(p,a.beg,(size_t) (a.end-a.beg)))
318 return MY_XML_ERROR;
319 lex=my_xml_scan(p,&a);
320 goto gt;
323 if (MY_XML_EXCLAM == lex)
325 lex=my_xml_scan(p,&a);
326 exclam=1;
328 else if (MY_XML_QUESTION == lex)
330 lex=my_xml_scan(p,&a);
331 question=1;
334 if (MY_XML_IDENT == lex)
336 p->current_node_type= MY_XML_NODE_TAG;
337 if (MY_XML_OK != my_xml_enter(p,a.beg,(size_t) (a.end-a.beg)))
338 return MY_XML_ERROR;
340 else
342 sprintf(p->errstr,"%s unexpected (ident or '/' wanted)",
343 lex2str(lex));
344 return MY_XML_ERROR;
347 while ((MY_XML_IDENT == (lex=my_xml_scan(p,&a))) ||
348 ((MY_XML_STRING == lex && exclam)))
350 MY_XML_ATTR b;
351 if (MY_XML_EQ == (lex=my_xml_scan(p,&b)))
353 lex=my_xml_scan(p,&b);
354 if ( (lex == MY_XML_IDENT) || (lex == MY_XML_STRING) )
356 p->current_node_type= MY_XML_NODE_ATTR;
357 if ((MY_XML_OK != my_xml_enter(p,a.beg,(size_t) (a.end-a.beg))) ||
358 (MY_XML_OK != my_xml_value(p,b.beg,(size_t) (b.end-b.beg))) ||
359 (MY_XML_OK != my_xml_leave(p,a.beg,(size_t) (a.end-a.beg))))
360 return MY_XML_ERROR;
362 else
364 sprintf(p->errstr,"%s unexpected (ident or string wanted)",
365 lex2str(lex));
366 return MY_XML_ERROR;
369 else if (MY_XML_IDENT == lex)
371 p->current_node_type= MY_XML_NODE_ATTR;
372 if ((MY_XML_OK != my_xml_enter(p,a.beg,(size_t) (a.end-a.beg))) ||
373 (MY_XML_OK != my_xml_leave(p,a.beg,(size_t) (a.end-a.beg))))
374 return MY_XML_ERROR;
376 else if ((MY_XML_STRING == lex) && exclam)
379 We are in <!DOCTYPE>, e.g.
380 <!DOCTYPE name SYSTEM "SystemLiteral">
381 <!DOCTYPE name PUBLIC "PublidLiteral" "SystemLiteral">
382 Just skip "SystemLiteral" and "PublicidLiteral"
385 else
386 break;
389 if (lex == MY_XML_SLASH)
391 if (MY_XML_OK != my_xml_leave(p,NULL,0))
392 return MY_XML_ERROR;
393 lex=my_xml_scan(p,&a);
397 if (question)
399 if (lex != MY_XML_QUESTION)
401 sprintf(p->errstr,"%s unexpected ('?' wanted)",lex2str(lex));
402 return MY_XML_ERROR;
404 if (MY_XML_OK != my_xml_leave(p,NULL,0))
405 return MY_XML_ERROR;
406 lex=my_xml_scan(p,&a);
409 if (exclam)
411 if (MY_XML_OK != my_xml_leave(p,NULL,0))
412 return MY_XML_ERROR;
415 if (lex != MY_XML_GT)
417 sprintf(p->errstr,"%s unexpected ('>' wanted)",lex2str(lex));
418 return MY_XML_ERROR;
421 else
423 a.beg=p->cur;
424 for ( ; (p->cur < p->end) && (p->cur[0] != '<') ; p->cur++);
425 a.end=p->cur;
427 if (!(p->flags & MY_XML_FLAG_SKIP_TEXT_NORMALIZATION))
428 my_xml_norm_text(&a);
429 if (a.beg != a.end)
431 my_xml_value(p,a.beg,(size_t) (a.end-a.beg));
436 if (p->attr[0])
438 sprintf(p->errstr,"unexpected END-OF-INPUT");
439 return MY_XML_ERROR;
441 return MY_XML_OK;
445 void my_xml_parser_create(MY_XML_PARSER *p)
447 bzero((void*)p,sizeof(p[0]));
451 void my_xml_parser_free(MY_XML_PARSER *p __attribute__((unused)))
456 void my_xml_set_value_handler(MY_XML_PARSER *p,
457 int (*action)(MY_XML_PARSER *p, const char *s,
458 size_t l))
460 p->value=action;
463 void my_xml_set_enter_handler(MY_XML_PARSER *p,
464 int (*action)(MY_XML_PARSER *p, const char *s,
465 size_t l))
467 p->enter=action;
471 void my_xml_set_leave_handler(MY_XML_PARSER *p,
472 int (*action)(MY_XML_PARSER *p, const char *s,
473 size_t l))
475 p->leave_xml=action;
479 void my_xml_set_user_data(MY_XML_PARSER *p, void *user_data)
481 p->user_data=user_data;
485 const char *my_xml_error_string(MY_XML_PARSER *p)
487 return p->errstr;
491 size_t my_xml_error_pos(MY_XML_PARSER *p)
493 const char *beg=p->beg;
494 const char *s;
495 for ( s=p->beg ; s<p->cur; s++)
497 if (s[0] == '\n')
498 beg=s;
500 return (size_t) (p->cur-beg);
503 uint my_xml_error_lineno(MY_XML_PARSER *p)
505 uint res=0;
506 const char *s;
507 for (s=p->beg ; s<p->cur; s++)
509 if (s[0] == '\n')
510 res++;
512 return res;