moved kdeaccessibility kdeaddons kdeadmin kdeartwork kdebindings kdeedu kdegames...
[kdeedu.git] / kstars / kstars / indi / lilxml.c
blob5514dce99901483446eb48a2add874cd1d6be0f2
1 #if 0
2 liblilxml
3 Copyright (C) 2003 Elwood C. Downey
5 This library is free software; you can redistribute it and/or
6 modify it under the terms of the GNU Lesser General Public
7 License as published by the Free Software Foundation; either
8 version 2.1 of the License, or (at your option) any later version.
10 This library is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 Lesser General Public License for more details.
15 You should have received a copy of the GNU Lesser General Public
16 License along with this library; if not, write to the Free Software
17 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
19 #endif
/* little DOM-style XML parser.
 * only handles elements, attributes and pcdata content.
 * <! ... > and <? ... > are silently ignored.
 * pcdata is collected into one string, without the leading whitespace of
 * its first line.
 *
 * #define MAIN_TST to create a standalone test program
 */
29 #include <stdio.h>
30 #include <stdlib.h>
31 #include <string.h>
32 #include <ctype.h>
34 #include "lilxml.h"
35 #include "indicom.h"
37 static int oneXMLchar (LilXML *lp, int c, char errmsg[]);
38 static void initParser(LilXML *lp);
39 static void pushXMLEle(LilXML *lp);
40 static void popXMLEle(LilXML *lp);
41 static void resetEndTag(LilXML *lp);
42 static void addAttr(LilXML *lp);
43 static void delAttr (XMLAtt *a);
44 static int isTokenChar (int start, int c);
45 static void growString (char **sp, int c);
46 static void growPCData (XMLEle *ep, int c);
47 static char *newString (void);
48 static void *moremem (void *old, int n);
49 static void freemem (void *m);
/* states of the character-at-a-time parser, see oneXMLchar() */
typedef enum {
    LOOK4START = 0,     /* waiting for the '<' of the first element */
    LOOK4TAG,           /* saw '<', waiting for first char of the tag */
    INTAG,              /* collecting the tag */
    LOOK4ATTRN,         /* waiting for an attr name, '>' or '/' */
    INATTRN,            /* collecting an attr name */
    LOOK4ATTRV,         /* waiting for the quote that opens an attr value */
    SAWSLASH,           /* saw '/' inside an opening element */
    INATTRV,            /* collecting an attr value */
    LOOK4CON,           /* skipping whitespace that leads content */
    INCON,              /* collecting content */
    LTINCON,            /* saw '<' while in content */
    LOOK4CLOSETAG,      /* saw "</", waiting for first char of closing tag */
    INCLOSETAG          /* collecting the closing tag */
} State;
67 /* maintain state while parsing */
68 struct _LilXML {
69 State cs; /* current state */
70 int ln; /* line number for diags */
71 XMLEle *ce; /* current element being built */
72 char *endtag; /* to check for match with opening tag*/
73 int delim; /* attribute value delimiter */
74 int lastc; /* last char (just used wiht skipping)*/
75 int skipping; /* in comment or declaration */
78 /* internal representation of a (possibly nested) XML element */
79 struct _xml_ele {
80 char *tag; /* element tag */
81 struct _xml_ele *pe; /* parent element, or NULL if root */
82 XMLAtt **at; /* list of attributes */
83 int nat; /* number of attributes */
84 int ait; /* used to iterate over at[] */
85 struct _xml_ele **el; /* list of child elements */
86 int nel; /* number of child elements */
87 int eit; /* used to iterate over el[] */
88 char *pcdata; /* character data in this element */
89 int pcdatal; /* handy length sans \0 (tends to be big) */
/* one name="value" attribute of an element */
struct _xml_att {
    char *name;                 /* attribute name */
    char *valu;                 /* attribute value */
    struct _xml_ele *ce;        /* element that contains this attribute */
};
99 /* pass back a fresh handle for use with our other functions */
100 LilXML *
101 newLilXML ()
103 LilXML *lp = (LilXML *) moremem (NULL, sizeof(LilXML));
104 initParser(lp);
105 return (lp);
108 /* discard */
109 void
110 delLilXML (LilXML *lp)
112 freemem (lp);
115 /* delete ep and all its children */
116 void
117 delXMLEle (XMLEle *ep)
119 int i;
121 /* benign if NULL */
122 if (!ep)
123 return;
125 /* delete all parts of ep */
126 freemem (ep->tag);
127 freemem (ep->pcdata);
128 if (ep->at) {
129 for (i = 0; i < ep->nat; i++)
130 delAttr (ep->at[i]);
131 freemem (ep->at);
133 if (ep->el) {
134 for (i = 0; i < ep->nel; i++)
135 delXMLEle (ep->el[i]);
136 freemem (ep->el);
139 /* delete ep itself */
140 freemem (ep);
143 /* process one more character of an XML file.
144 * when find closure with outter element return root of complete tree.
145 * when find error return NULL with reason in errmsg[].
146 * when need more return NULL with errmsg[0] = '\0'.
147 * N.B. it is up to the caller to delete the tree delXMLEle().
149 XMLEle *
150 readXMLEle (LilXML *lp, int newc, char errmsg[])
152 XMLEle *root;
153 int s;
155 /* start optimistic */
156 errmsg[0] = '\0';
158 /* EOF? */
159 if (newc == 0) {
160 snprintf (errmsg, ERRMSG_SIZE, "Line %d: XML EOF", lp->ln);
161 initParser(lp);
162 return (NULL);
165 /* new line? */
166 if (newc == '\n')
167 lp->ln++;
169 /* skip comments and declarations. requires 1 char history */
170 if (!lp->skipping && lp->lastc == '<' && (newc == '?' || newc == '!')) {
171 lp->skipping = 1;
172 lp->lastc = newc;
173 return (NULL);
175 if (lp->skipping) {
176 if (newc == '>')
177 lp->skipping = 0;
178 lp->lastc = newc;
179 return (NULL);
181 if (newc == '<') {
182 lp->lastc = '<';
183 return (NULL);
186 /* do a pending '<' first then newc */
187 if (lp->lastc == '<') {
188 if (oneXMLchar (lp, '<', errmsg) < 0) {
189 initParser(lp);
190 return (NULL);
192 /* N.B. we assume '<' will never result in closure */
195 /* process newc (at last!) */
196 s = oneXMLchar (lp, newc, errmsg);
197 if (s == 0) {
198 lp->lastc = newc;
199 return (NULL);
201 if (s < 0) {
202 initParser(lp);
203 return (NULL);
206 /* Ok! return ce and we start over.
207 * N.B. up to caller to call delXMLEle with what we return.
209 root = lp->ce;
210 lp->ce = NULL;
211 initParser(lp);
212 return (root);
215 /* search ep for an attribute with given name.
216 * return NULL if not found.
218 XMLAtt *
219 findXMLAtt (XMLEle *ep, const char *name)
221 int i;
223 for (i = 0; i < ep->nat; i++)
224 if (!strcmp (ep->at[i]->name, name))
225 return (ep->at[i]);
226 return (NULL);
229 /* search ep for an element with given tag.
230 * return NULL if not found.
232 XMLEle *
233 findXMLEle (XMLEle *ep, const char *tag)
235 int i;
237 for (i = 0; i < ep->nel; i++)
238 if (!strcmp (ep->el[i]->tag, tag))
239 return (ep->el[i]);
240 return (NULL);
243 /* iterate over each child element of ep.
244 * call first time with first set to 1, then 0 from then on.
245 * returns NULL when no more or err
247 XMLEle *
248 nextXMLEle (XMLEle *ep, int init)
250 int eit;
252 if (init)
253 ep->eit = 0;
255 eit = ep->eit++;
256 if (eit < 0 || eit >= ep->nel)
257 return (NULL);
258 return (ep->el[eit]);
261 /* iterate over each attribute of ep.
262 * call first time with first set to 1, then 0 from then on.
263 * returns NULL when no more or err
265 XMLAtt *
266 nextXMLAtt (XMLEle *ep, int init)
268 int ait;
270 if (init)
271 ep->ait = 0;
273 ait = ep->ait++;
274 if (ait < 0 || ait >= ep->nat)
275 return (NULL);
276 return (ep->at[ait]);
279 /* return parent of given XMLEle */
280 XMLEle *
281 parentXMLEle (XMLEle *ep)
283 return (ep->pe);
286 /* return parent element of given XMLAtt */
287 XMLEle *
288 parentXMLAtt (XMLAtt *ap)
290 return (ap->ce);
293 /* access functions */
295 /* return the tag name of the given element */
296 char *
297 tagXMLEle (XMLEle *ep)
299 return (ep->tag);
302 /* return the pcdata portion of the given element */
303 char *
304 pcdataXMLEle (XMLEle *ep)
306 return (ep->pcdata);
309 /* return the number of characters in the pcdata portion of the given element */
310 int
311 pcdatalenXMLEle (XMLEle *ep)
313 return (ep->pcdatal);
316 /* return the nanme of the given attribute */
317 char *
318 nameXMLAtt (XMLAtt *ap)
320 return (ap->name);
323 /* return the value of the given attribute */
324 char *
325 valuXMLAtt (XMLAtt *ap)
327 return (ap->valu);
330 /* return the number of child elements of the given element */
332 nXMLEle (XMLEle *ep)
334 return (ep->nel);
337 /* return the number of attributes in the given element */
339 nXMLAtt (XMLEle *ep)
341 return (ep->nat);
345 /* search ep for an attribute with the given name and return its value.
346 * return "" if not found.
348 const char *
349 findXMLAttValu (XMLEle *ep, char *name)
351 XMLAtt *a = findXMLAtt (ep, name);
352 return (a ? a->valu : "");
355 /* handy wrapper to read one xml file.
356 * return root element else NULL with report in errmsg[]
358 XMLEle *
359 readXMLFile (FILE *fp, LilXML *lp, char errmsg[])
361 int c;
363 while ((c = fgetc(fp)) != EOF) {
364 XMLEle *root = readXMLEle (lp, c, errmsg);
365 if (root || errmsg[0])
366 return (root);
369 return (NULL);
372 /* sample print ep to fp
373 * N.B. set level = 0 on first call
375 #define PRINDENT 4 /* sample print indent each level */
376 void
377 prXMLEle (FILE *fp, XMLEle *ep, int level)
379 int indent = level*PRINDENT;
380 int i;
382 fprintf (fp, "%*s<%s", indent, "", ep->tag);
383 for (i = 0; i < ep->nat; i++)
384 fprintf (fp, " %s=\"%s\"", ep->at[i]->name, ep->at[i]->valu);
385 if (ep->nel > 0) {
386 fprintf (fp, ">\n");
387 for (i = 0; i < ep->nel; i++)
388 prXMLEle (fp, ep->el[i], level+1);
390 if (ep->pcdata[0]) {
391 char *nl;
392 if (ep->nel == 0)
393 fprintf (fp, ">\n");
394 /* indent if none or one line */
395 nl = strpbrk (ep->pcdata, "\n\r");
396 if (!nl || nl == &ep->pcdata[ep->pcdatal-1])
397 fprintf (fp, "%*s", indent+PRINDENT, "");
398 fprintf (fp, "%s", ep->pcdata);
399 if (!nl)
400 fprintf (fp, "\n");
402 if (ep->nel > 0 || ep->pcdata[0])
403 fprintf (fp, "%*s</%s>\n", indent, "", ep->tag);
404 else
405 fprintf (fp, "/>\n");
410 /* process one more char in XML file.
411 * if find final closure, return 1 and tree is in ce.
412 * if need more, return 0.
413 * if real trouble, return -1 and put reason in errmsg.
415 static int
416 oneXMLchar (LilXML *lp, int c, char errmsg[])
418 switch (lp->cs) {
419 case LOOK4START: /* looking for first element start */
420 if (c == '<') {
421 pushXMLEle(lp);
422 lp->cs = LOOK4TAG;
424 /* silently ignore until resync */
425 break;
427 case LOOK4TAG: /* looking for element tag */
428 if (isTokenChar (1, c)) {
429 growString (&lp->ce->tag, c);
430 lp->cs = INTAG;
431 } else if (!isspace(c)) {
432 snprintf (errmsg, ERRMSG_SIZE, "Line %d: Bogus tag char %c", lp->ln, c);
433 return (-1);
435 break;
437 case INTAG: /* reading tag */
438 if (isTokenChar (0, c))
439 growString (&lp->ce->tag, c);
440 else if (c == '>')
441 lp->cs = LOOK4CON;
442 else if (c == '/')
443 lp->cs = SAWSLASH;
444 else
445 lp->cs = LOOK4ATTRN;
446 break;
448 case LOOK4ATTRN: /* looking for attr name, > or / */
449 if (c == '>')
450 lp->cs = LOOK4CON;
451 else if (c == '/')
452 lp->cs = SAWSLASH;
453 else if (isTokenChar (1, c)) {
454 addAttr(lp);
455 growString (&lp->ce->at[lp->ce->nat-1]->name, c);
456 lp->cs = INATTRN;
457 } else if (!isspace(c)) {
458 snprintf (errmsg, ERRMSG_SIZE, "Line %d: Bogus leading attr name char: %c",
459 lp->ln, c);
460 return (-1);
462 break;
464 case SAWSLASH: /* saw / in element opening */
465 if (c == '>') {
466 if (!lp->ce->pe)
467 return(1); /* root has no content */
468 popXMLEle(lp);
469 lp->cs = LOOK4CON;
470 } else {
471 snprintf (errmsg, ERRMSG_SIZE, "Line %d: Bogus char %c before >", lp->ln, c);
472 return (-1);
474 break;
476 case INATTRN: /* reading attr name */
477 if (isTokenChar (0, c))
478 growString (&lp->ce->at[lp->ce->nat-1]->name, c);
479 else if (isspace(c) || c == '=')
480 lp->cs = LOOK4ATTRV;
481 else {
482 snprintf (errmsg, ERRMSG_SIZE, "Line %d: Bogus attr name char: %c", lp->ln,c);
483 return (-1);
485 break;
487 case LOOK4ATTRV: /* looking for attr value */
488 if (c == '\'' || c == '"') {
489 lp->delim = c;
490 growString (&lp->ce->at[lp->ce->nat-1]->valu, '\0');
491 lp->cs = INATTRV;
492 } else if (!(isspace(c) || c == '=')) {
493 snprintf (errmsg, ERRMSG_SIZE, "Line %d: No value for attribute %.100s", lp->ln,
494 lp->ce->at[lp->ce->nat-1]->name);
495 return (-1);
497 break;
499 case INATTRV: /* in attr value */
500 if (c == lp->delim)
501 lp->cs = LOOK4ATTRN;
502 else if (!iscntrl(c))
503 growString (&lp->ce->at[lp->ce->nat-1]->valu, c);
504 break;
506 case LOOK4CON: /* skipping leading content whitespace*/
507 if (c == '<')
508 lp->cs = LTINCON;
509 else if (!isspace(c)) {
510 growPCData (lp->ce, c);
511 lp->cs = INCON;
513 break;
515 case INCON: /* reading content */
516 if (c == '<') {
517 /* if text contains a nl trim trailing blanks.
518 * chomp trailing nl if only one.
520 char *nl = strpbrk (lp->ce->pcdata, "\n\r");
521 if (nl)
522 while (lp->ce->pcdatal > 0 &&
523 lp->ce->pcdata[lp->ce->pcdatal-1] == ' ')
524 lp->ce->pcdata[--lp->ce->pcdatal] = '\0';
525 if (nl == &lp->ce->pcdata[lp->ce->pcdatal-1])
526 lp->ce->pcdata[--lp->ce->pcdatal] = '\0'; /* safe! */
527 lp->cs = LTINCON;
528 } else
529 growPCData (lp->ce, c);
530 break;
532 case LTINCON: /* saw < in content */
533 if (c == '/') {
534 resetEndTag(lp);
535 lp->cs = LOOK4CLOSETAG;
536 } else {
537 pushXMLEle(lp);
538 if (isTokenChar(1,c)) {
539 growString (&lp->ce->tag, c);
540 lp->cs = INTAG;
541 } else
542 lp->cs = LOOK4TAG;
544 break;
546 case LOOK4CLOSETAG: /* looking for closing tag after < */
547 if (isTokenChar (1, c)) {
548 growString (&lp->endtag, c);
549 lp->cs = INCLOSETAG;
550 } else if (!isspace(c)) {
551 snprintf (errmsg, ERRMSG_SIZE, "Line %d: Bogus preend tag char %c", lp->ln,c);
552 return (-1);
554 break;
556 case INCLOSETAG: /* reading closing tag */
557 if (isTokenChar(0, c))
558 growString (&lp->endtag, c);
559 else if (c == '>') {
560 if (strcmp (lp->ce->tag, lp->endtag)) {
561 snprintf (errmsg, ERRMSG_SIZE, "Line %d: closing tag %.64s does not match %.64s",
562 lp->ln, lp->endtag, lp->ce->tag);
563 return (-1);
564 } else if (lp->ce->pe) {
565 popXMLEle(lp);
566 lp->cs = LOOK4CON; /* back to content after nested elem */
567 } else
568 return (1); /* yes! */
569 } else if (!isspace(c)) {
570 snprintf (errmsg, ERRMSG_SIZE, "Line %d: Bogus end tag char %c", lp->ln, c);
571 return (-1);
573 break;
576 return (0);
579 /* set up for a fresh start */
580 static void
581 initParser(LilXML *lp)
583 memset (lp, 0, sizeof(*lp));
584 lp->cs = LOOK4START;
585 lp->ln = 1;
586 delXMLEle (lp->ce);
587 lp->ce = NULL;
588 resetEndTag(lp);
589 lp->lastc = 0;
590 lp->skipping = 0;
593 /* start a new XMLEle.
594 * if ce already set up, add to its list of child elements.
595 * point ce to a new XMLEle.
596 * endtag no longer valid.
598 static void
599 pushXMLEle(LilXML *lp)
601 XMLEle *newe = (XMLEle *) moremem (NULL, sizeof(XMLEle));
602 XMLEle *ce = lp->ce;
604 memset (newe, 0, sizeof(*newe));
605 newe->tag = newString();
606 newe->pcdata = newString();
607 newe->pe = ce;
609 if (ce) {
610 ce->el = (XMLEle **) moremem (ce->el, (ce->nel+1)*sizeof(XMLEle*));
611 ce->el[ce->nel++] = newe;
613 lp->ce = newe;
614 resetEndTag(lp);
617 /* point ce to parent of current ce.
618 * endtag no longer valid.
620 static void
621 popXMLEle(LilXML *lp)
623 lp->ce = lp->ce->pe;
624 resetEndTag(lp);
627 /* add one new XMLAtt to the current element */
628 static void
629 addAttr(LilXML *lp)
631 XMLAtt *newa = (XMLAtt *) moremem (NULL, sizeof(XMLAtt));
632 XMLEle *ce = lp->ce;
634 memset (newa, 0, sizeof(*newa));
635 newa->name = newString();
636 newa->valu = newString();
637 newa->ce = ce;
639 ce->at = (XMLAtt **) moremem (ce->at, (ce->nat+1)*sizeof(XMLAtt *));
640 ce->at[ce->nat++] = newa;
643 /* delete a and all it holds */
644 static void
645 delAttr (XMLAtt *a)
647 if (!a)
648 return;
649 if (a->name)
650 freemem (a->name);
651 if (a->valu)
652 freemem (a->valu);
653 freemem(a);
656 /* delete endtag if appropriate */
657 static void
658 resetEndTag(LilXML *lp)
660 if (lp->endtag) {
661 freemem (lp->endtag);
662 lp->endtag = 0;
/* 1 if c is a valid token character, else 0.
 * it can be alpha or '_' or, unless start is set, numeric.
 * N.B. c is classified through its unsigned char value because <ctype.h>
 *   is undefined for negative arguments other than EOF, which is what a
 *   byte taken from a signed char buffer can be.
 */
static int
isTokenChar (int start, int c)
{
    const int uc = (unsigned char) c;

    return (isalpha(uc) || c == '_' || (!start && isdigit(uc)));
}
/* append c to the malloced string at *sp, enlarging it as needed.
 * *sp may be NULL, in which case a new string is started.
 * *sp is updated because the string may move.
 */
static void
growString (char **sp, int c)
{
    char *s = *sp;
    int len = 0;

    if (s != NULL)
        len = strlen(s);

    s = (char *) moremem (s, len+2);    /* c + '\0' */
    s[len] = (char)c;
    s[len+1] = '\0';

    *sp = s;
}
685 /* special fast version of growString just for ep->pcdata that avoids all the
686 * strlens and tiny increments in allocated mem
688 static void
689 growPCData (XMLEle *ep, int c)
691 int l = ep->pcdatal++;
692 if ((l%32) == 0) {
693 int nm = 32*(l/32+1) + 2; /* c + '\0' */
694 ep->pcdata = (char *) moremem (ep->pcdata, nm);
696 ep->pcdata[l++] = (char)c;
697 ep->pcdata[l] = '\0';
700 /* return a malloced string of one '\0' */
701 static char *
702 newString()
704 char *str;
706 *(str = (char *)moremem(NULL, 16)) = '\0'; /* expect more */
707 return (str);
/* single entry point for getting memory.
 * with old NULL return n fresh bytes, else resize old to n bytes.
 * the result of malloc/realloc is passed back as is.
 */
static void *
moremem (void *old, int n)
{
    if (old == NULL)
        return (malloc (n));
    return (realloc (old, n));
}
/* single entry point for giving memory back; m is handed to free() */
static void
freemem (void *m)
{
    void *block = m;

    free (block);
}
#if defined(MAIN_TST)
/* standalone test program: read one XML document from stdin, report its
 * root tag on stderr and print the tree on stdout, or report the error.
 */
int
main (int ac, char *av[])
{
    char errmsg[ERRMSG_SIZE];
    LilXML *lp = newLilXML();
    XMLEle *root = readXMLFile (stdin, lp, errmsg);

    if (root != NULL) {
        fprintf (stderr, "::::::::::::: %s\n", tagXMLEle(root));
        prXMLEle (stdout, root, 0);
        delXMLEle (root);
    } else if (errmsg[0] != '\0') {
        fprintf (stderr, "Error: %s\n", errmsg);
    }

    delLilXML (lp);

    return (0);
}
#endif