Include <stdio.h> to "utils.h" due to PANIC
[libisds.git] / src / physxml.c
blob9972dc84212ddbed36c1a17c4658494a3a460293
1 #define _POSIX_SOURCE /* For strtok_r */
2 #include "isds_priv.h"
3 #include "utils.h"
5 #include <string.h>
6 #include <expat.h>
7 #include <inttypes.h>
9 #define PHYSXML_ELEMENT_SEPARATOR "|"
10 #define PHYSXML_NS_SEPARATOR ">"
11 #define NS_CHAR_SEPARATOR '>'
13 struct expat_data {
14 XML_Parser parser;
15 const XML_Char **elements; /* NULL terminated array of elements */
16 _Bool found;
17 size_t *start;
18 size_t *end;
19 int depth; /* Current parser depth, root element is 0 */
20 int element_depth; /* elements[element_depth] we are in,
21 -1 if we are not in any (root mismatch)*/
25 /* Check for expat compile-time configuration */
26 _hidden isds_error expat_init(void) {
27 XML_Expat_Version current;
28 const int min_major = 1;
29 const int min_minor = 95;
30 const int min_micro = 8;
31 const XML_Feature *features; /* Static array stored in expat BSS */
32 _Bool ns_supported = 0;
35 * Max(XML_Size) <= Max(size_t)
36 * XML_Char is char, not a wchar_t
37 * XML_UNICODE is undefined (i.e. strings in UTF-8)
38 * */
40 /* Check minimal expat version */
41 current = XML_ExpatVersionInfo();
42 if ( (current.major < min_major) ||
43 (current.major == min_major && current.minor < min_minor) ||
44 (current.major == min_major && current.minor == min_minor &&
45 current.micro < min_micro) ) {
46 isds_log(ILF_ISDS, ILL_CRIT,
47 _("Minimal %d.%d.%d Expat version required. "
48 "Current version is %d.%d.%d\n"),
49 min_major, min_minor, min_micro,
50 current.major, current.minor, current.micro);
51 return IE_ERROR;
54 /* XML_Char must be char, not a wchar_t */
55 features = XML_GetFeatureList();
56 while (features->feature != XML_FEATURE_END) {
57 switch (features->feature) {
58 case XML_FEATURE_UNICODE_WCHAR_T:
59 case XML_FEATURE_UNICODE:
60 isds_log(ILF_ISDS, ILL_CRIT,
61 _("Expat compiled with UTF-16 (wide) characters\n"));
62 return IE_ERROR;
63 break;
64 case XML_FEATURE_SIZEOF_XML_CHAR:
65 if (features->value != sizeof(char)) {
66 isds_log(ILF_ISDS, ILL_CRIT,
67 "Expat compiled with XML_Chars incompatible "
68 "with chars\n");
69 return IE_ERROR;
71 break;
72 case XML_FEATURE_NS:
73 ns_supported = 1;
74 default:
75 break;
77 features++;
80 if (!ns_supported) {
81 isds_log(ILF_ISDS, ILL_CRIT,
82 _("Expat not compiled with name space support\n"));
83 return IE_ERROR;
86 return IE_SUCCESS;
90 /* Breaks element path address into NULL terminated array of elements in
91 * preserved order. Zeroth array element will be first path element.
92 * @path element address, content will be damaged
93 * @return array of elements, NULL in case of error */
94 static const XML_Char **path2elements(XML_Char *path) {
95 const XML_Char **elements = NULL;
96 XML_Char *tmp_path;
97 char *saveptr = NULL;
98 XML_Char *element;
99 unsigned int depth = 0;
101 if (!path) return NULL;
103 elements = malloc(sizeof(elements[0]) * (strlen(path) + 1));
104 if (!elements) return NULL;
106 elements[0] = NULL;
108 tmp_path = path;
109 while ((element = (XML_Char *) strtok_r(tmp_path,
110 PHYSXML_ELEMENT_SEPARATOR, &saveptr))) {
111 tmp_path = NULL;
112 elements[depth++] = element;
115 elements[depth] = NULL;
116 return elements;
120 /* Examine start and empty element tag.
121 * @name is expanded name */
122 static void XMLCALL element_start(void *userData, const XML_Char *name,
123 const XML_Char **atts) {
124 struct expat_data *data = (struct expat_data *) userData;
125 data->depth++;
127 const XML_Index index = XML_GetCurrentByteIndex(data->parser);
128 /*const int count = XML_GetCurrentByteCount(data->parser);*/
129 /* XXX: Because document length is stored as size_t, index always fits
130 * size_t. */
131 const size_t boundary = index;
133 /*printf("Start: name=%s, depth=%zd, offset=%#jx "
134 "count=%u => boundary=%#zx\n",
135 name, data->depth, (uintmax_t)index, count, boundary); */
137 if ((!data->found) &&
138 (data->depth == data->element_depth + 1) &&
139 (!strcmp(data->elements[data->element_depth + 1], name))) {
140 data->element_depth++;
142 /*printf("! Start tag for element `%s' found\n",
143 data->elements[data->element_depth]);*/
145 if (!data->elements[data->element_depth + 1]) {
146 data->found = 1;
147 *data->start = boundary;
153 /* Examine end and empty element tag.
154 * @name is expanded name */
155 static void XMLCALL element_end(void *userData, const XML_Char *name) {
157 struct expat_data *data = (struct expat_data *) userData;
158 enum XML_Status xerr;
160 const XML_Index index = (uintmax_t) XML_GetCurrentByteIndex(data->parser);
161 const int count = XML_GetCurrentByteCount(data->parser);
162 /* XXX: Because document length is stored as size_t, index + count always
163 * fits size_t. */
164 const size_t boundary = index + count - 1;
166 /*printf("End: name=%s, depth=%zd, offset=%#jx "
167 "count=%u => boundary=%#zx\n",
168 name, data->depth, (uintmax_t)index, count, boundary);*/
170 if (data->element_depth == data->depth) {
171 if (data->found) {
172 /*printf("! End tag for element `%s' found\n",
173 data->elements[data->element_depth]);*/
174 *data->end = boundary;
176 /* Here we can stop parser
177 * XXX: requires Expat 1.95.8 */
178 xerr = XML_StopParser(data->parser, XML_FALSE);
179 if (xerr != XML_STATUS_OK) {
180 PANIC(_("Error while stopping parser\n"));
184 data->element_depth--;
187 data->depth--;
191 /* Locate element specified by element path in XML stream.
192 * TODO: Support other encodings than UTF-8
193 * @document is XML documuent as bitstream
194 * @length is size of @docuement in bytes. Zero length is forbidden.
195 * @path is special path (e.g. "|html|head|title",
196 * quallified element names are specified as
197 * NSURI '>' LOCALNAME, ommit NSURI and '>' separator if no namespace
198 * should be addressed (i.e. use only locale name)
199 * You can use PHYSXML_ELEMENT_SEPARATOR and PHYSXML_NS_SEPARATOR string
200 * macros.
201 * @start outputs start of the element location in @document (inclusive,
202 * counts from 0)
203 * @end outputs end of element (inclusive, counts from 0)
204 * @return 0 if element found */
205 _hidden isds_error find_element_boundary(void *document, size_t length,
206 char *path, size_t *start, size_t *end) {
208 XML_Parser parser;
209 enum XML_Status xerr;
210 struct expat_data user_data;
212 if (!document || !path || !start || !end || length <= 0)
213 return IE_INVAL;
215 /* Parse XPath */
216 user_data.elements = path2elements(path);
217 if (!user_data.elements) return IE_NOMEM;
219 /* No element means whole document */
220 if (!user_data.elements[0]) {
221 free(user_data.elements);
222 *start = 0;
223 *end = length - 1;
224 return IE_SUCCESS;
227 /* Create parser*/
228 parser = XML_ParserCreateNS(NULL, NS_CHAR_SEPARATOR);
230 XML_SetStartElementHandler(parser, element_start);
231 XML_SetEndElementHandler(parser, element_end);
233 user_data.parser = parser;
234 user_data.found = 0;
235 user_data.start = start;
236 user_data.end = end;
237 user_data.depth = -1;
238 user_data.element_depth = -1;
239 XML_SetUserData(parser, &user_data);
241 /* Parse it */
242 xerr = XML_Parse(parser, (const char *) document, length, 1);
243 if (xerr != XML_STATUS_OK &&
244 !( (xerr == XML_STATUS_ERROR &&
245 XML_GetErrorCode(parser) == XML_ERROR_ABORTED))) {
246 free(user_data.elements);
247 isds_log(ILF_ISDS, ILL_CRIT, _("XML_Parse failed\n"));
248 return IE_ERROR;
250 free(user_data.elements);
252 XML_ParserFree(parser);
253 if (user_data.found) return IE_SUCCESS;
254 else return IE_NOEXIST;