SpiderMonkey: fix issue with javascript:history.back()
[elinks.git] / src / dom / test / sgml-parser.c
blob9e106ade2682e25f0d5e96fb1f3b054fb0aa6388
1 /* Tool for testing the SGML parser */
3 #ifdef HAVE_CONFIG_H
4 #include "config.h"
5 #endif
7 #include <stdarg.h>
8 #include <stdio.h>
9 #include <stdlib.h>
10 #include <string.h>
12 #include "elinks.h"
14 #include "dom/configuration.h"
15 #include "dom/node.h"
16 #include "dom/sgml/dump.h"
17 #include "dom/sgml/parser.h"
18 #include "dom/stack.h"
19 #include "util/test.h"
22 unsigned int number_of_lines = 0;
24 static int
25 update_number_of_lines(struct dom_stack *stack)
27 struct sgml_parser *parser = stack->contexts[0]->data;
28 int lines;
30 if (!(parser->flags & SGML_PARSER_COUNT_LINES))
31 return 0;
33 lines = get_sgml_parser_line_number(parser);
34 if (number_of_lines < lines)
35 number_of_lines = lines;
37 return 1;
40 /* Print the string in a compressed form: a single line with newlines etc.
41 * replaced with "\\n" sequence. */
42 static void
43 print_compressed_string(struct dom_string *string)
45 unsigned char escape[3] = { '\\', '?', 0 };
46 size_t pos;
48 for (pos = 0; pos < string->length; pos++) {
49 unsigned char data = string->string[pos];
51 switch (data) {
52 case '\n': escape[1] = 'n'; break;
53 case '\r': escape[1] = 'r'; break;
54 case '\t': escape[1] = 't'; break;
55 default:
56 putchar(data);
57 continue;
60 printf("%s", escape);
64 /* Set @string to the value of the given @node, however, with strings
65 * compressed and entity references 'expanded'. */
66 static void
67 print_dom_node_value(struct dom_node *node)
69 struct dom_string *value;
71 assert(node);
73 switch (node->type) {
74 case DOM_NODE_ENTITY_REFERENCE:
75 /* FIXME: Set to the entity value. */
76 printf("%.*s", node->string.length, node->string.string);
77 break;
79 default:
80 value = get_dom_node_value(node);
81 if (!value) {
82 printf("(no value)");
83 return;
86 print_compressed_string(value);
90 static unsigned char indent_string[] =
91 " ";
93 #define get_indent_offset(stack) \
94 (((stack)->depth < sizeof(indent_string)/2 ? (stack)->depth * 2 : sizeof(indent_string)) - 2)
97 static void
98 print_indent(struct dom_stack *stack)
100 printf("%.*s", (int) get_indent_offset(stack), indent_string);
103 static enum dom_code
104 sgml_parser_test_tree(struct dom_stack *stack, struct dom_node *node, void *data)
106 struct dom_string *value = &node->string;
107 struct dom_string *name = get_dom_node_name(node);
109 /* Always print the URI for identification. */
110 if (update_number_of_lines(stack))
111 return DOM_CODE_OK;
113 print_indent(stack);
114 printf("%.*s: %.*s\n",
115 name->length, name->string,
116 value->length, value->string);
118 return DOM_CODE_OK;
121 static enum dom_code
122 sgml_parser_test_id_leaf(struct dom_stack *stack, struct dom_node *node, void *data)
124 struct dom_string *name;
125 struct dom_string *id;
127 assert(node);
129 if (update_number_of_lines(stack))
130 return DOM_CODE_OK;
132 name = get_dom_node_name(node);
133 id = get_dom_node_type_name(node->type);
135 print_indent(stack);
136 printf("%.*s: %.*s -> ",
137 id->length, id->string,
138 name->length, name->string);
139 print_dom_node_value(node);
140 printf("\n");
142 return DOM_CODE_OK;
145 static enum dom_code
146 sgml_parser_test_leaf(struct dom_stack *stack, struct dom_node *node, void *data)
148 struct dom_string *name;
150 assert(node);
152 if (update_number_of_lines(stack))
153 return DOM_CODE_OK;
155 name = get_dom_node_name(node);
157 print_indent(stack);
158 printf("%.*s: ",
159 name->length, name->string);
160 print_dom_node_value(node);
161 printf("\n");
163 return DOM_CODE_OK;
166 static enum dom_code
167 sgml_parser_test_branch(struct dom_stack *stack, struct dom_node *node, void *data)
169 struct dom_string *name;
170 struct dom_string *id;
172 assert(node);
174 if (update_number_of_lines(stack))
175 return DOM_CODE_OK;
177 name = get_dom_node_name(node);
178 id = get_dom_node_type_name(node->type);
180 print_indent(stack);
181 printf("%.*s: %.*s\n",
182 id->length, id->string, name->length, name->string);
184 return DOM_CODE_OK;
187 static enum dom_code
188 sgml_parser_test_end(struct dom_stack *stack, struct dom_node *node, void *data)
190 struct sgml_parser *parser = stack->contexts[0]->data;
192 if ((parser->flags & SGML_PARSER_COUNT_LINES)
193 && !(parser->flags & SGML_PARSER_DETECT_ERRORS)) {
194 printf("%d\n", number_of_lines);
197 return DOM_CODE_OK;
200 struct dom_stack_context_info sgml_parser_test_context_info = {
201 /* Object size: */ 0,
202 /* Push: */
204 /* */ NULL,
205 /* DOM_NODE_ELEMENT */ sgml_parser_test_branch,
206 /* DOM_NODE_ATTRIBUTE */ sgml_parser_test_id_leaf,
207 /* DOM_NODE_TEXT */ sgml_parser_test_leaf,
208 /* DOM_NODE_CDATA_SECTION */ sgml_parser_test_leaf,
209 /* DOM_NODE_ENTITY_REFERENCE */ sgml_parser_test_branch,
210 /* DOM_NODE_ENTITY */ sgml_parser_test_id_leaf,
211 /* DOM_NODE_PROC_INSTRUCTION */ sgml_parser_test_id_leaf,
212 /* DOM_NODE_COMMENT */ sgml_parser_test_leaf,
213 /* DOM_NODE_DOCUMENT */ sgml_parser_test_tree,
214 /* DOM_NODE_DOCUMENT_TYPE */ sgml_parser_test_id_leaf,
215 /* DOM_NODE_DOCUMENT_FRAGMENT */ sgml_parser_test_id_leaf,
216 /* DOM_NODE_NOTATION */ sgml_parser_test_id_leaf,
218 /* Pop: */
220 /* */ NULL,
221 /* DOM_NODE_ELEMENT */ NULL,
222 /* DOM_NODE_ATTRIBUTE */ NULL,
223 /* DOM_NODE_TEXT */ NULL,
224 /* DOM_NODE_CDATA_SECTION */ NULL,
225 /* DOM_NODE_ENTITY_REFERENCE */ NULL,
226 /* DOM_NODE_ENTITY */ NULL,
227 /* DOM_NODE_PROC_INSTRUCTION */ NULL,
228 /* DOM_NODE_COMMENT */ NULL,
229 /* DOM_NODE_DOCUMENT */ sgml_parser_test_end,
230 /* DOM_NODE_DOCUMENT_TYPE */ NULL,
231 /* DOM_NODE_DOCUMENT_FRAGMENT */ NULL,
232 /* DOM_NODE_NOTATION */ NULL,
236 static enum dom_code
237 sgml_error_function(struct sgml_parser *parser, struct dom_string *string,
238 unsigned int line_number)
240 printf("error on line %d: %.*s\n",
241 line_number, string->length, string->string);
243 return DOM_CODE_OK;
247 main(int argc, char *argv[])
249 struct sgml_parser *parser;
250 enum sgml_document_type doctype = SGML_DOCTYPE_HTML;
251 enum sgml_parser_flag flags = 0;
252 enum sgml_parser_type type = SGML_PARSER_STREAM;
253 enum dom_code code = 0;
254 enum dom_config_flag normalize_flags = 0;
255 struct dom_config config;
256 int normalize = 0;
257 int dump = 0;
258 int complete = 1;
259 size_t read_stdin = 0;
260 struct dom_string uri = STATIC_DOM_STRING("dom://test");
261 struct dom_string source = STATIC_DOM_STRING("(no source)");
262 int i;
264 for (i = 1; i < argc; i++) {
265 char *arg = argv[i];
267 if (strncmp(arg, "--", 2))
268 break;
270 arg += 2;
272 if (get_test_opt(&arg, "uri", &i, argc, argv, "a URI")) {
273 set_dom_string(&uri, arg, strlen(arg));
275 } else if (get_test_opt(&arg, "src", &i, argc, argv, "a string")) {
276 set_dom_string(&source, arg, strlen(arg));
278 } else if (get_test_opt(&arg, "stdin", &i, argc, argv, "a number")) {
279 read_stdin = atoi(arg);
280 flags |= SGML_PARSER_INCREMENTAL;
282 } else if (get_test_opt(&arg, "normalize", &i, argc, argv, "a string")) {
283 normalize = 1;
284 normalize_flags = parse_dom_config(arg, ',');
285 type = SGML_PARSER_TREE;
287 } else if (!strcmp(arg, "print-lines")) {
288 flags |= SGML_PARSER_COUNT_LINES;
290 } else if (!strcmp(arg, "incomplete")) {
291 flags |= SGML_PARSER_INCREMENTAL;
292 complete = 0;
294 } else if (!strcmp(arg, "dump")) {
295 type = SGML_PARSER_TREE;
296 dump = 1;
298 } else if (!strcmp(arg, "error")) {
299 flags |= SGML_PARSER_DETECT_ERRORS;
301 } else if (!strcmp(arg, "help")) {
302 die(NULL);
304 } else {
305 die("Unknown argument '%s'", arg - 2);
309 parser = init_sgml_parser(type, doctype, &uri, flags);
310 if (!parser) return 1;
312 parser->error_func = sgml_error_function;
313 if (normalize)
314 add_dom_config_normalizer(&parser->stack, &config, normalize_flags);
315 else if (!dump)
316 add_dom_stack_context(&parser->stack, NULL, &sgml_parser_test_context_info);
318 if (read_stdin > 0) {
319 unsigned char *buffer;
321 buffer = mem_alloc(read_stdin);
322 if (!buffer)
323 die("Cannot allocate buffer");
325 complete = 0;
327 while (!complete) {
328 size_t size = fread(buffer, 1, read_stdin, stdin);
330 if (ferror(stdin))
331 die("error reading from stdin");
333 complete = feof(stdin);
335 code = parse_sgml(parser, buffer, size, complete);
336 switch (code) {
337 case DOM_CODE_OK:
338 break;
340 case DOM_CODE_INCOMPLETE:
341 if (!complete) break;
342 /* Error */
343 default:
344 complete = 1;
348 mem_free(buffer);
350 } else {
351 code = parse_sgml(parser, source.string, source.length, complete);
354 if (parser->root) {
355 assert(!complete || parser->stack.depth > 0);
357 while (!dom_stack_is_empty(&parser->stack)) {
358 get_dom_stack_top(&parser->stack)->immutable = 0;
359 pop_dom_node(&parser->stack);
362 if (normalize || dump) {
363 struct dom_stack stack;
365 /* Note, that we cannot free nodes when walking the DOM
366 * tree since walk_dom_node() uses an index to traverse
367 * the tree. */
368 init_dom_stack(&stack, DOM_STACK_FLAG_NONE);
369 /* XXX: This context needs to be added first because it
370 * assumes the parser can be accessed via
371 * stack->contexts[0].data. */
372 if (normalize)
373 add_dom_stack_context(&stack, parser, &sgml_parser_test_context_info);
374 else if (dump)
375 add_sgml_file_dumper(&stack, stdout);
376 walk_dom_nodes(&stack, parser->root);
377 done_dom_stack(&stack);
378 done_dom_node(parser->root);
382 done_sgml_parser(parser);
383 #ifdef DEBUG_MEMLEAK
384 check_memory_leaks();
385 #endif
387 return code != DOM_CODE_OK ? 1 : 0;