/* Tool for testing the SGML parser */
14 #include "dom/configuration.h"
16 #include "dom/sgml/dump.h"
17 #include "dom/sgml/parser.h"
18 #include "dom/stack.h"
19 #include "util/test.h"
/* Highest line number recorded so far: raised by update_number_of_lines()
 * and printed by sgml_parser_test_end(). */
unsigned int number_of_lines = 0;
25 update_number_of_lines(struct dom_stack
*stack
)
27 struct sgml_parser
*parser
= stack
->contexts
[0]->data
;
30 if (!(parser
->flags
& SGML_PARSER_COUNT_LINES
))
33 lines
= get_sgml_parser_line_number(parser
);
34 if (number_of_lines
< lines
)
35 number_of_lines
= lines
;
/* Print the string in a compressed form: a single line in which newlines,
 * carriage returns and tabs are replaced with "\\n"-style escape sequences. */
/* Walks @string one byte at a time, from index 0 up to string->length - 1.
 * For '\n', '\r' and '\t' the second byte of the local escape buffer is set
 * to 'n', 'r' or 't' respectively, so the buffer then holds the backslash
 * escape for that byte. */
43 print_compressed_string(struct dom_string
*string
)
/* Escape template: a backslash, a placeholder byte and a terminating NUL. */
45 unsigned char escape
[3] = { '\\', '?', 0 };
48 for (pos
= 0; pos
< string
->length
; pos
++) {
49 unsigned char data
= string
->string
[pos
];
/* Select the escape letter for the characters that must not appear raw. */
52 case '\n': escape
[1] = 'n'; break;
53 case '\r': escape
[1] = 'r'; break;
54 case '\t': escape
[1] = 't'; break;
/* Print the value of the given @node, with strings compressed. Entity
 * references are meant to be 'expanded', but for now their reference
 * string is printed as is (see the FIXME below). */
/* Prints a representation of @node's value to stdout. For a
 * DOM_NODE_ENTITY_REFERENCE node the node's own string is printed with
 * printf(). The other visible path fetches the value with
 * get_dom_node_value() and hands it to print_compressed_string().
 *
 * NOTE(review): "%.*s" consumes an int precision; confirm the type of the
 * length member or cast it to int, as print_indent() does for its
 * precision argument. */
67 print_dom_node_value(struct dom_node
*node
)
69 struct dom_string
*value
;
74 case DOM_NODE_ENTITY_REFERENCE
:
75 /* FIXME: Set to the entity value. */
76 printf("%.*s", node
->string
.length
, node
->string
.string
);
80 value
= get_dom_node_value(node
);
86 print_compressed_string(value
);
/* Characters used for indentation: print_indent() prints a prefix of this
 * array whose length is computed by get_indent_offset(). */
90 static unsigned char indent_string
[] =
/* Number of indent_string bytes to print for the current depth of @stack:
 * (depth * 2) - 2 while depth is below sizeof(indent_string) / 2, otherwise
 * capped at sizeof(indent_string) - 2.
 *
 * A depth of 0 yields 0. Without that guard the unsigned subtraction wraps;
 * print_indent() casts the result to int, which gives a negative precision,
 * and printf() treats a negative precision as if it were omitted, so the
 * whole indent string would be printed.
 *
 * Note that @stack is evaluated more than once. */
#define get_indent_offset(stack) \
	((stack)->depth == 0 ? 0 \
	 : ((stack)->depth < sizeof(indent_string) / 2 \
	    ? (stack)->depth * 2 : sizeof(indent_string)) - 2)
98 print_indent(struct dom_stack
*stack
)
100 printf("%.*s", (int) get_indent_offset(stack
), indent_string
);
/* Handler listed for DOM_NODE_DOCUMENT in the first callback group of
 * sgml_parser_test_context_info. Prints "<name>: <string>" and a newline
 * for @node, where the name comes from get_dom_node_name() and the string
 * is the node's own string member. update_number_of_lines() is consulted
 * before anything is printed. @data is not referenced in the visible code. */
104 sgml_parser_test_tree(struct dom_stack
*stack
, struct dom_node
*node
, void *data
)
106 struct dom_string
*value
= &node
->string
;
107 struct dom_string
*name
= get_dom_node_name(node
);
109 /* Always print the URI for identification. */
110 if (update_number_of_lines(stack
))
/* NOTE(review): "%.*s" consumes an int precision; confirm the type of the
 * length member or cast it to int. */
114 printf("%.*s: %.*s\n",
115 name
->length
, name
->string
,
116 value
->length
, value
->string
);
/* Handler listed in sgml_parser_test_context_info for attribute, entity,
 * processing instruction, document type, document fragment and notation
 * nodes. Prints "<type name>: <node name> -> " and then the node's value
 * via print_dom_node_value(). update_number_of_lines() is consulted before
 * anything is printed. @data is not referenced in the visible code. */
122 sgml_parser_test_id_leaf(struct dom_stack
*stack
, struct dom_node
*node
, void *data
)
124 struct dom_string
*name
;
125 struct dom_string
*id
;
129 if (update_number_of_lines(stack
))
/* 'name' is the node's own name, 'id' the name of its node type. */
132 name
= get_dom_node_name(node
);
133 id
= get_dom_node_type_name(node
->type
);
/* NOTE(review): "%.*s" consumes an int precision; confirm the type of the
 * length member or cast it to int. */
136 printf("%.*s: %.*s -> ",
137 id
->length
, id
->string
,
138 name
->length
, name
->string
);
139 print_dom_node_value(node
);
/* Handler listed in sgml_parser_test_context_info for text, CDATA section
 * and comment nodes. Looks up the node name with get_dom_node_name(),
 * passes its length and string to an output call (presumably printf();
 * TODO confirm the format string) and then prints the node's value via
 * print_dom_node_value(). update_number_of_lines() is consulted first.
 * @data is not referenced in the visible code. */
146 sgml_parser_test_leaf(struct dom_stack
*stack
, struct dom_node
*node
, void *data
)
148 struct dom_string
*name
;
152 if (update_number_of_lines(stack
))
155 name
= get_dom_node_name(node
);
159 name
->length
, name
->string
);
160 print_dom_node_value(node
);
/* Handler listed in sgml_parser_test_context_info for element and entity
 * reference nodes. Prints "<type name>: <node name>" and a newline.
 * update_number_of_lines() is consulted before anything is printed.
 * @data is not referenced in the visible code. */
167 sgml_parser_test_branch(struct dom_stack
*stack
, struct dom_node
*node
, void *data
)
169 struct dom_string
*name
;
170 struct dom_string
*id
;
174 if (update_number_of_lines(stack
))
/* 'name' is the node's own name, 'id' the name of its node type. */
177 name
= get_dom_node_name(node
);
178 id
= get_dom_node_type_name(node
->type
);
/* NOTE(review): "%.*s" consumes an int precision; confirm the type of the
 * length member or cast it to int. */
181 printf("%.*s: %.*s\n",
182 id
->length
, id
->string
, name
->length
, name
->string
);
188 sgml_parser_test_end(struct dom_stack
*stack
, struct dom_node
*node
, void *data
)
190 struct sgml_parser
*parser
= stack
->contexts
[0]->data
;
192 if ((parser
->flags
& SGML_PARSER_COUNT_LINES
)
193 && !(parser
->flags
& SGML_PARSER_DETECT_ERRORS
)) {
194 printf("%d\n", number_of_lines
);
/* Callback description handed to add_dom_stack_context() by main().
 * The object size is 0. Two groups of per-node-type handlers follow: the
 * first maps every listed node type to one of the printing handlers in this
 * file, the second is NULL for every listed type except DOM_NODE_DOCUMENT,
 * which runs sgml_parser_test_end(). */
200 struct dom_stack_context_info sgml_parser_test_context_info
= {
201 /* Object size: */ 0,
205 /* DOM_NODE_ELEMENT */ sgml_parser_test_branch
,
206 /* DOM_NODE_ATTRIBUTE */ sgml_parser_test_id_leaf
,
207 /* DOM_NODE_TEXT */ sgml_parser_test_leaf
,
208 /* DOM_NODE_CDATA_SECTION */ sgml_parser_test_leaf
,
209 /* DOM_NODE_ENTITY_REFERENCE */ sgml_parser_test_branch
,
210 /* DOM_NODE_ENTITY */ sgml_parser_test_id_leaf
,
211 /* DOM_NODE_PROC_INSTRUCTION */ sgml_parser_test_id_leaf
,
212 /* DOM_NODE_COMMENT */ sgml_parser_test_leaf
,
213 /* DOM_NODE_DOCUMENT */ sgml_parser_test_tree
,
214 /* DOM_NODE_DOCUMENT_TYPE */ sgml_parser_test_id_leaf
,
215 /* DOM_NODE_DOCUMENT_FRAGMENT */ sgml_parser_test_id_leaf
,
216 /* DOM_NODE_NOTATION */ sgml_parser_test_id_leaf
,
221 /* DOM_NODE_ELEMENT */ NULL
,
222 /* DOM_NODE_ATTRIBUTE */ NULL
,
223 /* DOM_NODE_TEXT */ NULL
,
224 /* DOM_NODE_CDATA_SECTION */ NULL
,
225 /* DOM_NODE_ENTITY_REFERENCE */ NULL
,
226 /* DOM_NODE_ENTITY */ NULL
,
227 /* DOM_NODE_PROC_INSTRUCTION */ NULL
,
228 /* DOM_NODE_COMMENT */ NULL
,
229 /* DOM_NODE_DOCUMENT */ sgml_parser_test_end
,
230 /* DOM_NODE_DOCUMENT_TYPE */ NULL
,
231 /* DOM_NODE_DOCUMENT_FRAGMENT */ NULL
,
232 /* DOM_NODE_NOTATION */ NULL
,
237 sgml_error_function(struct sgml_parser
*parser
, struct dom_string
*string
,
238 unsigned int line_number
)
240 printf("error on line %d: %.*s\n",
241 line_number
, string
->length
, string
->string
);
/* Entry point of the SGML parser test tool.
 *
 * Recognized options (compared after the leading "--"): uri, src, stdin,
 * normalize, print-lines, incomplete, dump, error and help; anything else
 * is reported through die(). A parser is created with init_sgml_parser()
 * and sgml_error_function() is installed as its error callback. Input comes
 * either from stdin, read with fread() in pieces of at most read_stdin
 * bytes, or from the "src" string. When normalize or dump is set, the
 * resulting tree is walked with walk_dom_nodes(). The exit status is 0 only
 * when the last parse code is DOM_CODE_OK, and 1 otherwise (also 1 when the
 * parser cannot be created). */
247 main(int argc
, char *argv
[])
249 struct sgml_parser
*parser
;
/* Defaults: HTML doctype, stream parser, no flags. */
250 enum sgml_document_type doctype
= SGML_DOCTYPE_HTML
;
251 enum sgml_parser_flag flags
= 0;
252 enum sgml_parser_type type
= SGML_PARSER_STREAM
;
253 enum dom_code code
= 0;
254 enum dom_config_flag normalize_flags
= 0;
255 struct dom_config config
;
/* Size of each stdin read; 0 means input is taken from 'source'. */
259 size_t read_stdin
= 0;
260 struct dom_string uri
= STATIC_DOM_STRING("dom://test");
261 struct dom_string source
= STATIC_DOM_STRING("(no source)");
264 for (i
= 1; i
< argc
; i
++) {
267 if (strncmp(arg
, "--", 2))
272 if (get_test_opt(&arg
, "uri", &i
, argc
, argv
, "a URI")) {
273 set_dom_string(&uri
, arg
, strlen(arg
));
275 } else if (get_test_opt(&arg
, "src", &i
, argc
, argv
, "a string")) {
276 set_dom_string(&source
, arg
, strlen(arg
));
278 } else if (get_test_opt(&arg
, "stdin", &i
, argc
, argv
, "a number")) {
/* NOTE(review): atoi() reports no errors and its int result is stored in a
 * size_t, so a negative or non-numeric argument is not rejected here;
 * strtoul() with end-pointer checking would catch both. */
279 read_stdin
= atoi(arg
);
280 flags
|= SGML_PARSER_INCREMENTAL
;
282 } else if (get_test_opt(&arg
, "normalize", &i
, argc
, argv
, "a string")) {
/* The option value is a comma separated list for parse_dom_config(). */
284 normalize_flags
= parse_dom_config(arg
, ',');
285 type
= SGML_PARSER_TREE
;
287 } else if (!strcmp(arg
, "print-lines")) {
288 flags
|= SGML_PARSER_COUNT_LINES
;
290 } else if (!strcmp(arg
, "incomplete")) {
291 flags
|= SGML_PARSER_INCREMENTAL
;
294 } else if (!strcmp(arg
, "dump")) {
295 type
= SGML_PARSER_TREE
;
298 } else if (!strcmp(arg
, "error")) {
299 flags
|= SGML_PARSER_DETECT_ERRORS
;
301 } else if (!strcmp(arg
, "help")) {
/* 'arg - 2' points back at the "--" prefix of the argument. */
305 die("Unknown argument '%s'", arg
- 2);
309 parser
= init_sgml_parser(type
, doctype
, &uri
, flags
);
310 if (!parser
) return 1;
312 parser
->error_func
= sgml_error_function
;
314 add_dom_config_normalizer(&parser
->stack
, &config
, normalize_flags
);
316 add_dom_stack_context(&parser
->stack
, NULL
, &sgml_parser_test_context_info
);
318 if (read_stdin
> 0) {
319 unsigned char *buffer
;
321 buffer
= mem_alloc(read_stdin
);
323 die("Cannot allocate buffer");
/* Read at most read_stdin bytes; 'size' is the number actually read. */
328 size_t size
= fread(buffer
, 1, read_stdin
, stdin
);
331 die("error reading from stdin");
/* End of input on stdin marks the final piece handed to the parser. */
333 complete
= feof(stdin
);
335 code
= parse_sgml(parser
, buffer
, size
, complete
);
340 case DOM_CODE_INCOMPLETE
:
341 if (!complete
) break;
/* No stdin reading requested: parse the 'source' string. */
351 code
= parse_sgml(parser
, source
.string
, source
.length
, complete
);
355 assert(!complete
|| parser
->stack
.depth
> 0);
/* Pop every node left on the parser stack, clearing the 'immutable' mark
 * of the top entry before each pop. */
357 while (!dom_stack_is_empty(&parser
->stack
)) {
358 get_dom_stack_top(&parser
->stack
)->immutable
= 0;
359 pop_dom_node(&parser
->stack
);
362 if (normalize
|| dump
) {
363 struct dom_stack stack
;
365 /* Note, that we cannot free nodes when walking the DOM
366 * tree since walk_dom_node() uses an index to traverse
368 init_dom_stack(&stack
, DOM_STACK_FLAG_NONE
);
369 /* XXX: This context needs to be added first because it
370 * assumes the parser can be accessed via
371 * stack->contexts[0].data. */
373 add_dom_stack_context(&stack
, parser
, &sgml_parser_test_context_info
);
375 add_sgml_file_dumper(&stack
, stdout
);
376 walk_dom_nodes(&stack
, parser
->root
);
377 done_dom_stack(&stack
);
378 done_dom_node(parser
->root
);
382 done_sgml_parser(parser
);
384 check_memory_leaks();
/* Exit status: 0 only when the last parse code is DOM_CODE_OK. */
387 return code
!= DOM_CODE_OK
? 1 : 0;