2 tre-compile.c - TRE regex compiler
4 This software is released under a BSD-style license.
5 See the file LICENSE for details and copyright.
11 - Fix tre_ast_to_tnfa() to recurse using a stack instead of recursive
18 #endif /* HAVE_CONFIG_H */
24 #include "tre-internal.h"
26 #include "tre-stack.h"
28 #include "tre-parse.h"
29 #include "tre-compile.h"
34 The bit_ffs() macro in bitstring.h is flawed. Replace it with a working one.
37 #define bit_ffs(name, nbits, value) { \
38 register bitstr_t *_name = name; \
39 register int _byte, _nbits = nbits; \
40 register int _stopbyte = _bit_byte(_nbits), _value = -1; \
41 for (_byte = 0; _byte <= _stopbyte; ++_byte) \
43 _value = _byte << 3; \
44 for (_stopbyte = _name[_byte]; !(_stopbyte&0x1); \
45 ++_value, _stopbyte >>= 1); \
52 Algorithms to setup tags so that submatch addressing can be done.
57 static const char *tag_dir_str
[] = {
63 static const char _indent
[] = " ";
66 print_indent(int indent
)
72 static void print_last_matched_pre(tre_last_matched_pre_t
*lm
, int indent
,
75 print_last_match_branch_pre(tre_last_matched_branch_pre_t
*branch
, int indent
,
78 tre_last_matched_pre_t
*u
= branch
->last_matched
;
79 int n_last_matched
= 0;
88 DPRINT(("BRANCH: tot_branches=%d tot_last_matched=%d tot_tags=%d\n",
89 branch
->tot_branches
, branch
->tot_last_matched
, branch
->tot_tags
));
91 DPRINT(("..n_last_matched=%d last_matched=%d\n", branch
->n_last_matched
,
93 if (branch
->n_last_matched
!= n_last_matched
)
94 DPRINT(("*** mismatch between n_last_matched and unions ***\n"));
95 if (branch
->cmp_tag
> 0)
98 const char *sep
= " tags=";
100 DPRINT(("..cmp_tag=%d n_tags=%d", branch
->cmp_tag
, branch
->n_tags
));
101 for (i
= 0; i
< num_tags
; i
++)
102 if (bit_test(branch
->tags
, i
))
104 DPRINT(("%s%d", sep
, i
));
110 u
= branch
->last_matched
;
114 print_last_matched_pre(u
, indent
, num_tags
);
120 print_last_matched_pre(tre_last_matched_pre_t
*lm
, int indent
, int num_tags
)
122 tre_last_matched_branch_pre_t
*b
= lm
->branches
;
131 print_indent(indent
);
132 DPRINT(("LAST_MATCHED: tot_branches=%d tot_last_matched=%d tot_tags=%d\n",
133 lm
->tot_branches
, lm
->tot_last_matched
, lm
->tot_tags
));
134 print_indent(indent
);
135 DPRINT(("..start_tag=%d n_branches=%d branches=%d\n", lm
->start_tag
,
136 lm
->n_branches
, n_branches
));
137 if (lm
->n_branches
!= n_branches
)
138 DPRINT(("*** mismatch between n and branches ***\n"));
144 print_last_match_branch_pre(b
, indent
, num_tags
);
149 static void print_last_matched(tre_last_matched_t
*lm
, int indent
);
151 print_last_match_branch(tre_last_matched_branch_t
*branch
, int indent
)
153 tre_last_matched_t
*u
;
156 print_indent(indent
);
157 DPRINT(("BRANCH: n_last_matched=%d\n", branch
->n_last_matched
));
158 if (branch
->cmp_tag
> 0)
160 print_indent(indent
);
161 DPRINT(("..cmp_tag=%d n_tags=%d", branch
->cmp_tag
, branch
->n_tags
));
162 if (branch
->n_tags
> 0)
164 const char *sep
= " tags=";
165 for (i
= 0; i
< branch
->n_tags
; i
++)
167 DPRINT(("%s%d", sep
, branch
->tags
[i
]));
174 u
= branch
->last_matched
;
176 for (i
= branch
->n_last_matched
; i
> 0; i
--, u
++)
177 print_last_matched(u
, indent
);
181 print_last_matched(tre_last_matched_t
*lm
, int indent
)
184 tre_last_matched_branch_t
*b
;
186 print_indent(indent
);
187 DPRINT(("LAST_MATCHED: n_branches=%d start_tag=%d\n", lm
->n_branches
,
192 for (i
= lm
->n_branches
; i
> 0; i
--, b
++)
193 print_last_match_branch(b
, indent
);
195 #endif /* TRE_DEBUG */
198 /* Merge the tre_last_matched_branch_pre_t of src into dst, creating a new
199 one if needed. If tag_id > 0, add that tag as well (a negative tag_id will
200 create an unset tre_last_matched_branch_pre_t. */
202 tre_merge_branches(tre_mem_t mem
, tre_ast_node_t
*dst
, tre_ast_node_t
*src
,
203 int tag_id
, int num_tags
)
205 tre_last_matched_branch_pre_t
*db
= dst
->last_matched_branch
;
206 tre_last_matched_branch_pre_t
*sb
= (src
? src
->last_matched_branch
: NULL
);
212 bitstr_t
*l
= db
->tags
;
213 bitstr_t
*r
= sb
->tags
;
214 int i
= bitstr_size(num_tags
);
218 /* db and sb are the info from two parallel sub-trees, so the tags
219 must be mutually exclusive, and we can just add their numbers */
220 db
->n_tags
+= sb
->n_tags
;
221 db
->tot_tags
+= sb
->tot_tags
;
222 if (db
->last_matched
)
224 if (sb
->last_matched
)
226 tre_last_matched_pre_t
*u
= db
->last_matched
;
230 u
->next
= sb
->last_matched
;
231 db
->n_last_matched
+= sb
->n_last_matched
;
232 db
->tot_branches
+= sb
->tot_branches
;
233 db
->tot_last_matched
+= sb
->tot_last_matched
;
236 else if (sb
->last_matched
)
238 db
->last_matched
= sb
->last_matched
;
239 db
->n_last_matched
= sb
->n_last_matched
;
240 db
->tot_branches
= sb
->tot_branches
;
241 db
->tot_last_matched
= sb
->tot_last_matched
;
252 db
= tre_mem_calloc(mem
, sizeof(tre_last_matched_branch_pre_t
)
253 + bitstr_size(num_tags
));
256 db
->tot_branches
= 1;
260 /* tag_id is a new tag, and shouldn't exist in db's tags,
261 so we can always increment n_tags */
262 bit_set(db
->tags
, tag_id
);
267 dst
->last_matched_branch
= db
;
272 /* Inserts a catenation node to the root of the tree given in `node'.
273 As the left child a new tag with number `tag_id' to `node' is added,
274 and the right child is the old root. */
276 tre_add_tag_left(tre_mem_t mem
, tre_ast_node_t
*node
, int tag_id
)
280 DPRINT(("add_tag_left: tag %d\n", tag_id
));
282 c
= tre_mem_alloc(mem
, sizeof(*c
));
285 c
->left
= tre_ast_new_literal(mem
, TAG
, tag_id
, -1);
288 c
->right
= tre_mem_calloc(mem
, sizeof(tre_ast_node_t
));
289 if (c
->right
== NULL
)
292 c
->right
->obj
= node
->obj
;
293 c
->right
->type
= node
->type
;
294 c
->right
->last_matched_branch
= node
->last_matched_branch
;
295 c
->right
->nullable
= -1;
296 c
->right
->submatch_id
= -1;
298 node
->type
= CATENATION
;
299 node
->original
= c
->right
;
303 /* Inserts a catenation node to the root of the tree given in `node'.
304 As the right child a new tag with number `tag_id' to `node' is added,
305 and the left child is the old root. */
307 tre_add_tag_right(tre_mem_t mem
, tre_ast_node_t
*node
, int tag_id
)
311 DPRINT(("tre_add_tag_right: tag %d\n", tag_id
));
313 c
= tre_mem_alloc(mem
, sizeof(*c
));
316 c
->right
= tre_ast_new_literal(mem
, TAG
, tag_id
, -1);
317 if (c
->right
== NULL
)
319 c
->left
= tre_mem_calloc(mem
, sizeof(tre_ast_node_t
));
323 c
->left
->obj
= node
->obj
;
324 c
->left
->type
= node
->type
;
325 c
->left
->last_matched_branch
= node
->last_matched_branch
;
326 c
->left
->nullable
= -1;
327 c
->left
->submatch_id
= -1;
329 node
->type
= CATENATION
;
330 node
->original
= c
->left
;
336 ADDTAGS_RECURSE_NOT_TOP_UNION
,
337 ADDTAGS_AFTER_ITERATION
,
338 ADDTAGS_AFTER_UNION_LEFT
,
339 ADDTAGS_AFTER_UNION_RIGHT
,
340 ADDTAGS_AFTER_CAT_LEFT
,
341 ADDTAGS_AFTER_CAT_RIGHT
,
342 ADDTAGS_SET_SUBMATCH_END
,
343 ADDTAGS_UNION_RECURSE
,
344 ADDTAGS_UNION_RIGHT_RECURSE
,
345 ADDTAGS_AFTER_UNION_TOP
,
346 } tre_addtags_symbol_t
;
349 COPY_LAST_MATCHED_BRANCH
,
350 COPY_LAST_MATCHED_BRANCH_NEXT
,
352 COPY_LAST_MATCHED_NEXT
,
356 #define REGSET_UNSET ((unsigned)-1)
358 /* Go through `regset' and set submatch data for submatches that are
361 tre_purge_regset(unsigned *regset
, tre_tnfa_t
*tnfa
, int tag
)
365 for (i
= 0; regset
[i
] != REGSET_UNSET
; i
++)
367 int id
= regset
[i
] / 2;
368 int start
= !(regset
[i
] % 2);
369 if (id
>= SUBMATCH_ID_INVISIBLE_START
)
371 DPRINT((" Using tag %d for %s offset of "
372 "submatch %d\n", tag
,
373 start
? "start" : "end", id
));
375 tnfa
->submatch_data
[id
].so_tag
= tag
;
377 tnfa
->submatch_data
[id
].eo_tag
= tag
;
383 #define REGSET_HAS_STARTS 0x1
384 #define REGSET_HAS_ENDS 0x2
387 /* Adds tags to appropriate locations in the parse tree in `tree', so that
388 subexpressions marked for submatch addressing can be traced. */
390 tre_add_tags(tre_mem_t mem
, tre_stack_t
*stack
, tre_ast_node_t
*tree
,
393 reg_errcode_t status
= REG_OK
;
394 tre_addtags_symbol_t symbol
;
395 tre_ast_node_t
*node
= tree
; /* Tree node we are currently looking at. */
396 int bottom
= tre_stack_num_objects(stack
);
397 /* True for first pass (counting number of needed tags) */
398 int first_pass
= (mem
== NULL
|| tnfa
== NULL
);
399 unsigned *regset
, *orig_regset
;
400 int regset_contains
= 0;
401 int num_tags
= 0; /* Total number of tags. */
402 int num_minimals
= 0; /* Number of special minimal tags. */
403 int tag
= 0; /* The tag that is to be added next. */
404 int next_tag
= 1; /* Next tag to use after this one. */
405 int minimal_tag
= -1; /* Tag that marks the beginning of a minimal match. */
406 int *reorder_tags
= NULL
; /* Tag reorder array: a pair for each reorder,
407 * the first is the tag to reorder, the second
408 * is the tag after which the first is reordered */
409 int *rtp
; /* Pointer used to fill in reorder_tags and
411 int *to_reorder
; /* Transform array converting sequential order to
412 * that specified by reorder_tags */
415 tre_tag_direction_t direction
= TRE_TAG_LEFT_MAXIMIZE
;
418 DPRINT(("Initializing direction to %s\n", tag_dir_str
[direction
]));
420 tnfa
->minimal_tags
[0] = -1;
423 regset
= xmalloc(sizeof(*regset
) * ((tnfa
->num_submatches
424 + tnfa
->num_submatches_invisible
+ 1) * 2));
430 regset
[0] = REGSET_UNSET
;
431 orig_regset
= regset
;
435 /* Allocate all memory for reorder_tags, tag_order, to_seq_order and
436 * to_reorder in one batch (assuming all are the same type) */
437 rtp
= reorder_tags
= xmalloc(sizeof(*reorder_tags
) *
438 ((2 * tnfa
->num_reorder_tags
+ 1) +
440 if (reorder_tags
== NULL
)
443 goto error_reorder_tags
;
445 to_reorder
= reorder_tags
+ (2 * tnfa
->num_reorder_tags
+ 1);
448 STACK_PUSH(stack
, voidptr
, node
);
449 STACK_PUSH(stack
, int, ADDTAGS_RECURSE
);
451 while (tre_stack_num_objects(stack
) > bottom
)
453 if (status
!= REG_OK
)
456 symbol
= (tre_addtags_symbol_t
)tre_stack_pop_int(stack
);
461 case ADDTAGS_SET_SUBMATCH_END
:
465 id
= tre_stack_pop_int(stack
);
466 node
= tre_stack_pop_voidptr(stack
);
467 /* Add end of this submatch to regset. */
468 for (i
= 0; regset
[i
] != REGSET_UNSET
; i
++);
469 regset
[i
] = id
* 2 + 1;
471 regset_contains
|= REGSET_HAS_ENDS
;
473 /* Always put a tag after a minimal iterator. */
474 if (minimal_tag
>= 0)
479 DPRINT((" ADDTAGS_SET_SUBMATCH_END: node->num_tags = %d\n",
485 status
= tre_merge_branches(mem
, node
, NULL
, tag
,
487 if (status
!= REG_OK
)
489 status
= tre_add_tag_right(mem
, node
, tag
);
490 if (status
!= REG_OK
)
492 tnfa
->tag_directions
[tag
] = TRE_TAG_MINIMIZE
;
493 DPRINT(("Setting t%d direction to %s\n", tag
,
494 tag_dir_str
[tnfa
->tag_directions
[tag
]]));
495 DPRINT(("Minimal %d, %d\n", minimal_tag
, tag
));
496 for (i
= 0; tnfa
->minimal_tags
[i
] >= 0; i
++);
497 tnfa
->minimal_tags
[i
] = tag
;
498 tnfa
->minimal_tags
[i
+ 1] = minimal_tag
;
499 tnfa
->minimal_tags
[i
+ 2] = -1;
501 DPRINT((" Minimal end: t%d reordered to "
502 "after t%d\n", tag
, minimal_tag
));
503 /* Append to tag_order, move "tag" after
506 *rtp
++ = minimal_tag
;
509 tre_purge_regset(regset
, tnfa
, tag
);
513 DPRINT((" ADDTAGS_SET_SUBMATCH_END num_tags++ tag=%d\n", tag
));
514 regset
[0] = REGSET_UNSET
;
523 case ADDTAGS_RECURSE_NOT_TOP_UNION
:
524 /* Like ADDTAGS_RECURSE, except that top_union is set to zero,
525 * indicating that if a union is being processed, it is not the
526 * top-most of a series */
528 goto do_addtags_recurse
;
530 case ADDTAGS_RECURSE
:
531 /* Setting top_union to 1 means that if a union is begin processed,
532 * it is the top-most of a series, and should recurse through the
533 * series to set the left_tag and right_tag values */
537 node
= tre_stack_pop_voidptr(stack
);
539 id
= node
->submatch_id
;
545 /* Add start of this submatch to regset. */
546 for (i
= 0; regset
[i
] != REGSET_UNSET
; i
++);
549 regset_contains
|= REGSET_HAS_STARTS
;
551 /* Add end of this submatch to regset after processing this
553 STACK_PUSH(stack
, voidptr
, node
);
554 STACK_PUSHX(stack
, int, id
);
555 STACK_PUSHX(stack
, int, ADDTAGS_SET_SUBMATCH_END
);
562 tre_literal_t
*lit
= node
->obj
;
564 if (!IS_SPECIAL(lit
) || IS_BACKREF(lit
) || IS_EMPTY(lit
) || IS_ASSERTION(lit
))
566 DPRINT(("Literal %d-%d\n",
567 (int)lit
->code_min
, (int)lit
->code_max
));
570 /* Regset is not empty, so add a tag before the
571 literal or backref. */
574 DPRINT((" ADDTAGS_RECURSE:LITERAL node->num_tags = 1\n"));
579 status
= tre_merge_branches(mem
, node
, NULL
, tag
,
581 if (status
!= REG_OK
)
583 status
= tre_add_tag_left(mem
, node
, tag
);
584 if (status
!= REG_OK
)
586 if (regset_contains
== REGSET_HAS_STARTS
)
587 tnfa
->tag_directions
[tag
] = TRE_TAG_LEFT_MAXIMIZE
;
589 tnfa
->tag_directions
[tag
] = direction
;
590 DPRINT(("Setting t%d direction to %s\n", tag
,
591 tag_dir_str
[tnfa
->tag_directions
[tag
]]));
592 tre_purge_regset(regset
, tnfa
, tag
);
596 int b
= lit
->code_max
;
597 int t
= tnfa
->submatch_data
[b
].so_tag
;
598 /* Fail if the referenced submatch hasn't been
600 if (tnfa
->submatch_data
[b
].eo_tag
< 0)
602 status
= REG_ESUBREG
;
607 DPRINT((" Backref %d start: "
608 "t%d reordered to before t%d\n",
612 /* Append to tag_order, move "tag" after
619 DPRINT((" Backref %d start: "
620 "(t%d already before t%d)\n",
622 #endif /* TRE_DEBUG */
626 DPRINT((" ADDTAGS_RECURSE:LITERAL num_tags++ tag=%d\n",
628 regset
[0] = REGSET_UNSET
;
637 assert(!IS_TAG(lit
));
643 tre_catenation_t
*cat
= node
->obj
;
644 tre_ast_node_t
*left
= cat
->left
;
645 tre_ast_node_t
*right
= cat
->right
;
646 int reserved_tag
= -1;
647 DPRINT(("Catenation, next_tag = %d\n", next_tag
));
650 /* After processing right child. */
651 STACK_PUSHX(stack
, voidptr
, node
);
652 STACK_PUSHX(stack
, int, ADDTAGS_AFTER_CAT_RIGHT
);
654 /* Process right child. */
655 STACK_PUSHX(stack
, voidptr
, right
);
656 STACK_PUSHX(stack
, int, ADDTAGS_RECURSE
);
658 /* After processing left child. */
659 STACK_PUSHX(stack
, int, next_tag
+ left
->num_tags
);
660 DPRINT((" Pushing %d for after left\n",
661 next_tag
+ left
->num_tags
));
662 if (left
->num_tags
> 0 && right
->num_tags
> 0)
664 /* Reserve the next tag to the right child. */
665 DPRINT((" ADDTAGS_RECURSE:CATENATION num_tags++ "
666 "Reserving next_tag %d to right child\n",
668 reserved_tag
= next_tag
;
671 STACK_PUSHX(stack
, int, reserved_tag
);
672 STACK_PUSHX(stack
, int, ADDTAGS_AFTER_CAT_LEFT
);
674 /* Process left child. */
675 STACK_PUSHX(stack
, voidptr
, left
);
676 STACK_PUSHX(stack
, int, ADDTAGS_RECURSE
);
682 tre_iteration_t
*iter
= node
->obj
;
683 DPRINT(("Iteration\n"));
686 STACK_PUSHX(stack
, int, regset_contains
!= 0);
687 STACK_PUSHX(stack
, int, tag
);
688 STACK_PUSHX(stack
, voidptr
, node
);
689 STACK_PUSHX(stack
, int, ADDTAGS_AFTER_ITERATION
);
691 STACK_PUSHX(stack
, voidptr
, iter
->arg
);
692 STACK_PUSHX(stack
, int, ADDTAGS_RECURSE
);
694 /* Regset is not empty, so add a tag here (this always happens
695 because iterators always get submatch id, even if in the
701 status
= tre_merge_branches(mem
, node
, NULL
, tag
,
703 if (status
!= REG_OK
)
705 status
= tre_add_tag_left(mem
, node
, tag
);
706 if (status
!= REG_OK
)
708 if (regset_contains
== REGSET_HAS_STARTS
&& tag
!= 0)
709 tnfa
->tag_directions
[tag
] = iter
->minimal
?
711 TRE_TAG_LEFT_MAXIMIZE
;
713 tnfa
->tag_directions
[tag
] = direction
;
714 DPRINT(("Setting t%d direction to %s\n", tag
,
715 tag_dir_str
[tnfa
->tag_directions
[tag
]]));
716 tre_purge_regset(regset
, tnfa
, tag
);
719 DPRINT((" ADDTAGS_RECURSE:ITERATION num_tags++ tag=%d\n",
721 regset
[0] = REGSET_UNSET
;
727 direction
= TRE_TAG_LEFT_MAXIMIZE
;
728 DPRINT((" Setting direction to %s\n", tag_dir_str
[direction
]));
734 tre_ast_node_t
*left
;
735 tre_ast_node_t
*right
;
742 DPRINT((" UNION num_tags++ tag=%d\n", tag
));
749 /* For the top union, walk the tree of consecutive unions,
750 * setting the left_tag and right_tag values in increasing
751 * order (left to right priority) */
753 (node
->num_submatches
-
754 (node
->submatch_id
>= 0 &&
755 node
->submatch_id
< SUBMATCH_ID_INVISIBLE_START
)) > 0)
758 int last
= tre_stack_num_objects(stack
);
760 STACK_PUSH(stack
, voidptr
, node
);
761 STACK_PUSH(stack
, int, ADDTAGS_UNION_RECURSE
);
763 while (tre_stack_num_objects(stack
) > last
)
765 symbol
= (tre_addtags_symbol_t
)tre_stack_pop_int(stack
);
768 case ADDTAGS_UNION_RECURSE
:
769 n
= tre_stack_pop_voidptr(stack
);
773 /* Since the top union has num_submatches > 0,
774 * we set all the consecutive union's
775 * make_branches to 1 to force the generation
776 * of end tags for each union branch. */
777 n
->make_branches
= 1;
779 STACK_PUSH(stack
, voidptr
, n
);
780 STACK_PUSH(stack
, int,
781 ADDTAGS_UNION_RIGHT_RECURSE
);
783 if (left
->type
== UNION
)
785 STACK_PUSH(stack
, voidptr
, left
);
786 STACK_PUSH(stack
, int,
787 ADDTAGS_UNION_RECURSE
);
791 DPRINT((" ADDTAGS_UNION_RECURSE "
792 "num_tags++ tag=%d\n", tag
));
800 case ADDTAGS_UNION_RIGHT_RECURSE
:
801 n
= tre_stack_pop_voidptr(stack
);
805 if (right
->type
== UNION
)
807 STACK_PUSH(stack
, voidptr
, right
);
808 STACK_PUSH(stack
, int,
809 ADDTAGS_UNION_RECURSE
);
813 DPRINT((" ADDTAGS_UNION_RIGHT_RECURSE "
814 "num_tags++ tag=%d\n", tag
));
815 uni
->right_tag
= tag
;
827 } /* end switch(symbol) */
828 } /* end while(tre_stack_num_objects(stack) > last */
831 STACK_PUSHX(stack
, int, front_tag
);
832 STACK_PUSHX(stack
, voidptr
, node
);
833 STACK_PUSHX(stack
, int, ADDTAGS_AFTER_UNION_TOP
);
835 } /* end if (top_union && ...) */
841 /* After processing right child. */
842 STACK_PUSHX(stack
, voidptr
, regset
);
843 STACK_PUSHX(stack
, int, regset_contains
!= 0);
844 STACK_PUSHX(stack
, voidptr
, node
);
845 STACK_PUSHX(stack
, int, ADDTAGS_AFTER_UNION_RIGHT
);
847 /* Process right child. */
848 STACK_PUSHX(stack
, voidptr
, right
);
849 STACK_PUSHX(stack
, int, ADDTAGS_RECURSE_NOT_TOP_UNION
);
851 /* After processing left child. */
852 STACK_PUSHX(stack
, int, ADDTAGS_AFTER_UNION_LEFT
);
854 /* Process left child. */
855 STACK_PUSHX(stack
, voidptr
, left
);
856 STACK_PUSHX(stack
, int, ADDTAGS_RECURSE_NOT_TOP_UNION
);
858 /* Regset is not empty, so add a tag here. */
863 status
= tre_merge_branches(mem
, node
, NULL
, front_tag
,
865 if (status
!= REG_OK
)
867 status
= tre_add_tag_left(mem
, node
, front_tag
);
868 if (status
!= REG_OK
)
870 if (regset_contains
== REGSET_HAS_STARTS
)
871 tnfa
->tag_directions
[front_tag
] = TRE_TAG_LEFT_MAXIMIZE
;
873 tnfa
->tag_directions
[front_tag
] = direction
;
874 DPRINT(("Setting t%d direction to %s\n", front_tag
,
875 tag_dir_str
[tnfa
->tag_directions
[front_tag
]]));
876 tre_purge_regset(regset
, tnfa
, front_tag
);
879 regset
[0] = REGSET_UNSET
;
885 } /* end switch (node->type) */
887 break; /* end case: ADDTAGS_RECURSE */
889 case ADDTAGS_AFTER_ITERATION
:
891 tre_iteration_t
*iter
;
892 tre_ast_node_t
*orig
;
895 node
= tre_stack_pop_voidptr(stack
);
896 orig
= node
->original
? node
->original
: node
;
897 iter
= (tre_iteration_t
*)orig
->obj
;
898 enter_tag
= tre_stack_pop_int(stack
);
900 minimal_tag
= enter_tag
;
902 DPRINT(("After iteration\n"));
905 node
->num_tags
= iter
->arg
->num_tags
+ tre_stack_pop_int(stack
);
906 DPRINT((" ADDTAGS_AFTER_ITERATION: node->num_tags = %d\n",
911 /* node->last_matched_branch will have the start tag (the tag
912 just *before* the iteration). iter->arg->last_matched_branch
913 will have the tag(s) inside the iteration, the ones that
914 may need to be reset if the iteration doesn't match. So
915 before we merge iter->arg into node, we need to set up
916 a new tre_last_matched_t and tre_last_matched_branch_t,
917 using any of the inside tags as cmp_tag (we choose the first
918 tag found by bit_ffs). If there are no inside tags, we
919 don't bother creating the extra structures. */
920 tre_last_matched_branch_pre_t
*b
=
921 iter
->arg
->last_matched_branch
;
923 if (b
&& b
->n_tags
> 0)
925 tre_last_matched_pre_t
*u
;
927 bit_ffs(b
->tags
, num_tags
, &b
->cmp_tag
);
928 DPRINT((" ADDTAGS_AFTER_ITERATION: n_tags=%d "
929 "cmp_tag = %d\n", b
->n_tags
, b
->cmp_tag
));
931 u
= tre_mem_calloc(mem
, sizeof(tre_last_matched_pre_t
) +
932 sizeof(tre_last_matched_branch_pre_t
)
933 + bitstr_size(tnfa
->num_tags
));
941 u
->start_tag
= b
->cmp_tag
;
942 u
->tot_branches
= b
->tot_branches
;
943 u
->tot_last_matched
= 1 + b
->tot_last_matched
;
944 u
->tot_tags
= b
->tot_tags
;
946 b
= (tre_last_matched_branch_pre_t
*)(u
+ 1);
948 b
->n_last_matched
= 1;
949 b
->tot_branches
= 1 + u
->tot_branches
;
950 b
->tot_last_matched
= u
->tot_last_matched
;
951 b
->tot_tags
= u
->tot_tags
;
953 iter
->arg
->last_matched_branch
= b
;
955 status
= tre_merge_branches(mem
, node
, iter
->arg
, 0,
957 if (status
!= REG_OK
)
962 /* Add a union with a left EMPTY literal and the right
963 being iter->arg. This should force the tags inside
964 the minimal iteration to prefer being unset */
965 if (iter
->min
== 0 && iter
->max
<= 1)
967 tre_ast_node_t
*u
, *e
;
969 e
= tre_ast_new_literal(mem
, EMPTY
, -1, -1);
975 u
= tre_ast_new_union(mem
, e
, iter
->arg
);
984 direction
= TRE_TAG_MINIMIZE
;
987 direction
= TRE_TAG_MAXIMIZE
;
988 DPRINT((" Setting direction to %s\n", tag_dir_str
[direction
]));
993 case ADDTAGS_AFTER_CAT_LEFT
:
995 int new_tag
= tre_stack_pop_int(stack
);
996 next_tag
= tre_stack_pop_int(stack
);
997 DPRINT(("After cat left, tag = %d, next_tag = %d\n",
1001 DPRINT((" Setting tag to %d\n", new_tag
));
1007 case ADDTAGS_AFTER_CAT_RIGHT
:
1009 tre_catenation_t
*cat
;
1011 DPRINT(("After cat right\n"));
1012 node
= tre_stack_pop_voidptr(stack
);
1016 node
->num_tags
= cat
->left
->num_tags
+ cat
->right
->num_tags
;
1017 DPRINT((" ADDTAGS_AFTER_CAT_RIGHT: node->num_tags = %d\n",
1022 status
= tre_merge_branches(mem
, cat
->left
, cat
->right
, 0,
1024 if (status
!= REG_OK
)
1026 status
= tre_merge_branches(mem
, node
, cat
->left
, 0,
1032 case ADDTAGS_AFTER_UNION_LEFT
:
1033 DPRINT(("After union left\n"));
1034 /* Lift the bottom of the `regset' array so that when processing
1035 the right operand the items currently in the array are
1036 invisible. The original bottom was saved at ADDTAGS_UNION and
1037 will be restored at ADDTAGS_AFTER_UNION_RIGHT below. */
1038 while (*regset
!= REGSET_UNSET
)
1040 regset_contains
= 0;
1043 case ADDTAGS_AFTER_UNION_RIGHT
:
1046 tre_ast_node_t
*orig
;
1048 /* Note: node may not be a UNION, but a CATENATION with a left
1049 * tag. So that is why we pass the original node->obj on the
1050 * stack, to get the union's true values. */
1052 DPRINT(("After union right\n"));
1053 node
= tre_stack_pop_voidptr(stack
);
1054 orig
= node
->original
? node
->original
: node
;
1055 uni
= (tre_union_t
*)orig
->obj
;
1056 added_tags
= tre_stack_pop_int(stack
);
1059 node
->num_tags
= uni
->left
->num_tags
+ uni
->right
->num_tags
1061 if (uni
->left_tag
> 0)
1063 if (uni
->right_tag
> 0)
1065 DPRINT((" ADDTAGS_AFTER_UNION_RIGHT: node->num_tags = %d\n",
1068 regset
= tre_stack_pop_voidptr(stack
);
1070 /* Add tags after both children, the left child gets a smaller
1071 tag than the right child. This guarantees that we prefer
1072 the left child over the right child. */
1073 /* XXX - This is not always necessary (if the children have
1074 tags which must be seen for every match of that child). */
1075 if (!first_pass
&& node
->make_branches
)
1077 tre_last_matched_branch_pre_t
*lb
=
1078 uni
->left
->last_matched_branch
;
1079 tre_last_matched_branch_pre_t
*rb
=
1080 uni
->right
->last_matched_branch
;
1081 tre_last_matched_pre_t
*lu
=
1082 uni
->left
->last_matched_in_progress
;
1083 tre_last_matched_pre_t
*ru
=
1084 uni
->right
->last_matched_in_progress
;
1085 tre_last_matched_pre_t
*u
;
1086 /* We don't need to call tre_merge_branches because these
1087 * tags don't participate in submatch ranges, so don't need
1088 * to be recorded. But we do set the cmp_tag entry of the
1089 * tre_last_matched_branch_pre_t, so we might call
1090 * tre_merge_branches if we need to create an empty
1091 * tre_last_matched_branch_pre_t. */
1092 if (uni
->left_tag
> 0)
1094 DPRINT(("Setting t%d direction to maximize\n",
1096 status
= tre_add_tag_right(mem
, uni
->left
, uni
->left_tag
);
1097 if (status
!= REG_OK
)
1099 tnfa
->tag_directions
[uni
->left_tag
] = TRE_TAG_MAXIMIZE
;
1102 status
= tre_merge_branches(mem
, uni
->left
, NULL
, -1,
1104 if (status
!= REG_OK
)
1106 lb
= uni
->left
->last_matched_branch
;
1108 lb
->cmp_tag
= uni
->left_tag
;
1110 if (uni
->right_tag
> 0)
1112 DPRINT(("Setting t%d direction to maximize\n",
1114 status
= tre_add_tag_right(mem
, uni
->right
, uni
->right_tag
);
1115 if (status
!= REG_OK
)
1117 tnfa
->tag_directions
[uni
->right_tag
] = TRE_TAG_MAXIMIZE
;
1120 status
= tre_merge_branches(mem
, uni
->right
, NULL
, -1,
1122 if (status
!= REG_OK
)
1124 rb
= uni
->right
->last_matched_branch
;
1126 rb
->cmp_tag
= uni
->right_tag
;
1128 /* Now merge the tre_last_matched_branch_pre_t into a
1129 tre_last_matched_pre_t */
1134 /* Create a new tre_last_matched_pre_t */
1135 u
= tre_mem_calloc(mem
, sizeof(tre_last_matched_pre_t
));
1138 status
= REG_ESPACE
;
1141 u
->tot_last_matched
= 1;
1147 u
->tot_branches
+= lb
->tot_branches
;
1148 u
->tot_last_matched
+= lb
->tot_last_matched
;
1149 u
->tot_tags
+= lb
->tot_tags
;
1154 u
->tot_branches
+= rb
->tot_branches
;
1155 u
->tot_last_matched
+= rb
->tot_last_matched
;
1156 u
->tot_tags
+= rb
->tot_tags
;
1163 u
->tot_branches
+= rb
->tot_branches
;
1164 u
->tot_last_matched
+= rb
->tot_last_matched
;
1165 u
->tot_tags
+= rb
->tot_tags
;
1170 /* Use ru, and add lb */
1174 lb
->next
= u
->branches
;
1177 u
->tot_branches
+= lb
->tot_branches
;
1178 u
->tot_last_matched
+= lb
->tot_last_matched
;
1179 u
->tot_tags
+= lb
->tot_tags
;
1183 else if (ru
== NULL
)
1185 /* Use lu, and add rb */
1189 rb
->next
= u
->branches
;
1192 u
->tot_branches
+= rb
->tot_branches
;
1193 u
->tot_last_matched
+= rb
->tot_last_matched
;
1194 u
->tot_tags
+= rb
->tot_tags
;
1199 /* Merge lu and ru into lu */
1204 tre_last_matched_branch_pre_t
*b
= lu
->branches
;
1205 while (b
->next
) b
= b
->next
;
1206 b
->next
= ru
->branches
;
1207 lu
->n_branches
+= ru
->n_branches
;
1210 else if (ru
->branches
)
1212 lu
->branches
= ru
->branches
;
1213 lu
->n_branches
= ru
->n_branches
;
1215 lu
->tot_branches
+= ru
->tot_branches
;
1216 lu
->tot_last_matched
+= ru
->tot_last_matched
- 1;
1217 lu
->tot_tags
+= ru
->tot_tags
;
1220 node
->last_matched_in_progress
= u
;
1222 direction
= TRE_TAG_MAXIMIZE
;
1226 case ADDTAGS_AFTER_UNION_TOP
: /* only called when not first_pass */
1228 tre_last_matched_branch_pre_t
*b
;
1229 tre_last_matched_pre_t
*u
;
1232 DPRINT(("After union top\n"));
1233 node
= tre_stack_pop_voidptr(stack
);
1234 start_tag
= tre_stack_pop_int(stack
);
1235 b
= tre_mem_calloc(mem
, sizeof(tre_last_matched_branch_pre_t
)
1236 + bitstr_size(tnfa
->num_tags
));
1239 status
= REG_ESPACE
;
1243 u
= node
->last_matched_in_progress
;
1244 u
->start_tag
= start_tag
;
1245 b
->tot_branches
= 1 + u
->tot_branches
;
1246 b
->tot_last_matched
= u
->tot_last_matched
;
1247 b
->tot_tags
= u
->tot_tags
;
1248 b
->last_matched
= u
;
1249 b
->n_last_matched
= 1;
1250 node
->last_matched_branch
= b
;
1251 node
->last_matched_in_progress
= NULL
;
1259 } /* end switch(symbol) */
1260 } /* end while(tre_stack_num_objects(stack) > bottom) */
1262 if (status
!= REG_OK
)
1264 DPRINT(("Error during %s pass\n", first_pass
? "first" : "second"));
1265 goto error_post_compile
;
1271 if (num_tags
!= tnfa
->num_tags
)
1273 DPRINT(("num_tags(%d) != tnfa->num_tags(%d)\n", num_tags
,
1275 status
= REG_BADPAT
;
1276 goto error_post_compile
;
1279 tre_purge_regset(regset
, tnfa
, tag
);
1280 DPRINT(("Setting t%d to %s\n", num_tags
,
1281 tag_dir_str
[direction
]));
1282 tnfa
->tag_directions
[num_tags
] = direction
;
1284 if (rtp
> reorder_tags
+ 2 * tnfa
->num_reorder_tags
)
1286 DPRINT(("Processed %d reorder tags instead of %d\n",
1287 (int)(rtp
- reorder_tags
) / 2, tnfa
->num_reorder_tags
));
1288 status
= REG_BADPAT
;
1289 goto error_post_compile
;
1293 if (reorder_tags
[0] >= 0)
1295 DPRINT(("reorder_tags:\n"));
1296 for (rtp
= reorder_tags
; *rtp
>= 0;)
1298 DPRINT(("%d after ", *rtp
++));
1299 DPRINT(("%d\n", *rtp
++));
1303 DPRINT(("No reorder_tags\n"));
1304 #endif /* TRE_DEBUG */
1306 /* Initialize to_reorder */
1307 for (i
= 0; i
< num_tags
; i
++)
1309 /* Use to_seq_order to convert reorder_tags values, and use those to
1310 * reorder to_reorder */
1311 for (rtp
= reorder_tags
; *rtp
>= 0;)
1316 /* Skip reordering the final tag */
1319 DPRINT(("Skipping reorder of %d\n", ti
));
1323 /* The number of the tag to reorder */
1324 high
= to_reorder
[ti
];
1325 /* Reorder after this tag */
1326 low
= to_reorder
[*rtp
++];
1328 DPRINT(("ti=%d high=%d low=%d\n", ti
, high
, low
));
1331 DPRINT(("Tag %d already before %d\n", high
, low
));
1334 for (j
= 0; j
< num_tags
; j
++)
1335 if (to_reorder
[j
] > low
&& to_reorder
[j
] < high
)
1337 to_reorder
[ti
] = low
+ 1;
1339 DPRINT(("to_reorder=("));
1340 for (j
= 0; j
< num_tags
; j
++)
1342 DPRINT(("%d", to_reorder
[j
]));
1343 if (j
< num_tags
- 1)
1347 #endif /* TRE_DEBUG */
1349 /* Determine if reordering in really necessary */
1351 int need_reorder
= 0;
1352 for (i
= 0; i
< num_tags
; i
++)
1353 if(to_reorder
[i
] != i
)
1358 /* If need_reorder is not set, free reorder_tags, and set to NULL,
1359 * indicating no reordering is needed */
1362 DPRINT(("Don't need to reorder\n"));
1363 xfree(reorder_tags
);
1364 reorder_tags
= NULL
;
1372 tre_tag_direction_t
*new_tag_directions
;
1374 DPRINT(("to_reorder:"));
1375 for (i
= 0; i
< num_tags
; i
++)
1376 DPRINT((" %d->%d", i
, to_reorder
[i
]));
1378 #endif /* TRE_DEBUG */
1380 DPRINT(("Reordering submatch_data\n"));
1381 for (i
= 0; i
< tnfa
->num_submatches
; i
++)
1384 int so
= tnfa
->submatch_data
[i
].so_tag
;
1385 int eo
= tnfa
->submatch_data
[i
].eo_tag
;
1386 #endif /* TRE_DEBUG */
1387 tnfa
->submatch_data
[i
].so_tag
=
1388 to_reorder
[tnfa
->submatch_data
[i
].so_tag
];
1389 tnfa
->submatch_data
[i
].eo_tag
=
1390 tnfa
->submatch_data
[i
].eo_tag
< num_tags
?
1391 to_reorder
[tnfa
->submatch_data
[i
].eo_tag
] :
1392 tnfa
->submatch_data
[i
].eo_tag
;
1393 DPRINT(("pmatch[%d]: {%d, %d}->{%d, %d}\n", i
, so
, eo
,
1394 tnfa
->submatch_data
[i
].so_tag
,
1395 tnfa
->submatch_data
[i
].eo_tag
));
1398 DPRINT(("Reordering tag_directions\n"));
1399 /* We only allocate num_tags directions and reorder them. The
1400 * num_tags-th direction (end tag) is left unchanged. */
1401 new_tag_directions
= xmalloc(sizeof(*new_tag_directions
) * num_tags
);
1402 if (new_tag_directions
== NULL
)
1404 status
= REG_ESPACE
;
1405 goto error_post_compile
;
1407 for (i
= 0; i
< num_tags
; i
++)
1409 new_tag_directions
[to_reorder
[i
]] = tnfa
->tag_directions
[i
];
1412 for (i
= 0; i
< num_tags
; i
++)
1414 DPRINT(("t%d %s->%s\n", i
,
1415 tag_dir_str
[tnfa
->tag_directions
[i
]],
1416 tag_dir_str
[new_tag_directions
[i
]]));
1418 DPRINT(("t%d %s->%s\n", num_tags
,
1419 tag_dir_str
[tnfa
->tag_directions
[num_tags
]],
1420 tag_dir_str
[tnfa
->tag_directions
[num_tags
]]));
1421 #endif /* TRE_DEBUG */
1422 memcpy(tnfa
->tag_directions
, new_tag_directions
, sizeof(*new_tag_directions
) * num_tags
);
1423 xfree(new_tag_directions
);
1425 DPRINT(("Reordering minimal_tags\n"));
1426 for (i
= 0; tnfa
->minimal_tags
[i
] >= 0; i
++)
1427 tnfa
->minimal_tags
[i
] = tnfa
->minimal_tags
[i
] < num_tags
?
1428 to_reorder
[tnfa
->minimal_tags
[i
]] :
1429 tnfa
->minimal_tags
[i
];
1431 DPRINT(("Reordering AST tags\n"));
1432 STACK_PUSH(stack
, voidptr
, tree
);
1433 while (status
== REG_OK
&& tre_stack_num_objects(stack
) > bottom
)
1435 node
= tre_stack_pop_voidptr(stack
);
1441 tre_literal_t
*lit
= (tre_literal_t
*)node
->obj
;
1443 lit
->code_max
= to_reorder
[lit
->code_max
];
1449 tre_union_t
*uni
= (tre_union_t
*)node
->obj
;
1450 STACK_PUSHX(stack
, voidptr
, uni
->right
);
1451 STACK_PUSHX(stack
, voidptr
, uni
->left
);
1457 tre_catenation_t
*cat
= (tre_catenation_t
*)node
->obj
;
1458 STACK_PUSHX(stack
, voidptr
, cat
->right
);
1459 STACK_PUSHX(stack
, voidptr
, cat
->left
);
1465 tre_iteration_t
*iter
= (tre_iteration_t
*)node
->obj
;
1466 STACK_PUSHX(stack
, voidptr
, iter
->arg
);
1475 if (status
!= REG_OK
)
1477 DPRINT(("Error while reordering tags\n"));
1478 goto error_post_compile
;
1485 if (tree
->last_matched_branch
)
1487 tre_last_matched_branch_t
*buf
, *b
, *bb
;
1488 tre_last_matched_branch_pre_t
*bp
;
1489 tre_last_matched_t
*u
, *uu
;
1490 tre_last_matched_pre_t
*up
;
1494 tre_last_matched_branch_t
*_b
;
1495 tre_last_matched_t
*_u
;
1498 DPRINT(("last_match_branch_pre:\n"));
1499 print_last_match_branch_pre(tree
->last_matched_branch
, 0, num_tags
);
1500 #endif /* TRE_DEBUG */
1501 buf
= (tre_last_matched_branch_t
*)xcalloc(1,
1502 tree
->last_matched_branch
->tot_branches
1503 * sizeof(tre_last_matched_branch_t
) +
1504 tree
->last_matched_branch
->tot_last_matched
1505 * sizeof(tre_last_matched_t
) +
1506 tree
->last_matched_branch
->tot_tags
*
1510 status
= REG_ESPACE
;
1511 goto error_post_compile
;
1515 u
= (tre_last_matched_t
*)(b
+
1516 tree
->last_matched_branch
->tot_branches
);
1517 t
= (int *)(u
+ tree
->last_matched_branch
->tot_last_matched
);
1522 #endif /* TRE_DEBUG */
1523 DPRINT(("Copying info_pre to info\n"));
1524 STACK_PUSH(stack
, voidptr
, tree
->last_matched_branch
);
1525 STACK_PUSH(stack
, int, 1);
1526 STACK_PUSH(stack
, int, COPY_LAST_MATCHED_BRANCH
);
1528 while (status
== REG_OK
&& tre_stack_num_objects(stack
) > bottom
)
1530 switch (tre_stack_pop_int(stack
))
1532 case COPY_LAST_MATCHED_BRANCH
:
1533 i
= tre_stack_pop_int(stack
);
1534 /* The tre_last_matched_branch_pre_t * is still on the
1536 STACK_PUSHX(stack
, voidptr
, b
);
1537 STACK_PUSHX(stack
, int, COPY_LAST_MATCHED_BRANCH_NEXT
);
1541 case COPY_LAST_MATCHED_BRANCH_NEXT
:
1542 bb
= tre_stack_pop_voidptr(stack
);
1543 bp
= tre_stack_pop_voidptr(stack
);
1544 bb
->n_last_matched
= bp
->n_last_matched
;
1545 bb
->cmp_tag
= bp
->cmp_tag
;
1549 n
= bb
->n_tags
= bp
->n_tags
;
1551 for (i
= 0; i
< num_tags
; i
++)
1552 if (bit_test(bp
->tags
, i
))
1561 STACK_PUSHX(stack
, voidptr
, bp
->next
);
1562 STACK_PUSHX(stack
, voidptr
, bb
+ 1);
1563 STACK_PUSHX(stack
, int, COPY_LAST_MATCHED_BRANCH_NEXT
);
1565 if (bp
->n_last_matched
> 0)
1567 bb
->last_matched
= u
;
1568 STACK_PUSHX(stack
, voidptr
, bp
->last_matched
);
1569 STACK_PUSHX(stack
, int, bp
->n_last_matched
);
1570 STACK_PUSHX(stack
, int, COPY_LAST_MATCHED
);
1574 case COPY_LAST_MATCHED
:
1575 i
= tre_stack_pop_int(stack
);
1576 /* The tre_last_matched_pre_t * is still on the stack */
1577 STACK_PUSHX(stack
, voidptr
, u
);
1578 STACK_PUSHX(stack
, int, COPY_LAST_MATCHED_NEXT
);
1582 case COPY_LAST_MATCHED_NEXT
:
1583 uu
= tre_stack_pop_voidptr(stack
);
1584 up
= tre_stack_pop_voidptr(stack
);
1585 uu
->n_branches
= up
->n_branches
;
1587 uu
->start_tag
= up
->start_tag
;
1590 STACK_PUSHX(stack
, voidptr
, up
->next
);
1591 STACK_PUSHX(stack
, voidptr
, uu
+ 1);
1592 STACK_PUSHX(stack
, int, COPY_LAST_MATCHED_NEXT
);
1594 STACK_PUSHX(stack
, voidptr
, up
->branches
);
1595 STACK_PUSHX(stack
, int, up
->n_branches
);
1596 STACK_PUSHX(stack
, int, COPY_LAST_MATCHED_BRANCH
);
1600 if (status
!= REG_OK
)
1601 goto error_post_compile
;
1603 DPRINT(("last_matched_branch:\n"));
1604 print_last_match_branch(buf
, 0);
1605 if (b
!= _b
+ tree
->last_matched_branch
->tot_branches
)
1606 DPRINT(("b/%p != _b + tree->last_matched_branch->tot_branches/%p\n",
1607 b
, _b
+ tree
->last_matched_branch
->tot_branches
));
1608 if (u
!= _u
+ tree
->last_matched_branch
->tot_last_matched
)
1609 DPRINT(("u/%p != _u + "
1610 "tree->last_matched_branch->tot_last_matched/%p\n",
1611 u
, _u
+ tree
->last_matched_branch
->tot_last_matched
));
1612 if (t
!= _t
+ tree
->last_matched_branch
->tot_tags
)
1613 DPRINT(("t/%p != _t + tree->last_matched_branch->tot_tags/%p\n",
1614 t
, _t
+ tree
->last_matched_branch
->tot_tags
));
1615 #endif /* TRE_DEBUG */
1616 tnfa
->last_matched_branch
= buf
;
1620 DPRINT(("No last_match_branch_pre\n"));
1621 #endif /* TRE_DEBUG */
1624 DPRINT(("tre_add_tags: %s complete. Number of tags %d.\n",
1625 first_pass
? "First pass" : "Second pass", num_tags
));
1627 tre_ast_print(tree
);
1628 #endif /* TRE_DEBUG */
1629 DPRINT(("tre_add_tags: tree->num_tags=%d num_tags=%d\n", tree
->num_tags
,
1631 assert(tree
->num_tags
== num_tags
);
1632 tnfa
->end_tag
= num_tags
;
1633 tnfa
->num_tags
= num_tags
;
1634 tnfa
->num_minimals
= num_minimals
;
1636 xfree(reorder_tags
);
1646 AST to TNFA compilation routines.
1652 } tre_copyast_symbol_t
;
1654 /* Flags for tre_copy_ast(). */
1655 #define COPY_REMOVE_TAGS 1
1656 #define COPY_MAXIMIZE_FIRST_TAG 2
1658 static reg_errcode_t
1659 tre_copy_ast(tre_mem_t mem
, tre_stack_t
*stack
, tre_ast_node_t
*ast
,
1660 int flags
, int *pos_add
, tre_tag_direction_t
*tag_directions
,
1661 tre_ast_node_t
**copy
, int *max_pos
)
1663 reg_errcode_t status
= REG_OK
;
1664 int bottom
= tre_stack_num_objects(stack
);
1667 tre_ast_node_t
**result
= copy
;
1668 tre_copyast_symbol_t symbol
;
1670 STACK_PUSH(stack
, voidptr
, ast
);
1671 STACK_PUSH(stack
, int, COPY_RECURSE
);
1673 while (status
== REG_OK
&& tre_stack_num_objects(stack
) > bottom
)
1675 tre_ast_node_t
*node
;
1676 if (status
!= REG_OK
)
1679 symbol
= (tre_copyast_symbol_t
)tre_stack_pop_int(stack
);
1682 case COPY_SET_RESULT_PTR
:
1683 result
= tre_stack_pop_voidptr(stack
);
1686 node
= tre_stack_pop_voidptr(stack
);
1691 tre_literal_t
*lit
= node
->obj
;
1692 int pos
= lit
->position
;
1693 int min
= lit
->code_min
;
1694 int max
= lit
->code_max
;
1695 tre_bracket_match_list_t
*list
= !IS_SPECIAL(lit
) ?
1696 lit
->u
.bracket_match_list
:
1698 if (!IS_SPECIAL(lit
) || IS_BACKREF(lit
))
1700 /* XXX - e.g. [ab] has only one position but two
1701 nodes, so we are creating holes in the state space
1702 here. Not fatal, just wastes memory. */
1706 else if (IS_TAG(lit
) && (flags
& COPY_REMOVE_TAGS
))
1708 /* Change this tag to empty. */
1712 else if (IS_TAG(lit
) && (flags
& COPY_MAXIMIZE_FIRST_TAG
)
1715 /* Maximize the first tag. */
1716 if (tag_directions
[max
] == TRE_TAG_LEFT_MAXIMIZE
)
1717 tag_directions
[max
] = TRE_TAG_MAXIMIZE
;
1720 *result
= tre_ast_new_literal(mem
, min
, max
, pos
);
1721 if (*result
== NULL
)
1722 status
= REG_ESPACE
;
1727 if (!IS_SPECIAL(lit
))
1728 ((tre_literal_t
*)(*result
)->obj
)->u
.bracket_match_list
1734 tre_union_t
*uni
= node
->obj
;
1736 *result
= tre_ast_new_union(mem
, uni
->left
, uni
->right
);
1737 if (*result
== NULL
)
1739 status
= REG_ESPACE
;
1742 tmp
= (*result
)->obj
;
1743 result
= &tmp
->left
;
1744 STACK_PUSHX(stack
, voidptr
, uni
->right
);
1745 STACK_PUSHX(stack
, int, COPY_RECURSE
);
1746 STACK_PUSHX(stack
, voidptr
, &tmp
->right
);
1747 STACK_PUSHX(stack
, int, COPY_SET_RESULT_PTR
);
1748 STACK_PUSHX(stack
, voidptr
, uni
->left
);
1749 STACK_PUSHX(stack
, int, COPY_RECURSE
);
1754 tre_catenation_t
*cat
= node
->obj
;
1755 tre_catenation_t
*tmp
;
1756 *result
= tre_ast_new_catenation(mem
, cat
->left
, cat
->right
);
1757 if (*result
== NULL
)
1759 status
= REG_ESPACE
;
1762 tmp
= (*result
)->obj
;
1765 result
= &tmp
->left
;
1767 STACK_PUSHX(stack
, voidptr
, cat
->right
);
1768 STACK_PUSHX(stack
, int, COPY_RECURSE
);
1769 STACK_PUSHX(stack
, voidptr
, &tmp
->right
);
1770 STACK_PUSHX(stack
, int, COPY_SET_RESULT_PTR
);
1771 STACK_PUSHX(stack
, voidptr
, cat
->left
);
1772 STACK_PUSHX(stack
, int, COPY_RECURSE
);
1777 tre_iteration_t
*iter
= node
->obj
;
1778 STACK_PUSHX(stack
, voidptr
, iter
->arg
);
1779 STACK_PUSHX(stack
, int, COPY_RECURSE
);
1780 *result
= tre_ast_new_iter(mem
, iter
->arg
, iter
->min
,
1781 iter
->max
, iter
->minimal
);
1782 if (*result
== NULL
)
1784 status
= REG_ESPACE
;
1787 iter
= (*result
)->obj
;
1788 result
= &iter
->arg
;
1798 *pos_add
+= num_copied
;
1805 } tre_expand_ast_symbol_t
;
1807 /* Expands each iteration node that has a finite nonzero minimum or maximum
1808 iteration count to a catenated sequence of copies of the node. */
1809 static reg_errcode_t
1810 tre_expand_ast(tre_mem_t mem
, tre_stack_t
*stack
, tre_ast_node_t
*ast
,
1811 int *position
, tre_tag_direction_t
*tag_directions
,
1814 reg_errcode_t status
= REG_OK
;
1815 int bottom
= tre_stack_num_objects(stack
);
1817 int pos_add_total
= 0;
1820 /* Current approximate matching parameters. */
1821 int params
[TRE_PARAM_LAST
];
1822 /* Approximate parameter nesting level. */
1823 int params_depth
= 0;
1824 #endif /* TRE_APPROX */
1828 #endif /* TRE_APPROX */
1831 for (i
= 0; i
< TRE_PARAM_LAST
; i
++)
1832 params
[i
] = TRE_PARAM_DEFAULT
;
1833 #endif /* TRE_APPROX */
1835 STACK_PUSHR(stack
, voidptr
, ast
);
1836 STACK_PUSHR(stack
, int, EXPAND_RECURSE
);
1837 while (status
== REG_OK
&& tre_stack_num_objects(stack
) > bottom
)
1839 tre_ast_node_t
*node
;
1840 tre_expand_ast_symbol_t symbol
;
1842 if (status
!= REG_OK
)
1845 DPRINT(("pos_add %d\n", pos_add
));
1847 symbol
= (tre_expand_ast_symbol_t
)tre_stack_pop_int(stack
);
1848 node
= tre_stack_pop_voidptr(stack
);
1851 case EXPAND_RECURSE
:
1856 tre_literal_t
*lit
= node
->obj
;
1857 if (!IS_SPECIAL(lit
) || IS_BACKREF(lit
))
1859 lit
->position
+= pos_add
;
1860 if (lit
->position
> max_pos
)
1861 max_pos
= lit
->position
;
1867 tre_union_t
*uni
= node
->obj
;
1868 STACK_PUSHX(stack
, voidptr
, uni
->right
);
1869 STACK_PUSHX(stack
, int, EXPAND_RECURSE
);
1870 STACK_PUSHX(stack
, voidptr
, uni
->left
);
1871 STACK_PUSHX(stack
, int, EXPAND_RECURSE
);
1876 tre_catenation_t
*cat
= node
->obj
;
1877 STACK_PUSHX(stack
, voidptr
, cat
->right
);
1878 STACK_PUSHX(stack
, int, EXPAND_RECURSE
);
1879 STACK_PUSHX(stack
, voidptr
, cat
->left
);
1880 STACK_PUSHX(stack
, int, EXPAND_RECURSE
);
1885 tre_iteration_t
*iter
= node
->obj
;
1886 STACK_PUSHX(stack
, int, pos_add
);
1887 STACK_PUSHX(stack
, voidptr
, node
);
1888 STACK_PUSHX(stack
, int, EXPAND_AFTER_ITER
);
1889 STACK_PUSHX(stack
, voidptr
, iter
->arg
);
1890 STACK_PUSHX(stack
, int, EXPAND_RECURSE
);
1891 /* If we are going to expand this node at EXPAND_AFTER_ITER
1892 then don't increase the `pos' fields of the nodes now, it
1893 will get done when expanding. */
1894 if (iter
->min
> 1 || iter
->max
> 1)
1905 case EXPAND_AFTER_ITER
:
1907 tre_iteration_t
*iter
= node
->obj
;
1909 pos_add
= tre_stack_pop_int(stack
);
1910 pos_add_last
= pos_add
;
1911 /* Originally (in tre_parse_bound), if min == 0 && max == 0, we
1912 immediate replace the whole iteration with EMPTY. This
1913 unfortunately drops any submatches, and messes up setting the
1914 pmatch values (we can get tags of -1, and tag values in the
1915 billions). So we left it there and replace with EMPTY here. */
1916 if (iter
->min
== 0 && iter
->max
== 0)
1918 tre_ast_node_t
*empty
= tre_ast_new_literal(mem
, EMPTY
, -1, -1);
1921 node
->obj
= empty
->obj
;
1922 node
->type
= empty
->type
;
1924 else if (iter
->min
> 1 || iter
->max
> 1)
1926 tre_ast_node_t
*seq1
= NULL
, *seq2
= NULL
;
1928 int pos_add_save
= pos_add
;
1930 /* Create a catenated sequence of copies of the node. */
1931 for (j
= 0; j
< iter
->min
; j
++)
1933 tre_ast_node_t
*copy
;
1934 /* Remove tags from all but the last copy. */
1935 int flags
= ((j
+ 1 < iter
->min
)
1937 : COPY_MAXIMIZE_FIRST_TAG
);
1938 DPRINT((" pos_add %d\n", pos_add
));
1939 pos_add_save
= pos_add
;
1940 status
= tre_copy_ast(mem
, stack
, iter
->arg
, flags
,
1941 &pos_add
, tag_directions
, ©
,
1943 if (status
!= REG_OK
)
1946 seq1
= tre_ast_new_catenation(mem
, seq1
, copy
);
1953 if (iter
->max
== -1)
1955 /* No upper limit. */
1956 pos_add_save
= pos_add
;
1957 status
= tre_copy_ast(mem
, stack
, iter
->arg
, 0,
1958 &pos_add
, NULL
, &seq2
, &max_pos
);
1959 if (status
!= REG_OK
)
1961 seq2
= tre_ast_new_iter(mem
, seq2
, 0, -1, 0);
1967 for (j
= iter
->min
; j
< iter
->max
; j
++)
1969 tre_ast_node_t
*tmp
, *copy
;
1970 pos_add_save
= pos_add
;
1971 status
= tre_copy_ast(mem
, stack
, iter
->arg
, 0,
1972 &pos_add
, NULL
, ©
, &max_pos
);
1973 if (status
!= REG_OK
)
1976 seq2
= tre_ast_new_catenation(mem
, copy
, seq2
);
1981 tmp
= tre_ast_new_literal(mem
, EMPTY
, -1, -1);
1984 seq2
= tre_ast_new_union(mem
, tmp
, seq2
);
1990 pos_add
= pos_add_save
;
1993 else if (seq2
!= NULL
)
1994 seq1
= tre_ast_new_catenation(mem
, seq1
, seq2
);
1997 node
->obj
= seq1
->obj
;
1998 node
->type
= seq1
->type
;
2002 pos_add_total
+= pos_add
- pos_add_last
;
2003 if (iter_depth
== 0)
2004 pos_add
= pos_add_total
;
2007 /* If approximate parameters are specified, surround the result
2008 with two parameter setting nodes. The one on the left sets
2009 the specified parameters, and the one on the right restores
2010 the old parameters. */
2013 tre_ast_node_t
*tmp_l
, *tmp_r
, *tmp_node
, *node_copy
;
2016 tmp_l
= tre_ast_new_literal(mem
, PARAMETER
, 0, -1);
2019 ((tre_literal_t
*)tmp_l
->obj
)->u
.params
= iter
->params
;
2020 iter
->params
[TRE_PARAM_DEPTH
] = params_depth
+ 1;
2021 tmp_r
= tre_ast_new_literal(mem
, PARAMETER
, 0, -1);
2024 old_params
= tre_mem_alloc(mem
, sizeof(*old_params
)
2028 for (i
= 0; i
< TRE_PARAM_LAST
; i
++)
2029 old_params
[i
] = params
[i
];
2030 ((tre_literal_t
*)tmp_r
->obj
)->u
.params
= old_params
;
2031 old_params
[TRE_PARAM_DEPTH
] = params_depth
;
2032 /* XXX - this is the only place where ast_new_node is
2033 needed -- should be moved inside AST module. */
2034 node_copy
= tre_ast_new_node(mem
, ITERATION
,
2035 sizeof(tre_iteration_t
));
2038 node_copy
->obj
= node
->obj
;
2039 tmp_node
= tre_ast_new_catenation(mem
, tmp_l
, node_copy
);
2042 tmp_node
= tre_ast_new_catenation(mem
, tmp_node
, tmp_r
);
2045 /* Replace the contents of `node' with `tmp_node'. */
2046 memcpy(node
, tmp_node
, sizeof(*node
));
2047 node
->obj
= tmp_node
->obj
;
2048 node
->type
= tmp_node
->type
;
2050 if (params_depth
> *max_depth
)
2051 *max_depth
= params_depth
;
2053 #endif /* TRE_APPROX */
2062 *position
+= pos_add_total
;
2064 /* `max_pos' should never be larger than `*position' if the above
2065 code works, but just an extra safeguard let's make sure
2066 `*position' is set large enough so enough memory will be
2067 allocated for the transition table. */
2068 if (max_pos
> *position
)
2069 *position
= max_pos
;
2072 DPRINT(("Expanded AST:\n"));
2074 DPRINT(("*position %d, max_pos %d\n", *position
, max_pos
));
2080 static tre_pos_and_tags_t
*
2081 tre_set_empty(tre_mem_t mem
)
2083 tre_pos_and_tags_t
*new_set
;
2085 new_set
= tre_mem_calloc(mem
, sizeof(*new_set
));
2086 if (new_set
== NULL
)
2089 new_set
[0].position
= -1;
2090 new_set
[0].code_min
= -1;
2091 new_set
[0].code_max
= -1;
2096 static tre_pos_and_tags_t
*
2097 tre_set_one(tre_mem_t mem
, int position
, int code_min
, int code_max
,
2098 tre_bracket_match_list_t
*bracket_match_list
, int backref
)
2100 tre_pos_and_tags_t
*new_set
;
2102 new_set
= tre_mem_calloc(mem
, sizeof(*new_set
) * 2);
2103 if (new_set
== NULL
)
2106 new_set
[0].position
= position
;
2107 new_set
[0].code_min
= code_min
;
2108 new_set
[0].code_max
= code_max
;
2109 new_set
[0].bracket_match_list
= bracket_match_list
;
2110 new_set
[0].backref
= backref
;
2111 new_set
[1].position
= -1;
2112 new_set
[1].code_min
= -1;
2113 new_set
[1].code_max
= -1;
2118 static tre_pos_and_tags_t
*
2119 tre_set_union(tre_mem_t mem
, tre_pos_and_tags_t
*set1
, tre_pos_and_tags_t
*set2
,
2120 int *tags
, int assertions
, int *params
)
2123 tre_pos_and_tags_t
*new_set
;
2127 for (num_tags
= 0; tags
!= NULL
&& tags
[num_tags
] >= 0; num_tags
++);
2128 for (s1
= 0; set1
[s1
].position
>= 0; s1
++);
2129 for (s2
= 0; set2
[s2
].position
>= 0; s2
++);
2130 new_set
= tre_mem_calloc(mem
, sizeof(*new_set
) * (s1
+ s2
+ 1));
2134 for (s1
= 0; set1
[s1
].position
>= 0; s1
++)
2136 new_set
[s1
].position
= set1
[s1
].position
;
2137 new_set
[s1
].code_min
= set1
[s1
].code_min
;
2138 new_set
[s1
].code_max
= set1
[s1
].code_max
;
2139 new_set
[s1
].assertions
= set1
[s1
].assertions
| assertions
;
2140 new_set
[s1
].bracket_match_list
= set1
[s1
].bracket_match_list
;
2141 new_set
[s1
].backref
= set1
[s1
].backref
;
2142 if (set1
[s1
].tags
== NULL
&& tags
== NULL
)
2143 new_set
[s1
].tags
= NULL
;
2146 for (i
= 0; set1
[s1
].tags
!= NULL
&& set1
[s1
].tags
[i
] >= 0; i
++);
2147 new_tags
= tre_mem_alloc(mem
, (sizeof(*new_tags
)
2148 * (i
+ num_tags
+ 1)));
2149 if (new_tags
== NULL
)
2151 for (j
= 0; j
< i
; j
++)
2152 new_tags
[j
] = set1
[s1
].tags
[j
];
2153 for (i
= 0; i
< num_tags
; i
++)
2154 new_tags
[j
+ i
] = tags
[i
];
2155 new_tags
[j
+ i
] = -1;
2156 new_set
[s1
].tags
= new_tags
;
2158 if (set1
[s1
].params
)
2159 new_set
[s1
].params
= set1
[s1
].params
;
2162 if (!new_set
[s1
].params
)
2163 new_set
[s1
].params
= params
;
2166 new_set
[s1
].params
= tre_mem_alloc(mem
, sizeof(*params
) *
2168 if (!new_set
[s1
].params
)
2170 for (i
= 0; i
< TRE_PARAM_LAST
; i
++)
2171 if (params
[i
] != TRE_PARAM_UNSET
)
2172 new_set
[s1
].params
[i
] = params
[i
];
2177 for (s2
= 0; set2
[s2
].position
>= 0; s2
++)
2179 new_set
[s1
+ s2
].position
= set2
[s2
].position
;
2180 new_set
[s1
+ s2
].code_min
= set2
[s2
].code_min
;
2181 new_set
[s1
+ s2
].code_max
= set2
[s2
].code_max
;
2182 /* XXX - why not | assertions here as well? */
2183 new_set
[s1
+ s2
].assertions
= set2
[s2
].assertions
;
2184 new_set
[s1
+ s2
].bracket_match_list
= set2
[s2
].bracket_match_list
;
2185 new_set
[s1
+ s2
].backref
= set2
[s2
].backref
;
2186 if (set2
[s2
].tags
== NULL
)
2187 new_set
[s1
+ s2
].tags
= NULL
;
2190 for (i
= 0; set2
[s2
].tags
[i
] >= 0; i
++);
2191 new_tags
= tre_mem_alloc(mem
, sizeof(*new_tags
) * (i
+ 1));
2192 if (new_tags
== NULL
)
2194 for (j
= 0; j
< i
; j
++)
2195 new_tags
[j
] = set2
[s2
].tags
[j
];
2197 new_set
[s1
+ s2
].tags
= new_tags
;
2199 if (set2
[s2
].params
)
2200 new_set
[s1
+ s2
].params
= set2
[s2
].params
;
2203 if (!new_set
[s1
+ s2
].params
)
2204 new_set
[s1
+ s2
].params
= params
;
2207 new_set
[s1
+ s2
].params
= tre_mem_alloc(mem
, sizeof(*params
) *
2209 if (!new_set
[s1
+ s2
].params
)
2211 for (i
= 0; i
< TRE_PARAM_LAST
; i
++)
2212 if (params
[i
] != TRE_PARAM_UNSET
)
2213 new_set
[s1
+ s2
].params
[i
] = params
[i
];
2217 new_set
[s1
+ s2
].position
= -1;
2221 /* Finds the empty path through `node' which is the one that should be
2222 taken according to POSIX.2 rules, and adds the tags on that path to
2223 `tags'. `tags' may be NULL. If `num_tags_seen' is not NULL, it is
2224 set to the number of tags seen on the path. */
2225 static reg_errcode_t
2226 tre_match_empty(tre_stack_t
*stack
, tre_ast_node_t
*node
, int *tags
,
2227 int *assertions
, int *params
, int *num_tags_seen
,
2232 tre_catenation_t
*cat
;
2233 tre_iteration_t
*iter
;
2235 int bottom
= tre_stack_num_objects(stack
);
2236 reg_errcode_t status
= REG_OK
;
2242 status
= tre_stack_push_voidptr(stack
, node
);
2244 /* Walk through the tree recursively. */
2245 while (status
== REG_OK
&& tre_stack_num_objects(stack
) > bottom
)
2247 node
= tre_stack_pop_voidptr(stack
);
2252 lit
= (tre_literal_t
*)node
->obj
;
2253 switch (lit
->code_min
)
2256 if (lit
->code_max
>= 0)
2260 /* Add the tag to `tags'. */
2261 for (i
= 0; tags
[i
] >= 0; i
++)
2262 if (tags
[i
] == lit
->code_max
)
2266 tags
[i
] = lit
->code_max
;
2275 assert(lit
->code_max
>= 1
2276 || lit
->code_max
<= ASSERT_LAST
);
2277 if (assertions
!= NULL
)
2278 *assertions
|= lit
->code_max
;
2282 for (i
= 0; i
< TRE_PARAM_LAST
; i
++)
2283 params
[i
] = lit
->u
.params
[i
];
2284 if (params_seen
!= NULL
)
2296 /* Subexpressions starting earlier take priority over ones
2297 starting later, so we prefer the left subexpression over the
2298 right subexpression. */
2299 uni
= (tre_union_t
*)node
->obj
;
2300 if (uni
->left
->nullable
)
2301 STACK_PUSHX(stack
, voidptr
, uni
->left
)
2302 else if (uni
->right
->nullable
)
2303 STACK_PUSHX(stack
, voidptr
, uni
->right
)
2309 /* The path must go through both children. */
2310 cat
= (tre_catenation_t
*)node
->obj
;
2311 assert(cat
->left
->nullable
);
2312 assert(cat
->right
->nullable
);
2313 STACK_PUSHX(stack
, voidptr
, cat
->left
);
2314 STACK_PUSHX(stack
, voidptr
, cat
->right
);
2318 /* A match with an empty string is preferred over no match at
2319 all, so we go through the argument if possible. */
2320 iter
= (tre_iteration_t
*)node
->obj
;
2321 if (iter
->arg
->nullable
)
2322 STACK_PUSHX(stack
, voidptr
, iter
->arg
);
2338 NFL_POST_CATENATION
,
2340 } tre_nfl_stack_symbol_t
;
2343 /* Computes and fills in the fields `nullable', `firstpos', and `lastpos' for
2344 the nodes of the AST `tree'. */
2345 static reg_errcode_t
2346 tre_compute_nfl(tre_mem_t mem
, tre_stack_t
*stack
, tre_ast_node_t
*tree
)
2348 int bottom
= tre_stack_num_objects(stack
);
2350 STACK_PUSHR(stack
, voidptr
, tree
);
2351 STACK_PUSHR(stack
, int, NFL_RECURSE
);
2353 while (tre_stack_num_objects(stack
) > bottom
)
2355 tre_nfl_stack_symbol_t symbol
;
2356 tre_ast_node_t
*node
;
2358 symbol
= (tre_nfl_stack_symbol_t
)tre_stack_pop_int(stack
);
2359 node
= tre_stack_pop_voidptr(stack
);
2367 tre_literal_t
*lit
= (tre_literal_t
*)node
->obj
;
2368 if (IS_BACKREF(lit
))
2370 /* Back references: nullable = false, firstpos = {i},
2373 node
->firstpos
= tre_set_one(mem
, lit
->position
, 0,
2374 TRE_CHAR_MAX
, NULL
, -1);
2375 if (!node
->firstpos
)
2377 node
->lastpos
= tre_set_one(mem
, lit
->position
, 0,
2379 (int)lit
->code_max
);
2383 else if (lit
->code_min
< 0)
2385 /* Tags, empty strings, params, and zero width assertions:
2386 nullable = true, firstpos = {}, and lastpos = {}. */
2388 node
->firstpos
= tre_set_empty(mem
);
2389 if (!node
->firstpos
)
2391 node
->lastpos
= tre_set_empty(mem
);
2397 /* Literal at position i: nullable = false, firstpos = {i},
2401 tre_set_one(mem
, lit
->position
, (int)lit
->code_min
,
2402 (int)lit
->code_max
, NULL
, -1);
2403 if (!node
->firstpos
)
2405 node
->lastpos
= tre_set_one(mem
, lit
->position
,
2408 lit
->u
.bracket_match_list
,
2417 /* Compute the attributes for the two subtrees, and after that
2419 STACK_PUSHR(stack
, voidptr
, node
);
2420 STACK_PUSHR(stack
, int, NFL_POST_UNION
);
2421 STACK_PUSHR(stack
, voidptr
, ((tre_union_t
*)node
->obj
)->right
);
2422 STACK_PUSHR(stack
, int, NFL_RECURSE
);
2423 STACK_PUSHR(stack
, voidptr
, ((tre_union_t
*)node
->obj
)->left
);
2424 STACK_PUSHR(stack
, int, NFL_RECURSE
);
2428 /* Compute the attributes for the two subtrees, and after that
2430 STACK_PUSHR(stack
, voidptr
, node
);
2431 STACK_PUSHR(stack
, int, NFL_POST_CATENATION
);
2432 STACK_PUSHR(stack
, voidptr
, ((tre_catenation_t
*)node
->obj
)->right
);
2433 STACK_PUSHR(stack
, int, NFL_RECURSE
);
2434 STACK_PUSHR(stack
, voidptr
, ((tre_catenation_t
*)node
->obj
)->left
);
2435 STACK_PUSHR(stack
, int, NFL_RECURSE
);
2439 /* Compute the attributes for the subtree, and after that for
2441 STACK_PUSHR(stack
, voidptr
, node
);
2442 STACK_PUSHR(stack
, int, NFL_POST_ITERATION
);
2443 STACK_PUSHR(stack
, voidptr
, ((tre_iteration_t
*)node
->obj
)->arg
);
2444 STACK_PUSHR(stack
, int, NFL_RECURSE
);
2447 break; /* end case: NFL_RECURSE */
2449 case NFL_POST_UNION
:
2451 tre_union_t
*uni
= (tre_union_t
*)node
->obj
;
2452 node
->nullable
= uni
->left
->nullable
|| uni
->right
->nullable
;
2453 node
->firstpos
= tre_set_union(mem
, uni
->left
->firstpos
,
2454 uni
->right
->firstpos
, NULL
, 0, NULL
);
2455 if (!node
->firstpos
)
2457 node
->lastpos
= tre_set_union(mem
, uni
->left
->lastpos
,
2458 uni
->right
->lastpos
, NULL
, 0, NULL
);
2464 case NFL_POST_ITERATION
:
2466 int num_tags
, *tags
, assertions
, params_seen
;
2468 reg_errcode_t status
;
2469 tre_iteration_t
*iter
= (tre_iteration_t
*)node
->obj
;
2471 /* From Ville Laurikari's original 2001 Master's thesis, the
2472 firstpos(n) and lastpos(n) of an iteration is just the
2473 corresponding values of the iteration's argument. Unfortunately,
2474 this isn't sufficient for the following BRE:
2476 \(a*\)*b\(\1\) matched against ab
2478 The backreference wants to force the first subexpression to
2479 be the empty string, but there is no transition for this. So
2480 we need to modify the lastpos(n) of an iteration to be the
2481 equivalent of that of catentation. Using the same notation as
2482 in the thesis, lastpos(n) is redefined as:
2484 if nullable(c1) then
2486 addtags(lastpos(c1),
2491 where c1 is the argument node. firstpos(n) remains the same. */
2493 /* Compute lastpos. */
2494 if (iter
->min
== 0 || iter
->arg
->nullable
)
2497 if (iter
->arg
->nullable
)
2499 /* The arg matches the empty string. Make a first pass
2500 with tre_match_empty() to get the number of tags and
2502 status
= tre_match_empty(stack
, iter
->arg
,
2503 NULL
, NULL
, NULL
, &num_tags
,
2505 if (status
!= REG_OK
)
2507 /* Allocate arrays for the tags and parameters. */
2508 tags
= xmalloc(sizeof(int) * (num_tags
+ 1));
2516 params
= tre_mem_alloc(mem
, sizeof(*params
)
2524 /* Second pass with tre_mach_empty() to get the list of
2525 tags and parameters. */
2526 status
= tre_match_empty(stack
, iter
->arg
, tags
,
2527 &assertions
, params
, NULL
, NULL
);
2528 if (status
!= REG_OK
)
2534 tre_set_union(mem
, iter
->arg
->lastpos
, iter
->arg
->lastpos
,
2535 tags
, assertions
, params
);
2541 node
->lastpos
= iter
->arg
->lastpos
;
2546 node
->lastpos
= iter
->arg
->lastpos
;
2548 node
->firstpos
= iter
->arg
->firstpos
;
2552 case NFL_POST_CATENATION
:
2554 int num_tags
, *tags
, assertions
, params_seen
;
2556 reg_errcode_t status
;
2557 tre_catenation_t
*cat
= node
->obj
;
2558 node
->nullable
= cat
->left
->nullable
&& cat
->right
->nullable
;
2560 /* Compute firstpos. */
2561 if (cat
->left
->nullable
)
2563 /* The left side matches the empty string. Make a first pass
2564 with tre_match_empty() to get the number of tags and
2566 status
= tre_match_empty(stack
, cat
->left
,
2567 NULL
, NULL
, NULL
, &num_tags
,
2569 if (status
!= REG_OK
)
2571 /* Allocate arrays for the tags and parameters. */
2572 tags
= xmalloc(sizeof(*tags
) * (num_tags
+ 1));
2580 params
= tre_mem_alloc(mem
, sizeof(*params
)
2588 /* Second pass with tre_mach_empty() to get the list of
2589 tags and parameters. */
2590 status
= tre_match_empty(stack
, cat
->left
, tags
,
2591 &assertions
, params
, NULL
, NULL
);
2592 if (status
!= REG_OK
)
2598 tre_set_union(mem
, cat
->right
->firstpos
, cat
->left
->firstpos
,
2599 tags
, assertions
, params
);
2601 if (!node
->firstpos
)
2606 node
->firstpos
= cat
->left
->firstpos
;
2609 /* Compute lastpos. */
2610 if (cat
->right
->nullable
)
2612 /* The right side matches the empty string. Make a first pass
2613 with tre_match_empty() to get the number of tags and
2615 status
= tre_match_empty(stack
, cat
->right
,
2616 NULL
, NULL
, NULL
, &num_tags
,
2618 if (status
!= REG_OK
)
2620 /* Allocate arrays for the tags and parameters. */
2621 tags
= xmalloc(sizeof(int) * (num_tags
+ 1));
2629 params
= tre_mem_alloc(mem
, sizeof(*params
)
2637 /* Second pass with tre_mach_empty() to get the list of
2638 tags and parameters. */
2639 status
= tre_match_empty(stack
, cat
->right
, tags
,
2640 &assertions
, params
, NULL
, NULL
);
2641 if (status
!= REG_OK
)
2647 tre_set_union(mem
, cat
->left
->lastpos
, cat
->right
->lastpos
,
2648 tags
, assertions
, params
);
2655 node
->lastpos
= cat
->right
->lastpos
;
2670 /* Adds a transition from each position in `p1' to each position in `p2'. */
2671 static reg_errcode_t
2672 tre_make_trans(tre_pos_and_tags_t
*p1
, tre_pos_and_tags_t
*p2
,
2673 tre_tnfa_transition_t
*transitions
,
2674 int *counts
, int *offs
)
2676 tre_pos_and_tags_t
*orig_p2
= p2
;
2677 tre_tnfa_transition_t
*trans
;
2678 int i
, j
, k
, l
, dup
, prev_p2_pos
;
2680 if (transitions
!= NULL
)
2681 while (p1
->position
>= 0)
2685 while (p2
->position
>= 0)
2687 /* Optimization: if this position was already handled, skip it. */
2688 if (p2
->position
== prev_p2_pos
)
2693 prev_p2_pos
= p2
->position
;
2694 /* Set `trans' to point to the next unused transition from
2695 position `p1->position'. */
2696 trans
= transitions
+ offs
[p1
->position
];
2697 while (trans
->state
!= NULL
)
2700 /* If we find a previous transition from `p1->position' to
2701 `p2->position', it is overwritten. This can happen only
2702 if there are nested loops in the regexp, like in "((a)*)*".
2703 In POSIX.2 repetition using the outer loop is always
2704 preferred over using the inner loop. Therefore the
2705 transition for the inner loop is useless and can be thrown
2707 /* XXX - The same position is used for all nodes in a bracket
2708 expression, so this optimization cannot be used (it will
2709 break bracket expressions) unless I figure out a way to
2711 if (trans
->state_id
== p2
->position
)
2720 if (trans
->state
== NULL
)
2721 (trans
+ 1)->state
= NULL
;
2722 /* Use the character ranges, assertions, etc. from `p1' for
2723 the transition from `p1' to `p2'. */
2724 trans
->code_min
= p1
->code_min
;
2725 trans
->code_max
= p1
->code_max
;
2726 trans
->state
= transitions
+ offs
[p2
->position
];
2727 trans
->state_id
= p2
->position
;
2728 trans
->assertions
= p1
->assertions
| p2
->assertions
2729 | (p1
->bracket_match_list
!= NULL
? ASSERT_BRACKET_MATCH
: 0);
2730 if (p1
->backref
>= 0)
2732 assert((trans
->assertions
& ASSERT_BRACKET_MATCH
) == 0);
2733 assert(p2
->backref
< 0);
2734 trans
->u
.backref
= p1
->backref
;
2735 trans
->assertions
|= ASSERT_BACKREF
;
2737 if (p1
->bracket_match_list
!= NULL
)
2739 trans
->u
.bracket_match_list
=
2740 xmalloc(SIZEOF_BRACKET_MATCH_LIST(p1
->bracket_match_list
));
2741 if (trans
->u
.bracket_match_list
== NULL
)
2743 memcpy(trans
->u
.bracket_match_list
, p1
->bracket_match_list
,
2744 SIZEOF_BRACKET_MATCH_LIST(p1
->bracket_match_list
));
2747 /* Find out how many tags this transition has. */
2749 if (p1
->tags
!= NULL
)
2750 while(p1
->tags
[i
] >= 0)
2753 if (p2
->tags
!= NULL
)
2754 while(p2
->tags
[j
] >= 0)
2757 /* If we are overwriting a transition, free the old tag array. */
2758 if (trans
->tags
!= NULL
)
2762 /* If there were any tags, allocate an array and fill it. */
2765 trans
->tags
= xmalloc(sizeof(*trans
->tags
) * (i
+ j
+ 1));
2769 if (p1
->tags
!= NULL
)
2770 while(p1
->tags
[i
] >= 0)
2772 trans
->tags
[i
] = p1
->tags
[i
];
2777 if (p2
->tags
!= NULL
)
2778 while (p2
->tags
[j
] >= 0)
2780 /* Don't add duplicates. */
2782 for (k
= 0; k
< i
; k
++)
2783 if (trans
->tags
[k
] == p2
->tags
[j
])
2789 trans
->tags
[l
++] = p2
->tags
[j
];
2792 trans
->tags
[l
] = -1;
2795 /* Set the parameter array. If both `p2' and `p1' have same
2796 parameters, the values in `p2' override those in `p1'. */
2797 if (p1
->params
|| p2
->params
)
2800 trans
->params
= xmalloc(sizeof(*trans
->params
)
2804 for (i
= 0; i
< TRE_PARAM_LAST
; i
++)
2806 trans
->params
[i
] = TRE_PARAM_UNSET
;
2807 if (p1
->params
&& p1
->params
[i
] != TRE_PARAM_UNSET
)
2808 trans
->params
[i
] = p1
->params
[i
];
2809 if (p2
->params
&& p2
->params
[i
] != TRE_PARAM_UNSET
)
2810 trans
->params
[i
] = p2
->params
[i
];
2816 xfree(trans
->params
);
2817 trans
->params
= NULL
;
2825 DPRINT((" %2d -> %2d on %3d", p1
->position
, p2
->position
,
2827 if (p1
->code_max
!= p1
->code_min
)
2828 DPRINT(("-%3d", p1
->code_max
));
2832 DPRINT((", tags ["));
2835 DPRINT(("%d", *tags
));
2842 if (trans
->assertions
)
2843 DPRINT((", assert %d", trans
->assertions
));
2844 if (trans
->assertions
& ASSERT_BACKREF
)
2845 DPRINT((", backref %d", trans
->u
.backref
));
2846 else if (trans
->assertions
& ASSERT_BRACKET_MATCH
)
2847 DPRINT((", bracket_match_list %p",
2848 trans
->u
.bracket_match_list
));
2852 tre_print_params(trans
->params
);
2856 #endif /* TRE_DEBUG */
2862 /* Compute a maximum limit for the number of transitions leaving
2864 while (p1
->position
>= 0)
2867 while (p2
->position
>= 0)
2869 counts
[p1
->position
]++;
2877 /* Converts the syntax tree to a TNFA. All the transitions in the TNFA are
2878 labelled with one character range (there are no transitions on empty
2879 strings). The TNFA takes O(n^2) space in the worst case, `n' is size of
2881 static reg_errcode_t
2882 tre_ast_to_tnfa(tre_ast_node_t
*node
, tre_tnfa_transition_t
*transitions
,
2883 int *counts
, int *offs
)
2886 tre_catenation_t
*cat
;
2887 tre_iteration_t
*iter
;
2888 reg_errcode_t errcode
= REG_OK
;
2890 /* XXX - recurse using a stack!. */
2896 uni
= (tre_union_t
*)node
->obj
;
2897 errcode
= tre_ast_to_tnfa(uni
->left
, transitions
, counts
, offs
);
2898 if (errcode
!= REG_OK
)
2900 errcode
= tre_ast_to_tnfa(uni
->right
, transitions
, counts
, offs
);
2904 cat
= (tre_catenation_t
*)node
->obj
;
2905 /* Add a transition from each position in cat->left->lastpos
2906 to each position in cat->right->firstpos. */
2907 errcode
= tre_make_trans(cat
->left
->lastpos
, cat
->right
->firstpos
,
2908 transitions
, counts
, offs
);
2909 if (errcode
!= REG_OK
)
2911 errcode
= tre_ast_to_tnfa(cat
->left
, transitions
, counts
, offs
);
2912 if (errcode
!= REG_OK
)
2914 errcode
= tre_ast_to_tnfa(cat
->right
, transitions
, counts
, offs
);
2918 iter
= (tre_iteration_t
*)node
->obj
;
2919 assert(iter
->max
== -1 || iter
->max
== 1);
2921 if (iter
->max
== -1)
2923 assert(iter
->min
== 0 || iter
->min
== 1);
2924 /* Add a transition from each last position in the iterated
2925 expression to each first position. */
2926 errcode
= tre_make_trans(iter
->arg
->lastpos
, iter
->arg
->firstpos
,
2927 transitions
, counts
, offs
);
2928 if (errcode
!= REG_OK
)
2931 errcode
= tre_ast_to_tnfa(iter
->arg
, transitions
, counts
, offs
);
2938 #define ERROR_EXIT(err) \
2942 if (/*CONSTCOND*/1) \
2945 while (/*CONSTCOND*/0)
2949 tre_compile(regex_t
*preg
, const tre_char_t
*regex
, size_t n
, int cflags
,
2953 tre_ast_node_t
*tree
, *tmp_ast_l
, *tmp_ast_r
;
2954 tre_pos_and_tags_t
*p
;
2955 int *counts
= NULL
, *offs
= NULL
;
2957 tre_tnfa_transition_t
*transitions
, *initial
;
2958 tre_tnfa_t
*tnfa
= NULL
;
2959 tre_submatch_data_t
*submatch_data
= NULL
;
2960 tre_tag_direction_t
*tag_directions
= NULL
;
2961 reg_errcode_t errcode
;
2964 /* Parse context. */
2965 tre_parse_ctx_t parse_ctx
;
2967 /* Allocate a stack used throughout the compilation process for various
2969 stack
= tre_stack_new(512, 10240, 128);
2972 /* Allocate a fast memory allocator. */
2973 mem
= tre_mem_new();
2976 tre_stack_destroy(stack
);
2980 /* Parse the regexp. */
2981 memset(&parse_ctx
, 0, sizeof(parse_ctx
));
2982 parse_ctx
.mem
= mem
;
2983 parse_ctx
.stack
= stack
;
2984 parse_ctx
.re
= regex
;
2986 /* Only allow REG_UNGREEDY to be set if both REG_ENHANCED and REG_EXTENDED
2988 if ((cflags
& (REG_ENHANCED
| REG_EXTENDED
)) != (REG_ENHANCED
| REG_EXTENDED
))
2989 cflags
&= ~REG_UNGREEDY
;
2990 parse_ctx
.cflags
= cflags
;
2991 parse_ctx
.max_backref
= -1;
2992 parse_ctx
.loc
= loc
;
2993 parse_ctx
.submatch_id_invisible
= SUBMATCH_ID_INVISIBLE_START
;
2995 DPRINT(("tre_compile: parsing '%.*" STRF
"'\n", (int)n
, regex
));
2996 errcode
= tre_parse(&parse_ctx
);
2997 if (errcode
!= REG_OK
)
2998 ERROR_EXIT(errcode
);
2999 preg
->re_nsub
= parse_ctx
.submatch_id
- 1;
3000 tree
= parse_ctx
.result
;
3002 /* Back references and approximate matching cannot currently be used
3003 in the same regexp. */
3004 if (parse_ctx
.max_backref
>= 0 && parse_ctx
.have_approx
)
3005 ERROR_EXIT(REG_BADPAT
);
3008 tre_ast_print(tree
);
3009 #endif /* TRE_DEBUG */
3011 /* Referring to nonexistent subexpressions is illegal. */
3012 if (parse_ctx
.max_backref
> (int)preg
->re_nsub
)
3013 ERROR_EXIT(REG_ESUBREG
);
3015 /* Allocate the TNFA struct. */
3016 tnfa
= xcalloc(1, sizeof(tre_tnfa_t
));
3018 ERROR_EXIT(REG_ESPACE
);
3019 tnfa
->have_backrefs
= parse_ctx
.max_backref
>= 0;
3020 tnfa
->have_approx
= parse_ctx
.have_approx
;
3021 tnfa
->num_submatches
= parse_ctx
.submatch_id
;
3022 tnfa
->num_submatches_invisible
= parse_ctx
.submatch_id_invisible
3023 - SUBMATCH_ID_INVISIBLE_START
;
3024 tnfa
->num_reorder_tags
= parse_ctx
.num_reorder_tags
;
3025 tnfa
->loc
= parse_ctx
.loc
;
3027 /* Set up tags for submatch addressing. If REG_NOSUB is set and the
3028 regexp does not have back references, this can be skipped. */
3029 if (tnfa
->num_reorder_tags
> 0 || !(cflags
& REG_NOSUB
))
3031 DPRINT(("tre_compile: setting up tags\n"));
3033 /* Figure out how many tags we will need. */
3034 errcode
= tre_add_tags(NULL
, stack
, tree
, tnfa
);
3035 if (errcode
!= REG_OK
)
3036 ERROR_EXIT(errcode
);
3038 tre_ast_print(tree
);
3039 #endif /* TRE_DEBUG */
3041 if (tnfa
->num_tags
> 0)
3043 tag_directions
= xmalloc(sizeof(*tag_directions
)
3044 * (tnfa
->num_tags
+ 1));
3045 if (tag_directions
== NULL
)
3046 ERROR_EXIT(REG_ESPACE
);
3047 tnfa
->tag_directions
= tag_directions
;
3048 memset(tag_directions
, -1,
3049 sizeof(*tag_directions
) * (tnfa
->num_tags
+ 1));
3051 tnfa
->minimal_tags
= xcalloc((unsigned)tnfa
->num_tags
* 2 + 3,
3052 sizeof(tnfa
->minimal_tags
));
3053 if (tnfa
->minimal_tags
== NULL
)
3054 ERROR_EXIT(REG_ESPACE
);
3056 submatch_data
= xcalloc((unsigned)parse_ctx
.submatch_id
,
3057 sizeof(*submatch_data
));
3058 if (submatch_data
== NULL
)
3059 ERROR_EXIT(REG_ESPACE
);
3060 /* Set the eo_tag value to -1 to indicate that that corresponding
3061 * submatch has not be completed yet */
3062 for (i
= 0; i
< parse_ctx
.submatch_id
; i
++)
3064 submatch_data
[i
].eo_tag
= -1;
3066 tnfa
->submatch_data
= submatch_data
;
3068 errcode
= tre_add_tags(mem
, stack
, tree
, tnfa
);
3069 if (errcode
!= REG_OK
)
3070 ERROR_EXIT(errcode
);
3073 for (i
= 0; i
< parse_ctx
.submatch_id
; i
++)
3074 DPRINT(("pmatch[%d] = {t%d, t%d}\n",
3075 i
, submatch_data
[i
].so_tag
, submatch_data
[i
].eo_tag
));
3076 for (i
= 0; i
<= tnfa
->num_tags
; i
++)
3077 DPRINT(("t%d is %s\n", i
, tag_dir_str
[tag_directions
[i
]]));
3078 #endif /* TRE_DEBUG */
3081 /* Expand iteration nodes. */
3082 errcode
= tre_expand_ast(mem
, stack
, tree
, &parse_ctx
.position
,
3083 tag_directions
, &tnfa
->params_depth
);
3084 if (errcode
!= REG_OK
)
3085 ERROR_EXIT(errcode
);
3087 /* Add a dummy node for the final state.
3088 XXX - For certain patterns this dummy node can be optimized away,
3089 for example "a*" or "ab*". Figure out a simple way to detect
3090 this possibility. */
3092 tmp_ast_r
= tre_ast_new_literal(mem
, 0, 0, parse_ctx
.position
++);
3093 if (tmp_ast_r
== NULL
)
3094 ERROR_EXIT(REG_ESPACE
);
3096 tree
= tre_ast_new_catenation(mem
, tmp_ast_l
, tmp_ast_r
);
3098 ERROR_EXIT(REG_ESPACE
);
3101 tre_ast_print(tree
);
3102 DPRINT(("Number of states: %d\n", parse_ctx
.position
));
3104 for (i
= 0; i
< parse_ctx
.submatch_id
; i
++)
3105 DPRINT(("pmatch[%d] = {t%d, t%d}\n",
3106 i
, submatch_data
[i
].so_tag
, submatch_data
[i
].eo_tag
));
3108 for (i
= 0; i
<= tnfa
->num_tags
; i
++)
3109 DPRINT(("t%d is %s\n", i
, tag_dir_str
[tag_directions
[i
]]));
3110 #endif /* TRE_DEBUG */
3112 errcode
= tre_compute_nfl(mem
, stack
, tree
);
3113 if (errcode
!= REG_OK
)
3114 ERROR_EXIT(errcode
);
3116 counts
= xmalloc(sizeof(int) * parse_ctx
.position
);
3118 ERROR_EXIT(REG_ESPACE
);
3120 offs
= xmalloc(sizeof(int) * parse_ctx
.position
);
3122 ERROR_EXIT(REG_ESPACE
);
3124 for (i
= 0; i
< parse_ctx
.position
; i
++)
3126 tre_ast_to_tnfa(tree
, NULL
, counts
, NULL
);
3129 for (i
= 0; i
< parse_ctx
.position
; i
++)
3132 add
+= counts
[i
] + 1;
3135 transitions
= xcalloc((unsigned)add
+ 1, sizeof(*transitions
));
3136 if (transitions
== NULL
)
3137 ERROR_EXIT(REG_ESPACE
);
3138 tnfa
->transitions
= transitions
;
3139 tnfa
->num_transitions
= add
;
3141 DPRINT(("Converting to TNFA:\n"));
3142 errcode
= tre_ast_to_tnfa(tree
, transitions
, counts
, offs
);
3143 if (errcode
!= REG_OK
)
3144 ERROR_EXIT(errcode
);
3147 /* Set first_char only if there is only one character that can be the
3148 first character of a match */
3149 tnfa
->first_char
= -1;
3150 if (!tmp_ast_l
->nullable
)
3153 for (p
= tree
->firstpos
; scanning
&& p
->position
>= 0; p
++)
3155 tre_tnfa_transition_t
*j
= transitions
+ offs
[p
->position
];
3156 while (j
->state
!= NULL
)
3158 if (j
->code_min
<= j
->code_max
)
3160 if (j
->code_max
!= j
->code_min
|| j
->code_min
== -1 || tnfa
->first_char
!= -1)
3162 tnfa
->first_char
= -1;
3166 tnfa
->first_char
= j
->code_min
;
3172 if (tnfa
->first_char
>= 0)
3173 DPRINT(("first char must be %d\n", tnfa
->first_char
));
3174 #endif /* TRE_DEBUG */
3179 while (p
->position
>= 0)
3186 DPRINT(("initial: %d", p
->position
));
3194 DPRINT(("%d", *tags
));
3200 DPRINT((", assert %d", p
->assertions
));
3204 tre_print_params(p
->params
);
3208 #endif /* TRE_DEBUG */
3213 initial
= xcalloc((unsigned)i
+ 1, sizeof(tre_tnfa_transition_t
));
3214 if (initial
== NULL
)
3215 ERROR_EXIT(REG_ESPACE
);
3216 tnfa
->initial
= initial
;
3219 for (p
= tree
->firstpos
; p
->position
>= 0; p
++)
3221 initial
[i
].state
= transitions
+ offs
[p
->position
];
3222 initial
[i
].state_id
= p
->position
;
3223 initial
[i
].tags
= NULL
;
3224 /* Copy the arrays p->tags, and p->params, they are allocated
3225 from a tre_mem object. */
3229 for (j
= 0; p
->tags
[j
] >= 0; j
++);
3230 initial
[i
].tags
= xmalloc(sizeof(*p
->tags
) * (j
+ 1));
3231 if (!initial
[i
].tags
)
3232 ERROR_EXIT(REG_ESPACE
);
3233 memcpy(initial
[i
].tags
, p
->tags
, sizeof(*p
->tags
) * (j
+ 1));
3235 initial
[i
].params
= NULL
;
3238 initial
[i
].params
= xmalloc(sizeof(*p
->params
) * TRE_PARAM_LAST
);
3239 if (!initial
[i
].params
)
3240 ERROR_EXIT(REG_ESPACE
);
3241 memcpy(initial
[i
].params
, p
->params
,
3242 sizeof(*p
->params
) * TRE_PARAM_LAST
);
3244 initial
[i
].assertions
= p
->assertions
;
3247 initial
[i
].state
= NULL
;
3249 tnfa
->num_transitions
= add
;
3250 tnfa
->final
= transitions
+ offs
[tree
->lastpos
[0].position
];
3251 tnfa
->num_states
= parse_ctx
.position
;
3252 tnfa
->cflags
= cflags
;
3254 DPRINT(("final state %d (%p)\n", tree
->lastpos
[0].position
,
3255 (void *)tnfa
->final
));
3257 tre_mem_destroy(mem
);
3258 tre_stack_destroy(stack
);
3262 #ifdef TRE_USE_SYSTEM_REGEX_H
3263 preg
->re_magic
= RE_MAGIC
;
3264 #endif /* TRE_USE_SYSTEM_REGEX_H */
3265 preg
->TRE_REGEX_T_FIELD
= (void *)tnfa
;
3266 xlocale_retain(tnfa
->loc
);
3270 /* Free everything that was allocated and return the error code. */
3271 tre_mem_destroy(mem
);
3273 tre_stack_destroy(stack
);
3279 /* Set tnfa into preg, so that calling tre_free() will free the contents
3280 of tnfa. NULL out the loc field since we never retained the locale. */
3281 preg
->TRE_REGEX_T_FIELD
= (void *)tnfa
;
3282 if(tnfa
) tnfa
->loc
= NULL
;
3292 tre_free(regex_t
*preg
)
3296 tre_tnfa_transition_t
*trans
;
3298 #ifdef TRE_USE_SYSTEM_REGEX_H
3300 #endif /* TRE_USE_SYSTEM_REGEX_H */
3301 tnfa
= (void *)preg
->TRE_REGEX_T_FIELD
;
3304 preg
->TRE_REGEX_T_FIELD
= NULL
;
3306 for (i
= 0; i
< tnfa
->num_transitions
; i
++)
3307 if (tnfa
->transitions
[i
].state
)
3309 if (tnfa
->transitions
[i
].tags
)
3310 xfree(tnfa
->transitions
[i
].tags
);
3311 if (tnfa
->transitions
[i
].assertions
& ASSERT_BRACKET_MATCH
)
3312 xfree(tnfa
->transitions
[i
].u
.bracket_match_list
);
3313 if (tnfa
->transitions
[i
].params
)
3314 xfree(tnfa
->transitions
[i
].params
);
3316 if (tnfa
->transitions
)
3317 xfree(tnfa
->transitions
);
3321 for (trans
= tnfa
->initial
; trans
->state
; trans
++)
3326 xfree(trans
->params
);
3328 xfree(tnfa
->initial
);
3331 if (tnfa
->submatch_data
)
3333 xfree(tnfa
->submatch_data
);
3336 if (tnfa
->tag_directions
)
3337 xfree(tnfa
->tag_directions
);
3338 if (tnfa
->minimal_tags
)
3339 xfree(tnfa
->minimal_tags
);
3342 xlocale_release(tnfa
->loc
);
3344 if (tnfa
->last_matched_branch
)
3345 xfree(tnfa
->last_matched_branch
);
3353 static char str
[256];
3358 (void) tre_config(TRE_CONFIG_VERSION
, &version
);
3359 (void) snprintf(str
, sizeof(str
), "TRE %s (BSD)", version
);
3365 tre_config(int query
, void *result
)
3367 int *int_result
= result
;
3368 const char **string_result
= result
;
3372 case TRE_CONFIG_APPROX
:
3375 #else /* !TRE_APPROX */
3377 #endif /* !TRE_APPROX */
3380 case TRE_CONFIG_WCHAR
:
3383 #else /* !TRE_WCHAR */
3385 #endif /* !TRE_WCHAR */
3388 case TRE_CONFIG_MULTIBYTE
:
3389 #ifdef TRE_MULTIBYTE
3391 #else /* !TRE_MULTIBYTE */
3393 #endif /* !TRE_MULTIBYTE */
3396 case TRE_CONFIG_SYSTEM_ABI
:
3397 #ifdef TRE_CONFIG_SYSTEM_ABI
3399 #else /* !TRE_CONFIG_SYSTEM_ABI */
3401 #endif /* !TRE_CONFIG_SYSTEM_ABI */
3404 case TRE_CONFIG_VERSION
:
3405 *string_result
= TRE_VERSION
;