1 /* kwset.c - search for any of a set of keywords.
2 Copyright (C) 1989, 1998, 2000, 2005, 2007, 2009-2020 Free Software
5 This program is free software; you can redistribute it and/or modify
6 it under the terms of the GNU General Public License as published by
7 the Free Software Foundation; either version 3, or (at your option)
10 This program is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 GNU General Public License for more details.
15 You should have received a copy of the GNU General Public License
16 along with this program; if not, write to the Free Software
17 Foundation, Inc., 51 Franklin Street - Fifth Floor, Boston, MA
20 /* Written August 1989 by Mike Haertel. */
22 /* For the Aho-Corasick algorithm, see:
23 Aho AV, Corasick MJ. Efficient string matching: an aid to
24 bibliographic search. CACM 18, 6 (1975), 333-40
25 <https://dx.doi.org/10.1145/360825.360855>, which describes the
26 failure function used below.
28 For the Boyer-Moore algorithm, see: Boyer RS, Moore JS.
29 A fast string searching algorithm. CACM 20, 10 (1977), 762-72
30 <https://dx.doi.org/10.1145/359842.359859>.
32 For a survey of more-recent string matching algorithms that might
33 help improve performance, see: Faro S, Lecroq T. The exact online
34 string matching problem: a review of the most recent results.
35 ACM Computing Surveys 45, 2 (2013), 13
36 <https://dx.doi.org/10.1145/2431211.2431212>. */
43 #include <sys/types.h>
51 #define obstack_chunk_alloc xmalloc
52 #define obstack_chunk_free free
60 /* Balanced tree of edges and labels leaving a given trie node. */
63 struct tree
*llink
; /* Left link; MUST be first field. */
64 struct tree
*rlink
; /* Right link (to larger labels). */
65 struct trie
*trie
; /* Trie node pointed to by this edge. */
66 unsigned char label
; /* Label on this edge. */
67 char balance
; /* Difference in depths of subtrees. */
70 /* Node of a trie representing a set of keywords. */
73 /* If an accepting node, this is either 2*W + 1 where W is the word
74 index, or is SIZE_MAX if Aho-Corasick is in use and FAIL
75 specifies where to look for more info. If not an accepting node,
79 struct tree
*links
; /* Tree of edges leaving this node. */
80 struct trie
*parent
; /* Parent of this node. */
81 struct trie
*next
; /* List of all trie nodes in level order. */
82 struct trie
*fail
; /* Aho-Corasick failure function. */
83 ptrdiff_t depth
; /* Depth of this node from the root. */
84 ptrdiff_t shift
; /* Shift function for search failures. */
85 ptrdiff_t maxshift
; /* Max shift of self and descendants. */
88 /* Structure returned opaquely to the caller, containing everything. */
91 struct obstack obstack
; /* Obstack for node allocation. */
92 ptrdiff_t words
; /* Number of words in the trie. */
93 struct trie
*trie
; /* The trie itself. */
94 ptrdiff_t mind
; /* Minimum depth of an accepting node. */
95 ptrdiff_t maxd
; /* Maximum depth of any node. */
96 unsigned char delta
[NCHAR
]; /* Delta table for rapid search. */
97 struct trie
*next
[NCHAR
]; /* Table of children of the root. */
98 char *target
; /* Target string if there's only one. */
99 ptrdiff_t *shift
; /* Used in Boyer-Moore search for one
101 char const *trans
; /* Character translation table. */
103 /* This helps to match a terminal byte, which is the first byte
104 for Aho-Corasick, and the last byte for Boyer-More. If all the
105 patterns have the same terminal byte (after translation via TRANS
106 if TRANS is nonnull), then this is that byte as an unsigned char.
107 Otherwise this is -1 if there is disagreement among the strings
108 about terminal bytes, and -2 if there are no terminal bytes and
109 no disagreement because all the patterns are empty. */
112 /* This helps to match a terminal byte. If 0 <= GC1HELP, B is
113 terminal when B == GC1 || B == GC1HELP (note that GC1 == GCHELP
114 is common here). This is typically faster than evaluating
115 to_uchar (TRANS[B]) == GC1. */
118 /* If the string has two or more bytes, this is the penultimate byte,
119 after translation via TRANS if TRANS is nonnull. This variable
120 is used only by Boyer-Moore. */
123 /* kwsexec implementation. */
124 ptrdiff_t (*kwsexec
) (kwset_t
, char const *, ptrdiff_t,
125 struct kwsmatch
*, bool);
128 /* Use TRANS to transliterate C. A null TRANS does no transliteration. */
130 tr (char const *trans
, char c
)
132 return trans
? trans
[U(c
)] : c
;
135 static ptrdiff_t acexec (kwset_t
, char const *, ptrdiff_t,
136 struct kwsmatch
*, bool);
137 static ptrdiff_t bmexec (kwset_t
, char const *, ptrdiff_t,
138 struct kwsmatch
*, bool);
140 /* Return a newly allocated keyword set. A nonnull TRANS specifies a
141 table of character translations to be applied to all pattern and
144 kwsalloc (char const *trans
)
146 struct kwset
*kwset
= xmalloc (sizeof *kwset
);
148 obstack_init (&kwset
->obstack
);
150 kwset
->trie
= obstack_alloc (&kwset
->obstack
, sizeof *kwset
->trie
);
151 kwset
->trie
->accepting
= 0;
152 kwset
->trie
->links
= NULL
;
153 kwset
->trie
->parent
= NULL
;
154 kwset
->trie
->next
= NULL
;
155 kwset
->trie
->fail
= NULL
;
156 kwset
->trie
->depth
= 0;
157 kwset
->trie
->shift
= 0;
158 kwset
->mind
= PTRDIFF_MAX
;
160 kwset
->target
= NULL
;
161 kwset
->trans
= trans
;
162 kwset
->kwsexec
= acexec
;
167 /* This upper bound is valid for CHAR_BIT >= 4 and
168 exact for CHAR_BIT in { 4..11, 13, 15, 17, 19 }. */
169 enum { DEPTH_SIZE
= CHAR_BIT
+ CHAR_BIT
/ 2 };
171 /* Add the given string to the contents of the keyword set. */
173 kwsincr (kwset_t kwset
, char const *text
, ptrdiff_t len
)
176 struct trie
*trie
= kwset
->trie
;
177 char const *trans
= kwset
->trans
;
178 bool reverse
= kwset
->kwsexec
== bmexec
;
183 /* Descend the trie (built of keywords) character-by-character,
184 installing new nodes when necessary. */
187 unsigned char uc
= reverse
? *--text
: *text
++;
188 unsigned char label
= trans
? trans
[uc
] : uc
;
190 /* Descend the tree of outgoing links for this trie node,
191 looking for the current character and keeping track
192 of the path followed. */
193 struct tree
*cur
= trie
->links
;
194 struct tree
*links
[DEPTH_SIZE
];
195 enum { L
, R
} dirs
[DEPTH_SIZE
];
196 links
[0] = (struct tree
*) &trie
->links
;
200 while (cur
&& label
!= cur
->label
)
203 if (label
< cur
->label
)
204 dirs
[depth
++] = L
, cur
= cur
->llink
;
206 dirs
[depth
++] = R
, cur
= cur
->rlink
;
209 /* The current character doesn't have an outgoing link at
210 this trie node, so build a new trie node and install
211 a link in the current trie node's tree. */
214 cur
= obstack_alloc (&kwset
->obstack
, sizeof *cur
);
217 cur
->trie
= obstack_alloc (&kwset
->obstack
, sizeof *cur
->trie
);
218 cur
->trie
->accepting
= 0;
219 cur
->trie
->links
= NULL
;
220 cur
->trie
->parent
= trie
;
221 cur
->trie
->next
= NULL
;
222 cur
->trie
->fail
= NULL
;
223 cur
->trie
->depth
= trie
->depth
+ 1;
224 cur
->trie
->shift
= 0;
228 /* Install the new tree node in its parent. */
229 if (dirs
[--depth
] == L
)
230 links
[depth
]->llink
= cur
;
232 links
[depth
]->rlink
= cur
;
234 /* Back up the tree fixing the balance flags. */
235 while (depth
&& !links
[depth
]->balance
)
237 if (dirs
[depth
] == L
)
238 --links
[depth
]->balance
;
240 ++links
[depth
]->balance
;
244 /* Rebalance the tree by pointer rotations if necessary. */
245 if (depth
&& ((dirs
[depth
] == L
&& --links
[depth
]->balance
)
246 || (dirs
[depth
] == R
&& ++links
[depth
]->balance
)))
248 struct tree
*t
, *r
, *l
, *rl
, *lr
;
250 switch (links
[depth
]->balance
)
253 switch (dirs
[depth
+ 1])
256 r
= links
[depth
], t
= r
->llink
, rl
= t
->rlink
;
257 t
->rlink
= r
, r
->llink
= rl
;
258 t
->balance
= r
->balance
= 0;
261 r
= links
[depth
], l
= r
->llink
, t
= l
->rlink
;
262 rl
= t
->rlink
, lr
= t
->llink
;
263 t
->llink
= l
, l
->rlink
= lr
, t
->rlink
= r
, r
->llink
= rl
;
264 l
->balance
= t
->balance
!= 1 ? 0 : -1;
265 r
->balance
= t
->balance
!= (char) -1 ? 0 : 1;
273 switch (dirs
[depth
+ 1])
276 l
= links
[depth
], t
= l
->rlink
, lr
= t
->llink
;
277 t
->llink
= l
, l
->rlink
= lr
;
278 t
->balance
= l
->balance
= 0;
281 l
= links
[depth
], r
= l
->rlink
, t
= r
->llink
;
282 lr
= t
->llink
, rl
= t
->rlink
;
283 t
->llink
= l
, l
->rlink
= lr
, t
->rlink
= r
, r
->llink
= rl
;
284 l
->balance
= t
->balance
!= 1 ? 0 : -1;
285 r
->balance
= t
->balance
!= (char) -1 ? 0 : 1;
296 if (dirs
[depth
- 1] == L
)
297 links
[depth
- 1]->llink
= t
;
299 links
[depth
- 1]->rlink
= t
;
306 /* Mark the node finally reached as accepting, encoding the
307 index number of this word in the keyword set so far. */
308 if (!trie
->accepting
)
310 size_t words
= kwset
->words
;
311 trie
->accepting
= 2 * words
+ 1;
315 /* Keep track of the longest and shortest string of the keyword set. */
316 if (trie
->depth
< kwset
->mind
)
317 kwset
->mind
= trie
->depth
;
318 if (trie
->depth
> kwset
->maxd
)
319 kwset
->maxd
= trie
->depth
;
323 kwswords (kwset_t kwset
)
328 /* Enqueue the trie nodes referenced from the given tree in the
331 enqueue (struct tree
*tree
, struct trie
**last
)
335 enqueue (tree
->llink
, last
);
336 enqueue (tree
->rlink
, last
);
337 (*last
) = (*last
)->next
= tree
->trie
;
340 /* Compute the Aho-Corasick failure function for the trie nodes referenced
341 from the given tree, given the failure function for their parent as
342 well as a last resort failure node. */
344 treefails (struct tree
const *tree
, struct trie
const *fail
,
345 struct trie
*recourse
, bool reverse
)
352 treefails (tree
->llink
, fail
, recourse
, reverse
);
353 treefails (tree
->rlink
, fail
, recourse
, reverse
);
355 /* Find, in the chain of fails going back to the root, the first
356 node that has a descendant on the current label. */
360 while (cur
&& tree
->label
!= cur
->label
)
361 if (tree
->label
< cur
->label
)
367 tree
->trie
->fail
= cur
->trie
;
368 if (!reverse
&& cur
->trie
->accepting
&& !tree
->trie
->accepting
)
369 tree
->trie
->accepting
= SIZE_MAX
;
375 tree
->trie
->fail
= recourse
;
378 /* Set delta entries for the links of the given tree such that
379 the preexisting delta value is larger than the current depth. */
381 treedelta (struct tree
const *tree
, ptrdiff_t depth
, unsigned char delta
[])
385 treedelta (tree
->llink
, depth
, delta
);
386 treedelta (tree
->rlink
, depth
, delta
);
387 if (depth
< delta
[tree
->label
])
388 delta
[tree
->label
] = depth
;
391 /* Return true if A has every label in B. */
392 static bool _GL_ATTRIBUTE_PURE
393 hasevery (struct tree
const *a
, struct tree
const *b
)
397 if (!hasevery (a
, b
->llink
))
399 if (!hasevery (a
, b
->rlink
))
401 while (a
&& b
->label
!= a
->label
)
402 if (b
->label
< a
->label
)
409 /* Compute a vector, indexed by character code, of the trie nodes
410 referenced from the given tree. */
412 treenext (struct tree
const *tree
, struct trie
*next
[])
416 treenext (tree
->llink
, next
);
417 treenext (tree
->rlink
, next
);
418 next
[tree
->label
] = tree
->trie
;
421 /* Prepare a built keyword set for use. */
423 kwsprep (kwset_t kwset
)
425 char const *trans
= kwset
->trans
;
427 unsigned char deltabuf
[NCHAR
];
428 unsigned char *delta
= trans
? deltabuf
: kwset
->delta
;
429 struct trie
*curr
, *last
;
431 /* Use Boyer-Moore if just one pattern, Aho-Corasick otherwise. */
432 bool reverse
= kwset
->words
== 1;
438 /* Enqueue the immediate descendants in the level order queue. */
439 for (curr
= last
= kwset
->trie
; curr
; curr
= curr
->next
)
440 enqueue (curr
->links
, &last
);
442 /* Looking for just one string. Extract it from the trie. */
443 kwset
->target
= obstack_alloc (&kwset
->obstack
, kwset
->mind
);
444 for (i
= 0, curr
= kwset
->trie
; i
< kwset
->mind
; ++i
)
446 kwset
->target
[i
] = curr
->links
->label
;
450 new_kwset
= kwsalloc (kwset
->trans
);
451 new_kwset
->kwsexec
= bmexec
;
452 kwsincr (new_kwset
, kwset
->target
, kwset
->mind
);
453 obstack_free (&kwset
->obstack
, NULL
);
458 /* Initial values for the delta table; will be changed later. The
459 delta entry for a given character is the smallest depth of any
460 node at which an outgoing edge is labeled by that character. */
461 memset (delta
, MIN (kwset
->mind
, UCHAR_MAX
), sizeof deltabuf
);
463 /* Traverse the nodes of the trie in level order, simultaneously
464 computing the delta table, failure function, and shift function. */
465 for (curr
= last
= kwset
->trie
; curr
; curr
= curr
->next
)
467 /* Enqueue the immediate descendants in the level order queue. */
468 enqueue (curr
->links
, &last
);
470 /* Update the delta table for the descendants of this node. */
471 treedelta (curr
->links
, curr
->depth
, delta
);
473 /* Compute the failure function for the descendants of this node. */
474 treefails (curr
->links
, curr
->fail
, kwset
->trie
, reverse
);
478 curr
->shift
= kwset
->mind
;
479 curr
->maxshift
= kwset
->mind
;
481 /* Update the shifts at each node in the current node's chain
482 of fails back to the root. */
484 for (fail
= curr
->fail
; fail
; fail
= fail
->fail
)
486 /* If the current node has some outgoing edge that the fail
487 doesn't, then the shift at the fail should be no larger
488 than the difference of their depths. */
489 if (!hasevery (fail
->links
, curr
->links
))
490 if (curr
->depth
- fail
->depth
< fail
->shift
)
491 fail
->shift
= curr
->depth
- fail
->depth
;
493 /* If the current node is accepting then the shift at the
494 fail and its descendants should be no larger than the
495 difference of their depths. */
496 if (curr
->accepting
&& fail
->maxshift
> curr
->depth
- fail
->depth
)
497 fail
->maxshift
= curr
->depth
- fail
->depth
;
504 /* Traverse the trie in level order again, fixing up all nodes whose
505 shift exceeds their inherited maxshift. */
506 for (curr
= kwset
->trie
->next
; curr
; curr
= curr
->next
)
508 if (curr
->maxshift
> curr
->parent
->maxshift
)
509 curr
->maxshift
= curr
->parent
->maxshift
;
510 if (curr
->shift
> curr
->maxshift
)
511 curr
->shift
= curr
->maxshift
;
515 /* Create a vector, indexed by character code, of the outgoing links
516 from the root node. Accumulate GC1 and GC1HELP. */
517 struct trie
*nextbuf
[NCHAR
];
518 struct trie
**next
= trans
? nextbuf
: kwset
->next
;
519 memset (next
, 0, sizeof nextbuf
);
520 treenext (kwset
->trie
->links
, next
);
523 for (i
= 0; i
< NCHAR
; i
++)
529 kwset
->next
[i
] = next
[ti
];
539 gc1help
= gc1help
== ti
? i
: -1;
540 else if (i
== ti
&& gc1
== gc1help
)
547 kwset
->gc1help
= gc1help
;
551 /* Looking for just one string. Extract it from the trie. */
552 kwset
->target
= obstack_alloc (&kwset
->obstack
, kwset
->mind
);
553 for (i
= kwset
->mind
- 1, curr
= kwset
->trie
; i
>= 0; --i
)
555 kwset
->target
[i
] = curr
->links
->label
;
561 /* Looking for the delta2 shift that might be made after a
562 backwards match has failed. Extract it from the trie. */
564 = obstack_alloc (&kwset
->obstack
,
565 sizeof *kwset
->shift
* (kwset
->mind
- 1));
566 for (i
= 0, curr
= kwset
->trie
->next
; i
< kwset
->mind
- 1; ++i
)
568 kwset
->shift
[i
] = curr
->shift
;
572 /* The penultimate byte. */
573 kwset
->gc2
= tr (trans
, kwset
->target
[kwset
->mind
- 2]);
577 /* Fix things up for any translation table. */
579 for (i
= 0; i
< NCHAR
; ++i
)
580 kwset
->delta
[i
] = delta
[U(trans
[i
])];
583 /* Delta2 portion of a Boyer-Moore search. *TP is the string text
584 pointer; it is updated in place. EP is the end of the string text,
585 and SP the end of the pattern. LEN is the pattern length; it must
586 be at least 2. TRANS, if nonnull, is the input translation table.
587 GC1 and GC2 are the last and second-from last bytes of the pattern,
588 transliterated by TRANS; the caller precomputes them for
589 efficiency. If D1 is nonnull, it is a delta1 table for shifting *TP
590 when failing. KWSET->shift says how much to shift. */
592 bm_delta2_search (char const **tpp
, char const *ep
, char const *sp
,
594 char const *trans
, char gc1
, char gc2
,
595 unsigned char const *d1
, kwset_t kwset
)
597 char const *tp
= *tpp
;
598 ptrdiff_t d
= len
, skip
= 0;
603 if (tr (trans
, tp
[-2]) == gc2
)
606 if (tr (trans
, tp
[-i
]) != tr (trans
, sp
[-i
]))
610 for (i
= d
+ skip
+ 1; i
<= len
; ++i
)
611 if (tr (trans
, tp
[-i
]) != tr (trans
, sp
[-i
]))
621 tp
+= d
= kwset
->shift
[i
- 2];
624 if (tr (trans
, tp
[-1]) != gc1
)
637 /* Return the address of the first byte in the buffer S (of size N)
638 that matches the terminal byte specified by KWSET, or NULL if there
639 is no match. KWSET->gc1 should be nonnegative. */
641 memchr_kwset (char const *s
, ptrdiff_t n
, kwset_t kwset
)
643 char const *slim
= s
+ n
;
644 if (kwset
->gc1help
< 0)
646 for (; s
< slim
; s
++)
647 if (kwset
->next
[U(*s
)])
652 int small_heuristic
= 2;
653 size_t small_bytes
= small_heuristic
* sizeof (unsigned long int);
656 if (kwset
->next
[U(*s
)])
659 if ((uintptr_t) s
% small_bytes
== 0)
660 return memchr2 (s
, kwset
->gc1
, kwset
->gc1help
, slim
- s
);
666 /* Fast Boyer-Moore search (inlinable version). */
667 static inline ptrdiff_t _GL_ATTRIBUTE_PURE
668 bmexec_trans (kwset_t kwset
, char const *text
, ptrdiff_t size
)
671 unsigned char const *d1
;
672 char const *ep
, *sp
, *tp
;
674 ptrdiff_t len
= kwset
->mind
;
675 char const *trans
= kwset
->trans
;
683 tp
= memchr_kwset (text
, size
, kwset
);
684 return tp
? tp
- text
: -1;
688 sp
= kwset
->target
+ len
;
690 char gc1
= kwset
->gc1
;
691 char gc2
= kwset
->gc2
;
693 /* Significance of 12: 1 (initial offset) + 10 (skip loop) + 1 (md2). */
695 if (!INT_MULTIPLY_WRAPV (len
, 12, &len12
) && len12
< size
)
696 /* 11 is not a bug, the initial offset happens only once. */
697 for (ep
= text
+ size
- 11 * len
; tp
<= ep
; )
699 char const *tp0
= tp
;
700 d
= d1
[U(tp
[-1])], tp
+= d
;
701 d
= d1
[U(tp
[-1])], tp
+= d
;
704 d
= d1
[U(tp
[-1])], tp
+= d
;
705 d
= d1
[U(tp
[-1])], tp
+= d
;
706 d
= d1
[U(tp
[-1])], tp
+= d
;
709 d
= d1
[U(tp
[-1])], tp
+= d
;
710 d
= d1
[U(tp
[-1])], tp
+= d
;
711 d
= d1
[U(tp
[-1])], tp
+= d
;
714 d
= d1
[U(tp
[-1])], tp
+= d
;
715 d
= d1
[U(tp
[-1])], tp
+= d
;
717 /* As a heuristic, prefer memchr to seeking by
718 delta1 when the latter doesn't advance much. */
719 int advance_heuristic
= 16 * sizeof (long);
720 if (advance_heuristic
<= tp
- tp0
)
723 tp
= memchr_kwset (tp
, text
+ size
- tp
, kwset
);
732 if (bm_delta2_search (&tp
, ep
, sp
, len
, trans
, gc1
, gc2
, d1
, kwset
))
736 /* Now only a few characters are left to search. Carefully avoid
737 ever producing an out-of-bounds pointer. */
742 d
= d1
[U((tp
+= d
)[-1])];
745 if (bm_delta2_search (&tp
, ep
, sp
, len
, trans
, gc1
, gc2
, NULL
, kwset
))
752 /* Fast Boyer-Moore search. */
754 bmexec (kwset_t kwset
, char const *text
, ptrdiff_t size
,
755 struct kwsmatch
*kwsmatch
, bool longest
)
757 /* Help the compiler inline in two ways, depending on whether
758 kwset->trans is null. */
759 ptrdiff_t ret
= (IGNORE_DUPLICATE_BRANCH_WARNING
761 ? bmexec_trans (kwset
, text
, size
)
762 : bmexec_trans (kwset
, text
, size
)));
766 kwsmatch
->offset
[0] = ret
;
767 kwsmatch
->size
[0] = kwset
->mind
;
773 /* Hairy multiple string search with the Aho-Corasick algorithm.
774 (inlinable version) */
775 static inline ptrdiff_t
776 acexec_trans (kwset_t kwset
, char const *text
, ptrdiff_t len
,
777 struct kwsmatch
*kwsmatch
, bool longest
)
779 struct trie
const *trie
, *accept
;
780 char const *tp
, *left
, *lim
;
781 struct tree
const *tree
;
784 /* Initialize register copies and look for easy ways out. */
785 if (len
< kwset
->mind
)
787 trans
= kwset
->trans
;
792 if (!trie
->accepting
)
795 int gc1
= kwset
->gc1
;
801 while (! (trie
= kwset
->next
[c
= tr (trans
, *tp
++)]))
807 tp
= memchr_kwset (tp
, lim
- tp
, kwset
);
810 c
= tr (trans
, *tp
++);
811 trie
= kwset
->next
[c
];
820 c
= tr (trans
, *tp
++);
822 for (tree
= trie
->links
; c
!= tree
->label
; )
824 tree
= c
< tree
->label
? tree
->llink
: tree
->rlink
;
830 trie
= kwset
->next
[c
];
854 while (accept
->accepting
== SIZE_MAX
)
855 accept
= accept
->fail
;
856 left
= tp
- accept
->depth
;
858 /* Try left-most longest match. */
863 struct trie
const *accept1
;
865 unsigned char c
= tr (trans
, *tp
++);
870 while (tree
&& c
!= tree
->label
)
871 tree
= c
< tree
->label
? tree
->llink
: tree
->rlink
;
873 while (!tree
&& (trie
= trie
->fail
) && accept
->depth
<= trie
->depth
);
881 while (accept1
->accepting
== SIZE_MAX
)
882 accept1
= accept1
->fail
;
883 left1
= tp
- accept1
->depth
;
893 kwsmatch
->index
= accept
->accepting
/ 2;
894 kwsmatch
->offset
[0] = left
- text
;
895 kwsmatch
->size
[0] = accept
->depth
;
900 /* Hairy multiple string search with Aho-Corasick algorithm. */
902 acexec (kwset_t kwset
, char const *text
, ptrdiff_t size
,
903 struct kwsmatch
*kwsmatch
, bool longest
)
906 /* Help the compiler inline in two ways, depending on whether
907 kwset->trans is null. */
908 return (IGNORE_DUPLICATE_BRANCH_WARNING
910 ? acexec_trans (kwset
, text
, size
, kwsmatch
, longest
)
911 : acexec_trans (kwset
, text
, size
, kwsmatch
, longest
)));
914 /* Find the first instance of a KWSET member in TEXT, which has SIZE bytes.
915 Return the offset (into TEXT) of the first byte of the matching substring,
916 or -1 if no match is found. Upon a match, store details in
917 *KWSMATCH: index of matched keyword, start offset (same as the return
918 value), and length. If LONGEST, find the longest match; otherwise
919 any match will do. */
921 kwsexec (kwset_t kwset
, char const *text
, ptrdiff_t size
,
922 struct kwsmatch
*kwsmatch
, bool longest
)
924 return kwset
->kwsexec (kwset
, text
, size
, kwsmatch
, longest
);
927 /* Free the components of the given keyword set. */
929 kwsfree (kwset_t kwset
)
931 obstack_free (&kwset
->obstack
, NULL
);