2 tre_regexec.c - TRE POSIX compatible matching functions (and more).
4 This software is released under a BSD-style license.
5 See the file LICENSE for details and copyright.
11 #endif /* HAVE_CONFIG_H */
14 /* AIX requires this to be the first thing in the file. */
22 # ifndef alloca /* predefined by HP cc +Olibcalls */
28 #endif /* TRE_USE_ALLOCA */
35 #endif /* HAVE_WCHAR_H */
38 #endif /* HAVE_WCTYPE_H */
41 #endif /* !TRE_WCHAR */
44 #endif /* HAVE_MALLOC_H */
47 #include "tre-internal.h"
48 #include "tre-match-utils.h"
53 /* For each tre_last_matched_t in the lm array, find the last matched branch by
54 comparing the touch value of the cmp_tag's. For all other branches, reset
55 the corresponding tags. If reset_all is non-zero, reset all tags in all
56 branches. Recurse into the nested last matched structures, clearing tags as
59 tre_reset_last_matched_branches(tre_tag_t
*tags
, const tre_last_matched_t
*lm
,
60 int n
, int start_tag
, int reset_all
)
63 tre_last_matched_branch_t
*b
;
65 DPRINT(("tre_reset_last_matched_branches: n=%d start_tag=%d reset_all=%d\n",
66 n
, start_tag
, reset_all
));
69 if (lm
->n_branches
== 1)
74 DPRINT((" b->cmp_tag=%d %d <? %d\n", b
->cmp_tag
,
75 tre_tag_touch_get(tags
, b
->cmp_tag
),
76 tre_tag_touch_get(tags
, start_tag
)));
77 reset
= (reset_all
|| tre_tag_touch_get(tags
, b
->cmp_tag
) <
78 tre_tag_touch_get(tags
, start_tag
));
87 for (i
= b
->n_tags
, t
= b
->tags
; i
> 0; i
--, t
++)
89 DPRINT((" Resetting t%d\n", *t
));
90 tre_tag_reset(tags
, *t
);
93 if (b
->n_last_matched
> 0)
94 tre_reset_last_matched_branches(tags
, b
->last_matched
,
96 lm
->start_tag
, reset
);
104 #endif /* TRE_DEBUG */
106 for (i
= lm
->n_branches
, b
= lm
->branches
; i
> 0; i
--, b
++)
109 int touch
= tre_tag_touch_get(tags
, t
);
115 #endif /* TRE_DEBUG */
118 DPRINT((" Last touched end tag t%d=%d\n", last
, max
));
121 for (i
= lm
->n_branches
, b
= lm
->branches
; i
> 0; i
--, b
++)
123 reset
= (reset_all
|| tre_tag_touch_get(tags
, b
->cmp_tag
) < max
);
129 for (j
= b
->n_tags
, t
= b
->tags
; j
> 0; j
--, t
++)
131 DPRINT((" Resetting t%d\n", *t
));
132 tre_tag_reset(tags
, *t
);
135 if (b
->n_last_matched
> 0)
136 tre_reset_last_matched_branches(tags
, b
->last_matched
,
138 lm
->start_tag
, reset
);
145 /* Fills the POSIX.2 regmatch_t array according to the TNFA tag and match
148 tre_fill_pmatch(size_t nmatch
, regmatch_t pmatch
[], int cflags
,
149 const tre_tnfa_t
*tnfa
, const tre_tag_t
*intags
, int match_eo
)
153 if (cflags
& REG_NOSUB
) return REG_OK
;
156 if (match_eo
>= 0 && intags
)
158 const tre_tag_t
*tags
= intags
;
159 tre_submatch_data_t
*submatch_data
;
161 if (tnfa
->last_matched_branch
&&
162 tnfa
->last_matched_branch
->n_last_matched
> 0)
165 #ifdef TRE_USE_ALLOCA
166 t
= alloca(sizeof(*t
) * tnfa
->num_tags
);
167 #else /* !TRE_USE_ALLOCA */
168 t
= xmalloc(sizeof(*t
) * tnfa
->num_tags
);
169 #endif /* !TRE_USE_ALLOCA */
170 if (!t
) return REG_ESPACE
;
171 memcpy(t
, intags
, tnfa
->num_tags
* sizeof(tre_tag_t
));
172 tre_reset_last_matched_branches(t
,
173 tnfa
->last_matched_branch
->last_matched
,
174 tnfa
->last_matched_branch
->n_last_matched
,
178 /* Construct submatch offsets from the tags. */
179 DPRINT(("end tag = t%d = %d\n", tnfa
->end_tag
, match_eo
));
180 submatch_data
= tnfa
->submatch_data
;
181 while (i
< tnfa
->num_submatches
&& i
< nmatch
)
183 if (submatch_data
[i
].so_tag
== tnfa
->end_tag
)
184 pmatch
[i
].rm_so
= match_eo
;
186 pmatch
[i
].rm_so
= tre_tag_get(tags
, submatch_data
[i
].so_tag
);
188 if (submatch_data
[i
].eo_tag
== tnfa
->end_tag
)
189 pmatch
[i
].rm_eo
= match_eo
;
191 pmatch
[i
].rm_eo
= tre_tag_get(tags
, submatch_data
[i
].eo_tag
);
193 /* If either of the endpoints were not used, this submatch
194 was not part of the match. */
195 if (pmatch
[i
].rm_so
== -1 || pmatch
[i
].rm_eo
== -1)
196 pmatch
[i
].rm_so
= pmatch
[i
].rm_eo
= -1;
198 DPRINT(("pmatch[%d] = {t%d = %qd, t%d = %qd}\n", i
,
199 submatch_data
[i
].so_tag
, pmatch
[i
].rm_so
,
200 submatch_data
[i
].eo_tag
, pmatch
[i
].rm_eo
));
203 #ifndef TRE_USE_ALLOCA
204 if (tags
!= intags
) xfree(__DECONST(tre_tag_t
*,tags
));
205 #endif /* !TRE_USE_ALLOCA */
210 pmatch
[i
].rm_so
= -1;
211 pmatch
[i
].rm_eo
= -1;
220 Wrapper functions for POSIX compatible regexp matching.
224 tre_have_backrefs(const regex_t
*preg
)
226 tre_tnfa_t
*tnfa
= (void *)preg
->TRE_REGEX_T_FIELD
;
227 return tnfa
->have_backrefs
;
232 tre_have_approx(const regex_t
*preg
)
234 tre_tnfa_t
*tnfa
= (void *)preg
->TRE_REGEX_T_FIELD
;
235 return tnfa
->have_approx
;
237 #endif /* TRE_APPROX */
240 tre_match(const tre_tnfa_t
*tnfa
, const void *string
, size_t len
,
241 tre_str_type_t type
, size_t nmatch
, regmatch_t pmatch
[],
244 reg_errcode_t status
;
245 tre_tag_t
*tags
= NULL
;
247 size_t offset
= 0, count
= 0;
248 if (tnfa
->num_tags
> 0 && nmatch
> 0)
250 #ifdef TRE_USE_ALLOCA
251 tags
= alloca(sizeof(*tags
) * tnfa
->num_tags
);
252 #else /* !TRE_USE_ALLOCA */
253 tags
= xmalloc(sizeof(*tags
) * tnfa
->num_tags
);
254 #endif /* !TRE_USE_ALLOCA */
260 (eflags
& REG_STARTEND
) && pmatch
)
262 if (pmatch
->rm_so
< 0)
264 if (len
== (size_t)-1)
266 if (pmatch
->rm_eo
< 0 || pmatch
->rm_so
> pmatch
->rm_eo
)
268 len
= pmatch
->rm_eo
- pmatch
->rm_so
;
270 count
= offset
= pmatch
->rm_so
;
271 if (type
== STR_WIDE
) offset
*= sizeof(wchar_t);
274 /* Dispatch to the appropriate matcher. */
275 if (tnfa
->have_backrefs
|| eflags
& REG_BACKTRACKING_MATCHER
)
277 /* The regex has back references, use the backtracking matcher. */
278 status
= tre_tnfa_run_backtrack(tnfa
, string
+ offset
, (int)len
, type
,
282 else if (tnfa
->have_approx
|| eflags
& REG_APPROX_MATCHER
)
284 /* The regex uses approximate matching, use the approximate matcher. */
287 tre_regaparams_default(¶ms
);
290 status
= tre_tnfa_run_approx(tnfa
, string
+ offset
, (int)len
, type
, tags
,
291 &match
, params
, eflags
, &eo
);
293 #endif /* TRE_APPROX */
296 /* Exact matching, no back references, use the parallel matcher. */
297 status
= tre_tnfa_run_parallel(tnfa
, string
+ offset
, (int)len
, type
,
301 if (status
== REG_OK
)
303 /* A match was found, so fill the submatch registers. */
304 status
= tre_fill_pmatch(nmatch
, pmatch
, tnfa
->cflags
, tnfa
, tags
, eo
);
305 /* If doing REG_STARTEND, adjust the pmatch array (we can't build
306 this into tre_fill_pmatch, because tre_tnfa_run_backtrack calls
307 tre_fill_pmatch itself). */
308 if (status
== REG_OK
&& !(tnfa
->cflags
& REG_NOSUB
) &&
309 (eflags
& REG_STARTEND
) && pmatch
&& nmatch
> 0)
313 for (i
= nmatch
, p
= pmatch
; i
> 0; p
++, i
--)
315 if (p
->rm_so
>= 0) p
->rm_so
+= count
;
316 if (p
->rm_eo
>= 0) p
->rm_eo
+= count
;
320 #ifndef TRE_USE_ALLOCA
323 #endif /* !TRE_USE_ALLOCA */
328 tre_regnexec(const regex_t
*preg
, const char *str
, size_t len
,
329 size_t nmatch
, regmatch_t pmatch
[], int eflags
)
331 tre_tnfa_t
*tnfa
= (void *)preg
->TRE_REGEX_T_FIELD
;
332 tre_str_type_t type
= (TRE_MB_CUR_MAX_L(tnfa
->loc
) == 1) ? STR_BYTE
: STR_MBS
;
334 #ifdef TRE_USE_SYSTEM_REGEX_H
335 if (preg
->re_magic
!= RE_MAGIC
) return REG_BADPAT
;
336 #endif /* TRE_USE_SYSTEM_REGEX_H */
338 return tre_match(tnfa
, str
, len
, type
, nmatch
, pmatch
, eflags
);
342 tre_regexec(const regex_t
* __restrict preg
, const char * __restrict str
,
343 size_t nmatch
, regmatch_t pmatch
[__restrict_arr
], int eflags
)
345 return tre_regnexec(preg
, str
, (size_t)-1, nmatch
, pmatch
, eflags
);
352 tre_regwnexec(const regex_t
*preg
, const wchar_t *str
, size_t len
,
353 size_t nmatch
, regmatch_t pmatch
[], int eflags
)
355 tre_tnfa_t
*tnfa
= (void *)preg
->TRE_REGEX_T_FIELD
;
357 #ifdef TRE_USE_SYSTEM_REGEX_H
358 if (preg
->re_magic
!= RE_MAGIC
) return REG_BADPAT
;
359 #endif /* TRE_USE_SYSTEM_REGEX_H */
361 return tre_match(tnfa
, str
, len
, STR_WIDE
, nmatch
, pmatch
, eflags
);
365 tre_regwexec(const regex_t
*preg
, const wchar_t *str
,
366 size_t nmatch
, regmatch_t pmatch
[], int eflags
)
368 return tre_regwnexec(preg
, str
, (size_t)-1, nmatch
, pmatch
, eflags
);
371 #endif /* TRE_WCHAR */
376 Wrapper functions for approximate regexp matching.
380 tre_match_approx(const tre_tnfa_t
*tnfa
, const void *string
, size_t len
,
381 tre_str_type_t type
, regamatch_t
*match
, regaparams_t params
,
384 reg_errcode_t status
;
385 tre_tag_t
*tags
= NULL
;
387 size_t offset
= 0, count
= 0;
389 /* If the regexp does not use approximate matching features, the
390 maximum cost is zero, and the approximate matcher isn't forced,
391 use the exact matcher instead. */
392 if (params
.max_cost
== 0 && !tnfa
->have_approx
393 && !(eflags
& REG_APPROX_MATCHER
))
394 return tre_match(tnfa
, string
, len
, type
, match
->nmatch
, match
->pmatch
,
397 /* Back references are not supported by the approximate matcher. */
398 if (tnfa
->have_backrefs
)
401 if (tnfa
->num_tags
> 0 && match
->nmatch
> 0)
404 tags
= alloca(sizeof(*tags
) * tnfa
->num_tags
);
405 #else /* !TRE_USE_ALLOCA */
406 tags
= xmalloc(sizeof(*tags
) * tnfa
->num_tags
);
407 #endif /* !TRE_USE_ALLOCA */
413 (eflags
& REG_STARTEND
) && match
->pmatch
)
415 if (match
->pmatch
->rm_so
< 0)
417 if (len
== (size_t)-1)
419 if (match
->pmatch
->rm_eo
< 0 || match
->pmatch
->rm_so
>
420 match
->pmatch
->rm_eo
)
422 len
= match
->pmatch
->rm_eo
- match
->pmatch
->rm_so
;
424 count
= offset
= match
->pmatch
->rm_so
;
425 if (type
== STR_WIDE
) offset
*= sizeof(wchar_t);
428 status
= tre_tnfa_run_approx(tnfa
, string
, (int)len
, type
, tags
,
429 match
, params
, eflags
, &eo
);
430 if (status
== REG_OK
)
432 status
= tre_fill_pmatch(match
->nmatch
, match
->pmatch
, tnfa
->cflags
,
434 /* If doing REG_STARTEND, adjust the pmatch array (we can't build
435 this into tre_fill_pmatch, because tre_tnfa_run_backtrack call
436 tre_fill_pmatch itself). */
437 if (status
== REG_OK
&& !(tnfa
->cflags
& REG_NOSUB
) &&
438 (eflags
& REG_STARTEND
) && match
->pmatch
&& match
->nmatch
> 0)
442 for (i
= match
->nmatch
, p
= match
->pmatch
; i
> 0; p
++, i
--)
444 if (p
->rm_so
>= 0) p
->rm_so
+= count
;
445 if (p
->rm_eo
>= 0) p
->rm_eo
+= count
;
449 #ifndef TRE_USE_ALLOCA
452 #endif /* !TRE_USE_ALLOCA */
457 tre_reganexec(const regex_t
*preg
, const char *str
, size_t len
,
458 regamatch_t
*match
, regaparams_t params
, int eflags
)
460 tre_tnfa_t
*tnfa
= (void *)preg
->TRE_REGEX_T_FIELD
;
461 tre_str_type_t type
= (TRE_MB_CUR_MAX_L(tnfa
->loc
) == 1) ? STR_BYTE
: STR_MBS
;
463 #ifdef TRE_USE_SYSTEM_REGEX_H
464 if (preg
->re_magic
!= RE_MAGIC
) return REG_BADPAT
;
465 #endif /* TRE_USE_SYSTEM_REGEX_H */
467 return tre_match_approx(tnfa
, str
, len
, type
, match
, params
, eflags
);
471 tre_regaexec(const regex_t
*preg
, const char *str
,
472 regamatch_t
*match
, regaparams_t params
, int eflags
)
474 return tre_reganexec(preg
, str
, (size_t)-1, match
, params
, eflags
);
480 tre_regawnexec(const regex_t
*preg
, const wchar_t *str
, size_t len
,
481 regamatch_t
*match
, regaparams_t params
, int eflags
)
483 tre_tnfa_t
*tnfa
= (void *)preg
->TRE_REGEX_T_FIELD
;
485 #ifdef TRE_USE_SYSTEM_REGEX_H
486 if (preg
->re_magic
!= RE_MAGIC
) return REG_BADPAT
;
487 #endif /* TRE_USE_SYSTEM_REGEX_H */
489 return tre_match_approx(tnfa
, str
, len
, STR_WIDE
,
490 match
, params
, eflags
);
494 tre_regawexec(const regex_t
*preg
, const wchar_t *str
,
495 regamatch_t
*match
, regaparams_t params
, int eflags
)
497 return tre_regawnexec(preg
, str
, (size_t)-1, match
, params
, eflags
);
500 #endif /* TRE_WCHAR */
503 tre_regaparams_default(regaparams_t
*params
)
505 memset(params
, 0, sizeof(*params
));
506 params
->cost_ins
= 1;
507 params
->cost_del
= 1;
508 params
->cost_subst
= 1;
509 params
->max_cost
= INT_MAX
;
510 params
->max_ins
= INT_MAX
;
511 params
->max_del
= INT_MAX
;
512 params
->max_subst
= INT_MAX
;
513 params
->max_err
= INT_MAX
;
516 #endif /* TRE_APPROX */