1 /* Extended regular expression matching and search library.
2 Copyright (C) 2002, 2003, 2004 Free Software Foundation, Inc.
3 This file is part of the GNU C Library.
4 Contributed by Isamu Hasegawa <isamu@yamato.ibm.com>.
6 The GNU C Library is free software; you can redistribute it and/or
7 modify it under the terms of the GNU Lesser General Public
8 License as published by the Free Software Foundation; either
9 version 2.1 of the License, or (at your option) any later version.
11 The GNU C Library is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 Lesser General Public License for more details.
16 You should have received a copy of the GNU Lesser General Public
17 License along with the GNU C Library; if not, write to the Free
18 Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
/* Forward declarations of file-local helpers.  Every prototype below is
   tagged with internal_function. */
21 static void re_string_construct_common (const char *str
, int len
,
23 RE_TRANSLATE_TYPE trans
, int icase
,
24 const re_dfa_t
*dfa
) internal_function
;
26 static int re_string_skip_chars (re_string_t
*pstr
, int new_raw_idx
,
27 wint_t *last_wc
) internal_function
;
28 #endif /* RE_ENABLE_I18N */
/* Helpers that create DFA states and register them by hash value. */
29 static re_dfastate_t
*create_newstate_common (re_dfa_t
*dfa
,
30 const re_node_set
*nodes
,
31 unsigned int hash
) internal_function
;
32 static reg_errcode_t
register_state (re_dfa_t
*dfa
, re_dfastate_t
*newstate
,
33 unsigned int hash
) internal_function
;
34 static re_dfastate_t
*create_ci_newstate (re_dfa_t
*dfa
,
35 const re_node_set
*nodes
,
36 unsigned int hash
) internal_function
;
37 static re_dfastate_t
*create_cd_newstate (re_dfa_t
*dfa
,
38 const re_node_set
*nodes
,
40 unsigned int hash
) internal_function
;
/* Hash function over a node set and a context value. */
41 static unsigned int inline calc_state_hash (const re_node_set
*nodes
,
42 unsigned int context
) internal_function
;
44 /* Functions for string operations. */
46 /* Allocate the buffers of PSTR for the string STR. It is necessary to call
47 re_string_reconstruct before using the object. */
50 re_string_allocate (pstr
, str
, len
, init_len
, trans
, icase
, dfa
)
53 int len
, init_len
, icase
;
54 RE_TRANSLATE_TYPE trans
;
60 /* Ensure at least one character fits into the buffers. */
61 if (init_len
< dfa
->mb_cur_max
)
62 init_len
= dfa
->mb_cur_max
;
/* The initial buffer length is capped at LEN + 1. */
63 init_buf_len
= (len
+ 1 < init_len
) ? len
+ 1: init_len
;
64 re_string_construct_common (str
, len
, pstr
, trans
, icase
, dfa
);
66 ret
= re_string_realloc_buffers (pstr
, init_buf_len
);
67 if (BE (ret
!= REG_NOERROR
, 0))
/* Copy the word-character settings from DFA. */
70 pstr
->word_char
= dfa
->word_char
;
71 pstr
->word_ops_used
= dfa
->word_ops_used
;
/* When no separate MBS buffer is allocated, MBS aliases STR itself. */
72 pstr
->mbs
= pstr
->mbs_allocated
? pstr
->mbs
: (unsigned char *) str
;
/* Nothing is valid yet if MBS is allocated or the locale is multibyte;
   otherwise all LEN bytes are already valid. */
73 pstr
->valid_len
= (pstr
->mbs_allocated
|| dfa
->mb_cur_max
> 1) ? 0 : len
;
74 pstr
->valid_raw_len
= pstr
->valid_len
;
78 /* Allocate the buffers of PSTR for the string STR, and initialize them. */
81 re_string_construct (pstr
, str
, len
, trans
, icase
, dfa
)
85 RE_TRANSLATE_TYPE trans
;
/* Start from an all-zero object. */
89 memset (pstr
, '\0', sizeof (re_string_t
));
90 re_string_construct_common (str
, len
, pstr
, trans
, icase
, dfa
);
94 ret
= re_string_realloc_buffers (pstr
, len
+ 1);
95 if (BE (ret
!= REG_NOERROR
, 0))
/* When no separate MBS buffer is allocated, MBS aliases STR itself. */
98 pstr
->mbs
= pstr
->mbs_allocated
? pstr
->mbs
: (unsigned char *) str
;
102 #ifdef RE_ENABLE_I18N
103 if (dfa
->mb_cur_max
> 1)
107 ret
= build_wcs_upper_buffer (pstr
);
108 if (BE (ret
!= REG_NOERROR
, 0))
110 if (pstr
->valid_raw_len
>= len
)
112 if (pstr
->bufs_len
> pstr
->valid_len
+ dfa
->mb_cur_max
)
/* Grow the buffers to twice their current length. */
114 ret
= re_string_realloc_buffers (pstr
, pstr
->bufs_len
* 2);
115 if (BE (ret
!= REG_NOERROR
, 0))
120 #endif /* RE_ENABLE_I18N */
121 build_upper_buffer (pstr
);
125 #ifdef RE_ENABLE_I18N
126 if (dfa
->mb_cur_max
> 1)
127 build_wcs_buffer (pstr
);
129 #endif /* RE_ENABLE_I18N */
132 re_string_translate_buffer (pstr
);
/* Mark the whole buffer length as valid. */
135 pstr
->valid_len
= pstr
->bufs_len
;
136 pstr
->valid_raw_len
= pstr
->bufs_len
;
144 /* Helper function for re_string_allocate and re_string_construct:
   resize the buffers of PSTR to NEW_BUF_LEN elements. */
147 re_string_realloc_buffers (pstr
, new_buf_len
)
151 #ifdef RE_ENABLE_I18N
152 if (pstr
->mb_cur_max
> 1)
/* Each result is stored back into PSTR only after its NULL check. */
154 wint_t *new_array
= re_realloc (pstr
->wcs
, wint_t, new_buf_len
);
155 if (BE (new_array
== NULL
, 0))
157 pstr
->wcs
= new_array
;
/* OFFSETS is resized only if it has already been allocated. */
158 if (pstr
->offsets
!= NULL
)
160 int *new_array
= re_realloc (pstr
->offsets
, int, new_buf_len
);
161 if (BE (new_array
== NULL
, 0))
163 pstr
->offsets
= new_array
;
166 #endif /* RE_ENABLE_I18N */
/* MBS is resized only when PSTR has its own MBS buffer. */
167 if (pstr
->mbs_allocated
)
169 unsigned char *new_array
= re_realloc (pstr
->mbs
, unsigned char,
171 if (BE (new_array
== NULL
, 0))
173 pstr
->mbs
= new_array
;
175 pstr
->bufs_len
= new_buf_len
;
/* Store STR, TRANS and ICASE in PSTR and copy mb_cur_max, is_utf8 and
   map_notascii from DFA. */
181 re_string_construct_common (str
, len
, pstr
, trans
, icase
, dfa
)
185 RE_TRANSLATE_TYPE trans
;
189 pstr
->raw_mbs
= (const unsigned char *) str
;
192 pstr
->trans
= (unsigned RE_TRANSLATE_TYPE
) trans
;
/* Normalize ICASE to 0 or 1. */
193 pstr
->icase
= icase
? 1 : 0;
/* MBS_ALLOCATED is set whenever a translation table or case folding is
   in use. */
194 pstr
->mbs_allocated
= (trans
!= NULL
|| icase
);
195 pstr
->mb_cur_max
= dfa
->mb_cur_max
;
196 pstr
->is_utf8
= dfa
->is_utf8
;
197 pstr
->map_notascii
= dfa
->map_notascii
;
/* STOP starts at LEN, and RAW_STOP mirrors STOP. */
198 pstr
->stop
= pstr
->len
;
199 pstr
->raw_stop
= pstr
->stop
;
202 #ifdef RE_ENABLE_I18N
204 /* Build wide character buffer PSTR->WCS.
205 If the byte sequence of the string is:
206 <mb1>(0), <mb1>(1), <mb2>(0), <mb2>(1), <sb3>
207 Then the wide character buffer will be:
208 <wc1> , WEOF , <wc2> , WEOF , <wc3>
209 We use WEOF for padding; it indicates that the position isn't
210 the first byte of a multibyte character.
212 Note that this function assumes PSTR->VALID_LEN elements are already
213 built and starts from PSTR->VALID_LEN. */
216 build_wcs_buffer (pstr
)
/* Two alternative declarations of the scratch buffer BUF follow; the
   preprocessor lines selecting between them are not visible here. */
220 unsigned char buf
[pstr
->mb_cur_max
];
222 unsigned char buf
[64];
225 int byte_idx
, end_idx
, mbclen
, remain_len
;
227 /* Build the buffers from pstr->valid_len to either pstr->len or
229 end_idx
= (pstr
->bufs_len
> pstr
->len
) ? pstr
->len
: pstr
->bufs_len
;
230 for (byte_idx
= pstr
->valid_len
; byte_idx
< end_idx
;)
235 remain_len
= end_idx
- byte_idx
;
236 prev_st
= pstr
->cur_state
;
237 /* Apply the translation if we need. */
238 if (BE (pstr
->trans
!= NULL
, 0))
/* Translate up to MB_CUR_MAX bytes into BUF (and into MBS) so that the
   conversion below reads the translated bytes. */
242 for (i
= 0; i
< pstr
->mb_cur_max
&& i
< remain_len
; ++i
)
244 ch
= pstr
->raw_mbs
[pstr
->raw_mbs_idx
+ byte_idx
+ i
];
245 buf
[i
] = pstr
->mbs
[byte_idx
+ i
] = pstr
->trans
[ch
];
247 p
= (const char *) buf
;
250 p
= (const char *) pstr
->raw_mbs
+ pstr
->raw_mbs_idx
+ byte_idx
;
/* Convert the multibyte sequence at P into the wide character WC. */
251 mbclen
= mbrtowc (&wc
, p
, remain_len
, &pstr
->cur_state
);
252 if (BE (mbclen
== (size_t) -2, 0))
254 /* The buffer doesn't have enough space, finish to build. */
255 pstr
->cur_state
= prev_st
;
258 else if (BE (mbclen
== (size_t) -1 || mbclen
== 0, 0))
260 /* We treat these cases as a singlebyte character. */
262 wc
= (wchar_t) pstr
->raw_mbs
[pstr
->raw_mbs_idx
+ byte_idx
];
263 if (BE (pstr
->trans
!= NULL
, 0))
264 wc
= pstr
->trans
[wc
];
265 pstr
->cur_state
= prev_st
;
268 /* Write wide character and padding. */
269 pstr
->wcs
[byte_idx
++] = wc
;
270 /* Write paddings. */
271 for (remain_len
= byte_idx
+ mbclen
- 1; byte_idx
< remain_len
;)
272 pstr
->wcs
[byte_idx
++] = WEOF
;
/* Record how far the buffers have been built. */
274 pstr
->valid_len
= byte_idx
;
275 pstr
->valid_raw_len
= byte_idx
;
278 /* Build wide character buffer PSTR->WCS like build_wcs_buffer,
279 but for REG_ICASE. */
282 build_wcs_upper_buffer (pstr
)
286 int src_idx
, byte_idx
, end_idx
, mbclen
, remain_len
;
/* Two alternative declarations of the scratch buffer BUF follow; the
   preprocessor lines selecting between them are not visible here. */
288 unsigned char buf
[pstr
->mb_cur_max
];
290 unsigned char buf
[64];
293 byte_idx
= pstr
->valid_len
;
294 end_idx
= (pstr
->bufs_len
> pstr
->len
) ? pstr
->len
: pstr
->bufs_len
;
296 /* The following optimization assumes that ASCII characters can be
297 mapped to wide characters with a simple cast. */
298 if (! pstr
->map_notascii
&& pstr
->trans
== NULL
&& !pstr
->offsets_needed
)
300 while (byte_idx
< end_idx
)
304 if (isascii (pstr
->raw_mbs
[pstr
->raw_mbs_idx
+ byte_idx
])
305 && mbsinit (&pstr
->cur_state
))
307 /* In case of a singlebyte character. */
309 = toupper (pstr
->raw_mbs
[pstr
->raw_mbs_idx
+ byte_idx
]);
310 /* The next step uses the assumption that wchar_t is encoded
311 ASCII-safe: all ASCII values can be converted like this. */
312 pstr
->wcs
[byte_idx
] = (wchar_t) pstr
->mbs
[byte_idx
];
317 remain_len
= end_idx
- byte_idx
;
318 prev_st
= pstr
->cur_state
;
319 mbclen
= mbrtowc (&wc
,
320 ((const char *) pstr
->raw_mbs
+ pstr
->raw_mbs_idx
321 + byte_idx
), remain_len
, &pstr
->cur_state
);
322 if (BE (mbclen
> 0, 1))
/* Convert WCU back to bytes; its length is compared with the length of
   the original sequence. */
330 mbcdlen
= wcrtomb (buf
, wcu
, &prev_st
);
331 if (BE (mbclen
== mbcdlen
, 1))
332 memcpy (pstr
->mbs
+ byte_idx
, buf
, mbclen
);
340 memcpy (pstr
->mbs
+ byte_idx
,
341 pstr
->raw_mbs
+ pstr
->raw_mbs_idx
+ byte_idx
, mbclen
);
342 pstr
->wcs
[byte_idx
++] = wcu
;
343 /* Write paddings. */
344 for (remain_len
= byte_idx
+ mbclen
- 1; byte_idx
< remain_len
;)
345 pstr
->wcs
[byte_idx
++] = WEOF
;
347 else if (mbclen
== (size_t) -1 || mbclen
== 0)
349 /* It is an invalid character or '\0'. Just use the byte. */
350 int ch
= pstr
->raw_mbs
[pstr
->raw_mbs_idx
+ byte_idx
];
351 pstr
->mbs
[byte_idx
] = ch
;
352 /* And also cast it to wide char. */
353 pstr
->wcs
[byte_idx
++] = (wchar_t) ch
;
354 if (BE (mbclen
== (size_t) -1, 0))
355 pstr
->cur_state
= prev_st
;
359 /* The buffer doesn't have enough space, finish to build. */
360 pstr
->cur_state
= prev_st
;
364 pstr
->valid_len
= byte_idx
;
365 pstr
->valid_raw_len
= byte_idx
;
/* From here on SRC_IDX indexes the raw string, while BYTE_IDX indexes
   the converted buffers MBS, WCS and OFFSETS. */
369 for (src_idx
= pstr
->valid_raw_len
; byte_idx
< end_idx
;)
374 remain_len
= end_idx
- byte_idx
;
375 prev_st
= pstr
->cur_state
;
376 if (BE (pstr
->trans
!= NULL
, 0))
/* Translate up to MB_CUR_MAX raw bytes into BUF before converting. */
380 for (i
= 0; i
< pstr
->mb_cur_max
&& i
< remain_len
; ++i
)
382 ch
= pstr
->raw_mbs
[pstr
->raw_mbs_idx
+ src_idx
+ i
];
383 buf
[i
] = pstr
->trans
[ch
];
385 p
= (const char *) buf
;
388 p
= (const char *) pstr
->raw_mbs
+ pstr
->raw_mbs_idx
+ src_idx
;
389 mbclen
= mbrtowc (&wc
, p
, remain_len
, &pstr
->cur_state
);
390 if (BE (mbclen
> 0, 1))
398 mbcdlen
= wcrtomb ((char *) buf
, wcu
, &prev_st
);
399 if (BE (mbclen
== mbcdlen
, 1))
400 memcpy (pstr
->mbs
+ byte_idx
, buf
, mbclen
);
/* Check whether the converted sequence still fits in the buffers. */
405 if (byte_idx
+ mbcdlen
> pstr
->bufs_len
)
407 pstr
->cur_state
= prev_st
;
/* Allocate OFFSETS the first time it is needed. */
411 if (pstr
->offsets
== NULL
)
413 pstr
->offsets
= re_malloc (int, pstr
->bufs_len
);
415 if (pstr
->offsets
== NULL
)
/* On first use, every position before BYTE_IDX maps to itself. */
418 if (!pstr
->offsets_needed
)
420 for (i
= 0; i
< byte_idx
; ++i
)
421 pstr
->offsets
[i
] = i
;
422 pstr
->offsets_needed
= 1;
425 memcpy (pstr
->mbs
+ byte_idx
, buf
, mbcdlen
);
426 pstr
->wcs
[byte_idx
] = wcu
;
427 pstr
->offsets
[byte_idx
] = src_idx
;
/* The remaining converted bytes get WEOF in WCS; their offsets are
   clamped to the last byte of the original sequence. */
428 for (i
= 1; i
< mbcdlen
; ++i
)
430 pstr
->offsets
[byte_idx
+ i
]
431 = src_idx
+ (i
< mbclen
? i
: mbclen
- 1);
432 pstr
->wcs
[byte_idx
+ i
] = WEOF
;
/* Adjust LEN (and STOP, when RAW_STOP lies beyond SRC_IDX) by the
   difference between the converted and the original byte lengths, then
   recompute END_IDX. */
434 pstr
->len
+= mbcdlen
- mbclen
;
435 if (pstr
->raw_stop
> src_idx
)
436 pstr
->stop
+= mbcdlen
- mbclen
;
437 end_idx
= (pstr
->bufs_len
> pstr
->len
)
438 ? pstr
->len
: pstr
->bufs_len
;
445 memcpy (pstr
->mbs
+ byte_idx
, p
, mbclen
);
447 if (BE (pstr
->offsets_needed
!= 0, 0))
450 for (i
= 0; i
< mbclen
; ++i
)
451 pstr
->offsets
[byte_idx
+ i
] = src_idx
+ i
;
455 pstr
->wcs
[byte_idx
++] = wcu
;
456 /* Write paddings. */
457 for (remain_len
= byte_idx
+ mbclen
- 1; byte_idx
< remain_len
;)
458 pstr
->wcs
[byte_idx
++] = WEOF
;
460 else if (mbclen
== (size_t) -1 || mbclen
== 0)
462 /* It is an invalid character or '\0'. Just use the byte. */
463 int ch
= pstr
->raw_mbs
[pstr
->raw_mbs_idx
+ src_idx
];
465 if (BE (pstr
->trans
!= NULL
, 0))
466 ch
= pstr
->trans
[ch
];
467 pstr
->mbs
[byte_idx
] = ch
;
469 if (BE (pstr
->offsets_needed
!= 0, 0))
470 pstr
->offsets
[byte_idx
] = src_idx
;
473 /* And also cast it to wide char. */
474 pstr
->wcs
[byte_idx
++] = (wchar_t) ch
;
475 if (BE (mbclen
== (size_t) -1, 0))
476 pstr
->cur_state
= prev_st
;
480 /* The buffer doesn't have enough space, finish to build. */
481 pstr
->cur_state
= prev_st
;
/* VALID_LEN counts converted bytes, VALID_RAW_LEN counts raw bytes. */
485 pstr
->valid_len
= byte_idx
;
486 pstr
->valid_raw_len
= src_idx
;
490 /* Skip characters until the index becomes greater than NEW_RAW_IDX. */
/* Advance through the raw string of PSTR one multibyte character at a
   time until the raw index reaches NEW_RAW_IDX, storing a wide character
   through LAST_WC. */
494 re_string_skip_chars (pstr
, new_raw_idx
, last_wc
)
500 int rawbuf_idx
, mbclen
;
503 /* Skip the characters which are not necessary to check. */
504 for (rawbuf_idx
= pstr
->raw_mbs_idx
+ pstr
->valid_raw_len
;
505 rawbuf_idx
< new_raw_idx
;)
508 remain_len
= pstr
->len
- rawbuf_idx
;
/* Save the conversion state so it can be restored if the conversion
   does not yield a character. */
509 prev_st
= pstr
->cur_state
;
510 mbclen
= mbrtowc (&wc
, (const char *) pstr
->raw_mbs
+ rawbuf_idx
,
511 remain_len
, &pstr
->cur_state
);
512 if (BE (mbclen
== (size_t) -2 || mbclen
== (size_t) -1 || mbclen
== 0, 0))
514 /* We treat these cases as a singlebyte character. */
516 pstr
->cur_state
= prev_st
;
518 /* Then proceed the next character. */
519 rawbuf_idx
+= mbclen
;
521 *last_wc
= (wint_t) wc
;
524 #endif /* RE_ENABLE_I18N */
526 /* Build the buffer PSTR->MBS, and apply the translation if we need.
527 This function is used in case of REG_ICASE. */
530 build_upper_buffer (pstr
)
533 int char_idx
, end_idx
;
/* Stop at whichever is smaller: the string length or the buffer length. */
534 end_idx
= (pstr
->bufs_len
> pstr
->len
) ? pstr
->len
: pstr
->bufs_len
;
/* Continue from the part that has already been built. */
536 for (char_idx
= pstr
->valid_len
; char_idx
< end_idx
; ++char_idx
)
538 int ch
= pstr
->raw_mbs
[pstr
->raw_mbs_idx
+ char_idx
];
539 if (BE (pstr
->trans
!= NULL
, 0))
540 ch
= pstr
->trans
[ch
];
542 pstr
->mbs
[char_idx
] = toupper (ch
);
544 pstr
->mbs
[char_idx
] = ch
;
/* Record how far the buffer has been built. */
546 pstr
->valid_len
= char_idx
;
547 pstr
->valid_raw_len
= char_idx
;
550 /* Apply TRANS to the buffer in PSTR: each raw byte from VALID_LEN up to
   the end index is mapped through PSTR->TRANS into PSTR->MBS. */
553 re_string_translate_buffer (pstr
)
556 int buf_idx
, end_idx
;
/* Stop at whichever is smaller: the string length or the buffer length. */
557 end_idx
= (pstr
->bufs_len
> pstr
->len
) ? pstr
->len
: pstr
->bufs_len
;
559 for (buf_idx
= pstr
->valid_len
; buf_idx
< end_idx
; ++buf_idx
)
561 int ch
= pstr
->raw_mbs
[pstr
->raw_mbs_idx
+ buf_idx
];
562 pstr
->mbs
[buf_idx
] = pstr
->trans
[ch
];
/* Record how far the buffer has been built. */
565 pstr
->valid_len
= buf_idx
;
566 pstr
->valid_raw_len
= buf_idx
;
569 /* Reconstruct the buffers so that they start at IDX.
570 Concretely: convert to wide characters when pstr->mb_cur_max > 1,
571 convert to upper case in case of REG_ICASE, and apply the translation. */
574 re_string_reconstruct (pstr
, idx
, eflags
)
/* OFFSET is the distance from the current start of the buffers to IDX. */
578 int offset
= idx
- pstr
->raw_mbs_idx
;
579 if (BE (offset
< 0, 0))
/* The statements below reset the conversion state, lengths and indices
   to their values for the start of the raw string. */
582 #ifdef RE_ENABLE_I18N
583 if (pstr
->mb_cur_max
> 1)
584 memset (&pstr
->cur_state
, '\0', sizeof (mbstate_t));
585 #endif /* RE_ENABLE_I18N */
586 pstr
->len
= pstr
->raw_len
;
587 pstr
->stop
= pstr
->raw_stop
;
589 pstr
->raw_mbs_idx
= 0;
590 pstr
->valid_raw_len
= 0;
591 pstr
->offsets_needed
= 0;
592 pstr
->tip_context
= ((eflags
& REG_NOTBOL
) ? CONTEXT_BEGBUF
593 : CONTEXT_NEWLINE
| CONTEXT_BEGBUF
);
594 if (!pstr
->mbs_allocated
)
595 pstr
->mbs
= (unsigned char *) pstr
->raw_mbs
;
599 if (BE (offset
!= 0, 1))
601 /* Are the characters which are already checked remain? */
602 if (BE (offset
< pstr
->valid_raw_len
, 1)
603 #ifdef RE_ENABLE_I18N
604 /* Handling this would enlarge the code too much.
605 Accept a slowdown in that case. */
606 && pstr
->offsets_needed
== 0
610 /* Yes, move them to the front of the buffer. */
611 pstr
->tip_context
= re_string_context_at (pstr
, offset
- 1, eflags
);
612 #ifdef RE_ENABLE_I18N
613 if (pstr
->mb_cur_max
> 1)
614 memmove (pstr
->wcs
, pstr
->wcs
+ offset
,
615 (pstr
->valid_len
- offset
) * sizeof (wint_t));
616 #endif /* RE_ENABLE_I18N */
617 if (BE (pstr
->mbs_allocated
, 0))
618 memmove (pstr
->mbs
, pstr
->mbs
+ offset
,
619 pstr
->valid_len
- offset
);
/* The first OFFSET entries were dropped from the front. */
620 pstr
->valid_len
-= offset
;
621 pstr
->valid_raw_len
-= offset
;
623 assert (pstr
->valid_len
> 0);
628 /* No, skip all characters until IDX. */
629 #ifdef RE_ENABLE_I18N
630 if (BE (pstr
->offsets_needed
, 0))
632 pstr
->len
= pstr
->raw_len
- idx
+ offset
;
633 pstr
->stop
= pstr
->raw_stop
- idx
+ offset
;
634 pstr
->offsets_needed
= 0;
638 pstr
->valid_raw_len
= 0;
639 #ifdef RE_ENABLE_I18N
640 if (pstr
->mb_cur_max
> 1)
647 const unsigned char *raw
, *p
, *q
, *end
;
649 /* Special case UTF-8. Multi-byte chars start with any
650 byte other than 0x80 - 0xbf. */
651 raw
= pstr
->raw_mbs
+ pstr
->raw_mbs_idx
;
652 end
= raw
+ (offset
- pstr
->mb_cur_max
);
/* Scan backwards from the byte just before OFFSET, going back at most
   MB_CUR_MAX bytes, for a byte that is not of the form 10xxxxxx. */
653 for (p
= raw
+ offset
- 1; p
>= end
; --p
)
654 if ((*p
& 0xc0) != 0x80)
658 int mlen
= raw
+ pstr
->len
- p
;
659 unsigned char buf
[6];
662 if (BE (pstr
->trans
!= NULL
, 0))
664 int i
= mlen
< 6 ? mlen
: 6;
666 buf
[i
] = pstr
->trans
[p
[i
]];
669 /* XXX Don't use mbrtowc, we know which conversion
670 to use (UTF-8 -> UCS4). */
671 memset (&cur_state
, 0, sizeof (cur_state
));
/* MLEN becomes the number of bytes by which the character that starts
   at P extends past OFFSET. */
672 mlen
= mbrtowc (&wc2
, p
, mlen
, &cur_state
)
673 - (raw
+ offset
- p
);
676 memset (&pstr
->cur_state
, '\0',
678 pstr
->valid_len
= mlen
;
686 pstr
->valid_len
= re_string_skip_chars (pstr
, idx
, &wc
) - idx
;
687 if (BE (pstr
->valid_len
, 0))
/* The first VALID_LEN positions are filled with WEOF in WCS and with
   the byte value 255 in MBS. */
689 for (wcs_idx
= 0; wcs_idx
< pstr
->valid_len
; ++wcs_idx
)
690 pstr
->wcs
[wcs_idx
] = WEOF
;
691 if (pstr
->mbs_allocated
)
692 memset (pstr
->mbs
, 255, pstr
->valid_len
);
694 pstr
->valid_raw_len
= pstr
->valid_len
;
695 pstr
->tip_context
= ((BE (pstr
->word_ops_used
!= 0, 0)
696 && IS_WIDE_WORD_CHAR (wc
))
698 : ((IS_WIDE_NEWLINE (wc
)
699 && pstr
->newline_anchor
)
700 ? CONTEXT_NEWLINE
: 0));
703 #endif /* RE_ENABLE_I18N */
/* C is the raw byte immediately before the new start position. */
705 int c
= pstr
->raw_mbs
[pstr
->raw_mbs_idx
+ offset
- 1];
708 pstr
->tip_context
= (bitset_contain (pstr
->word_char
, c
)
710 : ((IS_NEWLINE (c
) && pstr
->newline_anchor
)
711 ? CONTEXT_NEWLINE
: 0));
714 if (!BE (pstr
->mbs_allocated
, 0))
/* The buffers now start at IDX. */
717 pstr
->raw_mbs_idx
= idx
;
719 pstr
->stop
-= offset
;
721 /* Then build the buffers. */
722 #ifdef RE_ENABLE_I18N
723 if (pstr
->mb_cur_max
> 1)
727 int ret
= build_wcs_upper_buffer (pstr
);
728 if (BE (ret
!= REG_NOERROR
, 0))
732 build_wcs_buffer (pstr
);
735 #endif /* RE_ENABLE_I18N */
736 if (BE (pstr
->mbs_allocated
, 0))
739 build_upper_buffer (pstr
);
740 else if (pstr
->trans
!= NULL
)
741 re_string_translate_buffer (pstr
);
744 pstr
->valid_len
= pstr
->len
;
/* Look at the byte IDX positions past the current index of PSTR.  The
   byte is read from the raw string RAW_MBS unless one of the cases
   below defers to re_string_peek_byte. */
751 re_string_peek_byte_case (pstr
, idx
)
752 const re_string_t
*pstr
;
757 /* Handle the common (easiest) cases first. */
758 if (BE (!pstr
->mbs_allocated
, 1))
759 return re_string_peek_byte (pstr
, idx
);
761 #ifdef RE_ENABLE_I18N
762 if (pstr
->mb_cur_max
> 1
763 && ! re_string_is_single_byte_char (pstr
, pstr
->cur_idx
+ idx
))
764 return re_string_peek_byte (pstr
, idx
);
767 off
= pstr
->cur_idx
+ idx
;
768 #ifdef RE_ENABLE_I18N
/* When offsets are in use, map the buffer position to a raw offset. */
769 if (pstr
->offsets_needed
)
770 off
= pstr
->offsets
[off
];
773 ch
= pstr
->raw_mbs
[pstr
->raw_mbs_idx
+ off
];
775 #ifdef RE_ENABLE_I18N
776 /* Ensure that e.g. for tr_TR.UTF-8 BACKSLASH DOTLESS SMALL LETTER I
777 this function returns CAPITAL LETTER I instead of first byte of
778 DOTLESS SMALL LETTER I. The latter would confuse the parser,
779 since peek_byte_case doesn't advance cur_idx in any way. */
780 if (pstr
->offsets_needed
&& !isascii (ch
))
781 return re_string_peek_byte (pstr
, idx
);
/* Fetch the byte at the current index of PSTR and advance past it.  The
   byte comes from the raw string RAW_MBS unless one of the cases below
   defers to re_string_fetch_byte. */
788 re_string_fetch_byte_case (pstr
)
791 if (BE (!pstr
->mbs_allocated
, 1))
792 return re_string_fetch_byte (pstr
);
794 #ifdef RE_ENABLE_I18N
795 if (pstr
->offsets_needed
)
799 /* For tr_TR.UTF-8 [[:islower:]] there is
800 [[: CAPITAL LETTER I WITH DOT lower:]] in mbs. Skip
801 in that case the whole multi-byte character and return
802 the original letter. On the other side, with
803 [[: DOTLESS SMALL LETTER I return [[:I, as doing
804 anything else would complicate things too much. */
806 if (!re_string_first_byte (pstr
, pstr
->cur_idx
))
807 return re_string_fetch_byte (pstr
);
/* Map the current buffer position to the corresponding raw byte. */
809 off
= pstr
->offsets
[pstr
->cur_idx
];
810 ch
= pstr
->raw_mbs
[pstr
->raw_mbs_idx
+ off
];
813 return re_string_fetch_byte (pstr
);
/* Skip the whole character at the current index. */
815 re_string_skip_bytes (pstr
,
816 re_string_char_size_at (pstr
, pstr
->cur_idx
));
/* Return the raw byte at the current index and advance CUR_IDX. */
821 return pstr
->raw_mbs
[pstr
->raw_mbs_idx
+ pstr
->cur_idx
++];
/* Free memory held by PSTR.  OFFSETS is freed inside the RE_ENABLE_I18N
   section; the check on MBS_ALLOCATED guards a statement that is not
   visible here. */
825 re_string_destruct (pstr
)
828 #ifdef RE_ENABLE_I18N
830 re_free (pstr
->offsets
);
831 #endif /* RE_ENABLE_I18N */
832 if (pstr
->mbs_allocated
)
836 /* Return the context at IDX in INPUT, taking EFLAGS into account at the
   end of the string. */
839 re_string_context_at (input
, idx
, eflags
)
840 const re_string_t
*input
;
845 /* In this case, we use the value stored in input->tip_context,
846 since we can't know the character in input->mbs[-1] here. */
847 return input
->tip_context
;
/* At the end of the string the context depends on REG_NOTEOL. */
848 if (BE (idx
== input
->len
, 0))
849 return ((eflags
& REG_NOTEOL
) ? CONTEXT_ENDBUF
850 : CONTEXT_NEWLINE
| CONTEXT_ENDBUF
);
851 #ifdef RE_ENABLE_I18N
852 if (input
->mb_cur_max
> 1)
/* Positions holding WEOF are padding rather than the start of a
   character. */
856 while(input
->wcs
[wc_idx
] == WEOF
)
859 /* It must not happen. */
860 assert (wc_idx
>= 0);
864 return input
->tip_context
;
866 wc
= input
->wcs
[wc_idx
];
867 if (BE (input
->word_ops_used
!= 0, 0) && IS_WIDE_WORD_CHAR (wc
))
869 return (IS_WIDE_NEWLINE (wc
) && input
->newline_anchor
870 ? CONTEXT_NEWLINE
: 0);
/* Classify the byte at IDX. */
875 c
= re_string_byte_at (input
, idx
);
876 if (bitset_contain (input
->word_char
, c
))
878 return IS_NEWLINE (c
) && input
->newline_anchor
? CONTEXT_NEWLINE
: 0;
882 /* Functions for set operations. */
/* Allocate room for SIZE elements in SET->elems with re_malloc, then
   test the result for NULL. */
885 re_node_set_alloc (set
, size
)
891 set
->elems
= re_malloc (int, size
);
892 if (BE (set
->elems
== NULL
, 0))
/* Initialize SET to hold the single element ELEM. */
898 re_node_set_init_1 (set
, elem
)
904 set
->elems
= re_malloc (int, 1);
905 if (BE (set
->elems
== NULL
, 0))
/* On allocation failure the set is left with zero capacity and size. */
907 set
->alloc
= set
->nelem
= 0;
910 set
->elems
[0] = elem
;
/* Initialize SET from the two elements ELEM1 and ELEM2.  Room for two
   elements is allocated; the stores below cover a one-element form and
   both orderings of the pair (the selecting conditions are not visible
   here). */
915 re_node_set_init_2 (set
, elem1
, elem2
)
920 set
->elems
= re_malloc (int, 2);
921 if (BE (set
->elems
== NULL
, 0))
926 set
->elems
[0] = elem1
;
933 set
->elems
[0] = elem1
;
934 set
->elems
[1] = elem2
;
938 set
->elems
[0] = elem2
;
939 set
->elems
[1] = elem1
;
/* Initialize DEST as a copy of SRC. */
946 re_node_set_init_copy (dest
, src
)
948 const re_node_set
*src
;
950 dest
->nelem
= src
->nelem
;
/* Allocate exactly as many slots as there are elements. */
953 dest
->alloc
= dest
->nelem
;
954 dest
->elems
= re_malloc (int, dest
->alloc
);
955 if (BE (dest
->elems
== NULL
, 0))
/* On allocation failure DEST is left with zero capacity and size. */
957 dest
->alloc
= dest
->nelem
= 0;
960 memcpy (dest
->elems
, src
->elems
, src
->nelem
* sizeof (int));
963 re_node_set_init_empty (dest
);
967 /* Calculate the intersection of the sets SRC1 and SRC2, and merge it into
968 DEST. The return value is the error code, or REG_NOERROR on success.
969 Note: We assume dest->elems is NULL when dest->alloc is 0. */
972 re_node_set_add_intersect (dest
, src1
, src2
)
974 const re_node_set
*src1
, *src2
;
976 int i1
, i2
, is
, id
, delta
, sbase
;
/* Test whether either source set is empty. */
977 if (src1
->nelem
== 0 || src2
->nelem
== 0)
980 /* We need dest->nelem + 2 * elems_in_intersection; this is a
981 conservative estimate. */
982 if (src1
->nelem
+ src2
->nelem
+ dest
->nelem
> dest
->alloc
)
984 int new_alloc
= src1
->nelem
+ src2
->nelem
+ dest
->alloc
;
985 int *new_elems
= re_realloc (dest
->elems
, int, new_alloc
);
986 if (BE (new_elems
== NULL
, 0))
988 dest
->elems
= new_elems
;
989 dest
->alloc
= new_alloc
;
992 /* Find the items in the intersection of SRC1 and SRC2, and copy
993 into the top of DEST those that are not already in DEST itself. */
/* SBASE starts one past the top of the scratch area and is
   pre-decremented for every item stored there.  All three sets are
   scanned from their last elements downwards. */
994 sbase
= dest
->nelem
+ src1
->nelem
+ src2
->nelem
;
995 i1
= src1
->nelem
- 1;
996 i2
= src2
->nelem
- 1;
997 id
= dest
->nelem
- 1;
1000 if (src1
->elems
[i1
] == src2
->elems
[i2
])
1002 /* Try to find the item in DEST. Maybe we could binary search? */
1003 while (id
>= 0 && dest
->elems
[id
] > src1
->elems
[i1
])
1006 if (id
< 0 || dest
->elems
[id
] != src1
->elems
[i1
])
1007 dest
->elems
[--sbase
] = src1
->elems
[i1
];
1009 if (--i1
< 0 || --i2
< 0)
1013 /* Lower the highest of the two items. */
1014 else if (src1
->elems
[i1
] < src2
->elems
[i2
])
/* ID indexes the last original DEST element, IS the top of the scratch
   area, and DELTA is the number of items stored there. */
1026 id
= dest
->nelem
- 1;
1027 is
= dest
->nelem
+ src1
->nelem
+ src2
->nelem
- 1;
1028 delta
= is
- sbase
+ 1;
1030 /* Now copy. When DELTA becomes zero, the remaining
1031 DEST elements are already in place; this is more or
1032 less the same loop that is in re_node_set_merge. */
1033 dest
->nelem
+= delta
;
1034 if (delta
> 0 && id
>= 0)
1037 if (dest
->elems
[is
] > dest
->elems
[id
])
1039 /* Copy from the top. */
1040 dest
->elems
[id
+ delta
--] = dest
->elems
[is
--];
1046 /* Slide from the bottom. */
1047 dest
->elems
[id
+ delta
] = dest
->elems
[id
];
1053 /* Copy remaining SRC elements. */
1054 memcpy (dest
->elems
, dest
->elems
+ sbase
, delta
* sizeof (int));
1059 /* Calculate the union of the sets SRC1 and SRC2, and store it in
1060 DEST. The return value is the error code, or REG_NOERROR on success. */
1062 static reg_errcode_t
1063 re_node_set_init_union (dest
, src1
, src2
)
1065 const re_node_set
*src1
, *src2
;
/* Both sets are non-empty: allocate room for every element of both. */
1068 if (src1
!= NULL
&& src1
->nelem
> 0 && src2
!= NULL
&& src2
->nelem
> 0)
1070 dest
->alloc
= src1
->nelem
+ src2
->nelem
;
1071 dest
->elems
= re_malloc (int, dest
->alloc
);
1072 if (BE (dest
->elems
== NULL
, 0))
/* If only one of the sets is non-empty, the union is a copy of it. */
1077 if (src1
!= NULL
&& src1
->nelem
> 0)
1078 return re_node_set_init_copy (dest
, src1
);
1079 else if (src2
!= NULL
&& src2
->nelem
> 0)
1080 return re_node_set_init_copy (dest
, src2
);
1082 re_node_set_init_empty (dest
);
/* Merge the two sets front to back, for as long as both have elements
   left. */
1085 for (i1
= i2
= id
= 0 ; i1
< src1
->nelem
&& i2
< src2
->nelem
;)
1087 if (src1
->elems
[i1
] > src2
->elems
[i2
])
1089 dest
->elems
[id
++] = src2
->elems
[i2
++];
1092 if (src1
->elems
[i1
] == src2
->elems
[i2
])
1094 dest
->elems
[id
++] = src1
->elems
[i1
++];
/* Append whatever remains in the set that was not exhausted. */
1096 if (i1
< src1
->nelem
)
1098 memcpy (dest
->elems
+ id
, src1
->elems
+ i1
,
1099 (src1
->nelem
- i1
) * sizeof (int));
1100 id
+= src1
->nelem
- i1
;
1102 else if (i2
< src2
->nelem
)
1104 memcpy (dest
->elems
+ id
, src2
->elems
+ i2
,
1105 (src2
->nelem
- i2
) * sizeof (int));
1106 id
+= src2
->nelem
- i2
;
1112 /* Calculate the union of the sets DEST and SRC, and store it in
1113 DEST. The return value is the error code, or REG_NOERROR on success. */
1115 static reg_errcode_t
1116 re_node_set_merge (dest
, src
)
1118 const re_node_set
*src
;
1120 int is
, id
, sbase
, delta
;
/* Test for a missing or empty SRC. */
1121 if (src
== NULL
|| src
->nelem
== 0)
/* Room is needed for DEST's elements plus twice SRC's, because the top
   of DEST is used as scratch space below. */
1123 if (dest
->alloc
< 2 * src
->nelem
+ dest
->nelem
)
1125 int new_alloc
= 2 * (src
->nelem
+ dest
->alloc
);
1126 int *new_buffer
= re_realloc (dest
->elems
, int, new_alloc
);
1127 if (BE (new_buffer
== NULL
, 0))
1129 dest
->elems
= new_buffer
;
1130 dest
->alloc
= new_alloc
;
/* When DEST is empty the union is simply a copy of SRC. */
1133 if (BE (dest
->nelem
== 0, 0))
1135 dest
->nelem
= src
->nelem
;
1136 memcpy (dest
->elems
, src
->elems
, src
->nelem
* sizeof (int));
1140 /* Copy into the top of DEST the items of SRC that are not
1141 found in DEST. Maybe we could binary search in DEST? */
1142 for (sbase
= dest
->nelem
+ 2 * src
->nelem
,
1143 is
= src
->nelem
- 1, id
= dest
->nelem
- 1; is
>= 0 && id
>= 0; )
1145 if (dest
->elems
[id
] == src
->elems
[is
])
1147 else if (dest
->elems
[id
] < src
->elems
[is
])
1148 dest
->elems
[--sbase
] = src
->elems
[is
--];
1149 else /* if (dest->elems[id] > src->elems[is]) */
1155 /* If DEST is exhausted, the remaining items of SRC must be unique. */
1157 memcpy (dest
->elems
+ sbase
, src
->elems
, (is
+ 1) * sizeof (int));
/* ID indexes the last original DEST element, IS the top of the scratch
   area, and DELTA is the number of items stored there. */
1160 id
= dest
->nelem
- 1;
1161 is
= dest
->nelem
+ 2 * src
->nelem
- 1;
1162 delta
= is
- sbase
+ 1;
1166 /* Now copy. When DELTA becomes zero, the remaining
1167 DEST elements are already in place. */
1168 dest
->nelem
+= delta
;
1171 if (dest
->elems
[is
] > dest
->elems
[id
])
1173 /* Copy from the top. */
1174 dest
->elems
[id
+ delta
--] = dest
->elems
[is
--];
1180 /* Slide from the bottom. */
1181 dest
->elems
[id
+ delta
] = dest
->elems
[id
];
1184 /* Copy remaining SRC elements. */
1185 memcpy (dest
->elems
, dest
->elems
+ sbase
,
1186 delta
* sizeof (int));
1195 /* Insert the new element ELEM into the re_node_set* SET.
1196 SET should not already have ELEM.
1197 Return -1 if an error occurs, return 1 otherwise. */
1200 re_node_set_insert (set
, elem
)
1205 /* In case the set is empty. */
1206 if (set
->alloc
== 0)
1208 if (BE (re_node_set_init_1 (set
, elem
) == REG_NOERROR
, 1))
1214 if (BE (set
->nelem
, 0) == 0)
1216 /* We already guaranteed above that set->alloc != 0. */
1217 set
->elems
[0] = elem
;
1222 /* Realloc if we need. */
1223 if (set
->alloc
== set
->nelem
)
/* Double the capacity. */
1226 set
->alloc
= set
->alloc
* 2;
1227 new_array
= re_realloc (set
->elems
, int, set
->alloc
);
1228 if (BE (new_array
== NULL
, 0))
1230 set
->elems
= new_array
;
1233 /* Move the elements which follows the new element. Test the
1234 first element separately to skip a check in the inner loop. */
1235 if (elem
< set
->elems
[0])
/* ELEM is smaller than the first element: shift everything up one
   slot. */
1238 for (idx
= set
->nelem
; idx
> 0; idx
--)
1239 set
->elems
[idx
] = set
->elems
[idx
- 1];
/* Otherwise shift only the elements greater than ELEM.  This loop has
   no lower-bound test on IDX. */
1243 for (idx
= set
->nelem
; set
->elems
[idx
- 1] > elem
; idx
--)
1244 set
->elems
[idx
] = set
->elems
[idx
- 1];
1247 /* Insert the new element. */
1248 set
->elems
[idx
] = elem
;
1253 /* Compare two node sets SET1 and SET2.
1254 Return 1 if SET1 and SET2 are equivalent, return 0 otherwise. */
1257 re_node_set_compare (set1
, set2
)
1258 const re_node_set
*set1
, *set2
;
/* Test for a NULL set or differing sizes first. */
1261 if (set1
== NULL
|| set2
== NULL
|| set1
->nelem
!= set2
->nelem
)
/* Compare element by element, from the last index down to 0. */
1263 for (i
= set1
->nelem
; --i
>= 0 ; )
1264 if (set1
->elems
[i
] != set2
->elems
[i
])
1269 /* Return (idx + 1) if SET contains the element ELEM, return 0 otherwise. */
1272 re_node_set_contains (set
, elem
)
1273 const re_node_set
*set
;
1276 int idx
, right
, mid
;
/* Test for an empty set first. */
1277 if (set
->nelem
<= 0)
1280 /* Binary search the element. */
1282 right
= set
->nelem
- 1;
1285 mid
= (idx
+ right
) / 2;
1286 if (set
->elems
[mid
] < elem
)
/* IDX + 1 keeps a hit at index 0 distinguishable from the 0 that
   signals a miss. */
1291 return set
->elems
[idx
] == elem
? idx
+ 1 : 0;
/* Remove the element at index IDX from SET by moving each following
   element down one slot.  IDX is range-checked first. */
1295 re_node_set_remove_at (set
, idx
)
1299 if (idx
< 0 || idx
>= set
->nelem
)
1302 for (; idx
< set
->nelem
; idx
++)
1303 set
->elems
[idx
] = set
->elems
[idx
+ 1];
1307 /* Add the token TOKEN to dfa->nodes, and return the index of the token.
1308 Or return -1 if an error occurs. */
1311 re_dfa_add_node (dfa
, token
, mode
)
/* The node array is full: double its capacity. */
1316 if (BE (dfa
->nodes_len
>= dfa
->nodes_alloc
, 0))
1318 int new_nodes_alloc
= dfa
->nodes_alloc
* 2;
1319 re_token_t
*new_array
= re_realloc (dfa
->nodes
, re_token_t
,
1321 if (BE (new_array
== NULL
, 0))
1323 dfa
->nodes
= new_array
;
/* The per-node arrays are resized to the same capacity. */
1326 int *new_nexts
, *new_indices
;
1327 re_node_set
*new_edests
, *new_eclosures
, *new_inveclosures
;
1329 new_nexts
= re_realloc (dfa
->nexts
, int, new_nodes_alloc
);
1330 new_indices
= re_realloc (dfa
->org_indices
, int, new_nodes_alloc
);
1331 new_edests
= re_realloc (dfa
->edests
, re_node_set
, new_nodes_alloc
);
1332 new_eclosures
= re_realloc (dfa
->eclosures
, re_node_set
,
1334 new_inveclosures
= re_realloc (dfa
->inveclosures
, re_node_set
,
/* All five results are tested for NULL together, after every
   reallocation has been attempted. */
1336 if (BE (new_nexts
== NULL
|| new_indices
== NULL
1337 || new_edests
== NULL
|| new_eclosures
== NULL
1338 || new_inveclosures
== NULL
, 0))
1340 dfa
->nexts
= new_nexts
;
1341 dfa
->org_indices
= new_indices
;
1342 dfa
->edests
= new_edests
;
1343 dfa
->eclosures
= new_eclosures
;
1344 dfa
->inveclosures
= new_inveclosures
;
1346 dfa
->nodes_alloc
= new_nodes_alloc
;
/* Store TOKEN in the next free slot and clear its opt_subexp,
   duplicated and constraint fields. */
1348 dfa
->nodes
[dfa
->nodes_len
] = token
;
1349 dfa
->nodes
[dfa
->nodes_len
].opt_subexp
= 0;
1350 dfa
->nodes
[dfa
->nodes_len
].duplicated
= 0;
1351 dfa
->nodes
[dfa
->nodes_len
].constraint
= 0;
/* Return the index just filled and advance NODES_LEN. */
1352 return dfa
->nodes_len
++;
/* Compute a hash for the node set NODES and the context CONTEXT: the
   element count plus CONTEXT plus every element of NODES. */
1355 static unsigned int inline
1356 calc_state_hash (nodes
, context
)
1357 const re_node_set
*nodes
;
1358 unsigned int context
;
1360 unsigned int hash
= nodes
->nelem
+ context
;
1362 for (i
= 0 ; i
< nodes
->nelem
; i
++)
1363 hash
+= nodes
->elems
[i
];
1367 /* Search for the state whose node_set is equivalent to NODES.
1368 Return the pointer to the state, if we found it in the DFA.
1369 Otherwise create a new one and return it. In case of an error,
1370 return NULL and set the error code in ERR.
1371 Note: - We treat NULL as the invalid state, so it is possible that the
1372 return value is NULL and ERR is REG_NOERROR.
1373 - We never return a non-NULL value in case of any error; this is for optimization. */
/* Look up, by hash, the state whose node set equals NODES; create it
   with create_ci_newstate if no such state is found. */
1376 static re_dfastate_t
*
1377 re_acquire_state (err
, dfa
, nodes
)
1380 const re_node_set
*nodes
;
1383 re_dfastate_t
*new_state
;
1384 struct re_state_table_entry
*spot
;
/* Test for an empty node set first. */
1386 if (BE (nodes
->nelem
== 0, 0))
/* The hash is computed with a context of 0; its masked value selects
   the bucket SPOT. */
1391 hash
= calc_state_hash (nodes
, 0);
1392 spot
= dfa
->state_table
+ (hash
& dfa
->state_hash_mask
);
1394 for (i
= 0 ; i
< spot
->num
; i
++)
1396 re_dfastate_t
*state
= spot
->array
[i
];
/* Compare the stored hash before comparing the node sets. */
1397 if (hash
!= state
->hash
)
1399 if (re_node_set_compare (&state
->nodes
, nodes
))
1403 /* There are no appropriate state in the dfa, create the new one. */
1404 new_state
= create_ci_newstate (dfa
, nodes
, hash
);
1405 if (BE (new_state
!= NULL
, 1))
1414 /* Search for the state whose node_set is equivalent to NODES and
1415 whose context is equivalent to CONTEXT.
1416 Return the pointer to the state, if we found it in the DFA.
1417 Otherwise create the new one and return it. In case of an error
1418 return NULL and set the error code in ERR.
1419 Note: - We assume NULL as the invalid state, then it is possible that
1420 return value is NULL and ERR is REG_NOERROR.
1421 - We never return non-NULL value in case of any errors, it is for
1424 static re_dfastate_t
*
1425 re_acquire_state_context (err
, dfa
, nodes
, context
)
1428 const re_node_set
*nodes
;
1429 unsigned int context
;
1432 re_dfastate_t
*new_state
;
1433 struct re_state_table_entry
*spot
;
1435 if (nodes
->nelem
== 0)
1440 hash
= calc_state_hash (nodes
, context
);
1441 spot
= dfa
->state_table
+ (hash
& dfa
->state_hash_mask
);
1443 for (i
= 0 ; i
< spot
->num
; i
++)
1445 re_dfastate_t
*state
= spot
->array
[i
];
1446 if (state
->hash
== hash
1447 && state
->context
== context
1448 && re_node_set_compare (state
->entrance_nodes
, nodes
))
1451 /* There are no appropriate state in `dfa', create the new one. */
1452 new_state
= create_cd_newstate (dfa
, nodes
, context
, hash
);
1453 if (BE (new_state
!= NULL
, 1))
1462 /* Allocate memory for DFA state and initialize common properties.
1463 Return the new state if succeeded, otherwise return NULL. */
1465 static re_dfastate_t
*
1466 create_newstate_common (dfa
, nodes
, hash
)
1468 const re_node_set
*nodes
;
1471 re_dfastate_t
*newstate
;
1473 newstate
= (re_dfastate_t
*) calloc (sizeof (re_dfastate_t
), 1);
1474 if (BE (newstate
== NULL
, 0))
1476 err
= re_node_set_init_copy (&newstate
->nodes
, nodes
);
1477 if (BE (err
!= REG_NOERROR
, 0))
1482 newstate
->trtable
= NULL
;
1483 newstate
->hash
= hash
;
1487 /* Store the new state NEWSTATE whose hash value is HASH in appropriate
1488 position. Return value indicate the error code if failed. */
1490 static reg_errcode_t
1491 register_state (dfa
, newstate
, hash
)
1493 re_dfastate_t
*newstate
;
1496 struct re_state_table_entry
*spot
;
1497 spot
= dfa
->state_table
+ (hash
& dfa
->state_hash_mask
);
1499 if (BE (spot
->alloc
<= spot
->num
, 0))
1501 int new_alloc
= 2 * spot
->num
+ 2;
1502 re_dfastate_t
**new_array
= re_realloc (spot
->array
, re_dfastate_t
*,
1504 if (BE (new_array
== NULL
, 0))
1506 spot
->array
= new_array
;
1507 spot
->alloc
= new_alloc
;
1509 spot
->array
[spot
->num
++] = newstate
;
1513 /* Create the new state which is independ of contexts.
1514 Return the new state if succeeded, otherwise return NULL. */
1516 static re_dfastate_t
*
1517 create_ci_newstate (dfa
, nodes
, hash
)
1519 const re_node_set
*nodes
;
1524 re_dfastate_t
*newstate
;
1525 newstate
= create_newstate_common (dfa
, nodes
, hash
);
1526 if (BE (newstate
== NULL
, 0))
1528 newstate
->entrance_nodes
= &newstate
->nodes
;
1530 for (i
= 0 ; i
< nodes
->nelem
; i
++)
1532 re_token_t
*node
= dfa
->nodes
+ nodes
->elems
[i
];
1533 re_token_type_t type
= node
->type
;
1534 if (type
== CHARACTER
&& !node
->constraint
)
1537 /* If the state has the halt node, the state is a halt state. */
1538 else if (type
== END_OF_RE
)
1540 #ifdef RE_ENABLE_I18N
1541 else if (type
== COMPLEX_BRACKET
1542 || type
== OP_UTF8_PERIOD
1543 || (type
== OP_PERIOD
&& dfa
->mb_cur_max
> 1))
1544 newstate
->accept_mb
= 1;
1545 #endif /* RE_ENABLE_I18N */
1546 else if (type
== OP_BACK_REF
)
1547 newstate
->has_backref
= 1;
1548 else if (type
== ANCHOR
|| node
->constraint
)
1549 newstate
->has_constraint
= 1;
1551 err
= register_state (dfa
, newstate
, hash
);
1552 if (BE (err
!= REG_NOERROR
, 0))
1554 free_state (newstate
);
1560 /* Create the new state which is depend on the context CONTEXT.
1561 Return the new state if succeeded, otherwise return NULL. */
1563 static re_dfastate_t
*
1564 create_cd_newstate (dfa
, nodes
, context
, hash
)
1566 const re_node_set
*nodes
;
1567 unsigned int context
, hash
;
1569 int i
, nctx_nodes
= 0;
1571 re_dfastate_t
*newstate
;
1573 newstate
= create_newstate_common (dfa
, nodes
, hash
);
1574 if (BE (newstate
== NULL
, 0))
1576 newstate
->context
= context
;
1577 newstate
->entrance_nodes
= &newstate
->nodes
;
1579 for (i
= 0 ; i
< nodes
->nelem
; i
++)
1581 unsigned int constraint
= 0;
1582 re_token_t
*node
= dfa
->nodes
+ nodes
->elems
[i
];
1583 re_token_type_t type
= node
->type
;
1584 if (node
->constraint
)
1585 constraint
= node
->constraint
;
1587 if (type
== CHARACTER
&& !constraint
)
1589 /* If the state has the halt node, the state is a halt state. */
1590 else if (type
== END_OF_RE
)
1592 #ifdef RE_ENABLE_I18N
1593 else if (type
== COMPLEX_BRACKET
1594 || type
== OP_UTF8_PERIOD
1595 || (type
== OP_PERIOD
&& dfa
->mb_cur_max
> 1))
1596 newstate
->accept_mb
= 1;
1597 #endif /* RE_ENABLE_I18N */
1598 else if (type
== OP_BACK_REF
)
1599 newstate
->has_backref
= 1;
1600 else if (type
== ANCHOR
)
1601 constraint
= node
->opr
.ctx_type
;
1605 if (newstate
->entrance_nodes
== &newstate
->nodes
)
1607 newstate
->entrance_nodes
= re_malloc (re_node_set
, 1);
1608 if (BE (newstate
->entrance_nodes
== NULL
, 0))
1610 free_state (newstate
);
1613 re_node_set_init_copy (newstate
->entrance_nodes
, nodes
);
1615 newstate
->has_constraint
= 1;
1618 if (NOT_SATISFY_PREV_CONSTRAINT (constraint
,context
))
1620 re_node_set_remove_at (&newstate
->nodes
, i
- nctx_nodes
);
1625 err
= register_state (dfa
, newstate
, hash
);
1626 if (BE (err
!= REG_NOERROR
, 0))
1628 free_state (newstate
);
1636 re_dfastate_t
*state
;
1638 if (state
->entrance_nodes
!= &state
->nodes
)
1640 re_node_set_free (state
->entrance_nodes
);
1641 re_free (state
->entrance_nodes
);
1643 re_node_set_free (&state
->nodes
);
1644 re_free (state
->trtable
);