1 /* Extended regular expression matching and search library.
2 Copyright (C) 2002, 2003, 2004, 2005, 2006 Free Software Foundation, Inc.
3 This file is part of the GNU C Library.
4 Contributed by Isamu Hasegawa <isamu@yamato.ibm.com>.
6 The GNU C Library is free software; you can redistribute it and/or
7 modify it under the terms of the GNU Lesser General Public
8 License as published by the Free Software Foundation; either
9 version 2.1 of the License, or (at your option) any later version.
11 The GNU C Library is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 Lesser General Public License for more details.
16 You should have received a copy of the GNU Lesser General Public
17 License along with the GNU C Library; if not, write to the Free
18 Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
21 static void re_string_construct_common (const char *str
, int len
,
23 RE_TRANSLATE_TYPE trans
, int icase
,
24 const re_dfa_t
*dfa
) internal_function
;
25 static re_dfastate_t
*create_ci_newstate (const re_dfa_t
*dfa
,
26 const re_node_set
*nodes
,
27 unsigned int hash
) internal_function
;
28 static re_dfastate_t
*create_cd_newstate (const re_dfa_t
*dfa
,
29 const re_node_set
*nodes
,
31 unsigned int hash
) internal_function
;
33 /* Functions for string operation. */
35 /* This function allocate the buffers. It is necessary to call
36 re_string_reconstruct before using the object. */
40 re_string_allocate (re_string_t
*pstr
, const char *str
, int len
, int init_len
,
41 RE_TRANSLATE_TYPE trans
, int icase
, const re_dfa_t
*dfa
)
46 /* Ensure at least one character fits into the buffers. */
47 if (init_len
< dfa
->mb_cur_max
)
48 init_len
= dfa
->mb_cur_max
;
49 init_buf_len
= (len
+ 1 < init_len
) ? len
+ 1: init_len
;
50 re_string_construct_common (str
, len
, pstr
, trans
, icase
, dfa
);
52 ret
= re_string_realloc_buffers (pstr
, init_buf_len
);
53 if (BE (ret
!= REG_NOERROR
, 0))
56 pstr
->word_char
= dfa
->word_char
;
57 pstr
->word_ops_used
= dfa
->word_ops_used
;
58 pstr
->mbs
= pstr
->mbs_allocated
? pstr
->mbs
: (unsigned char *) str
;
59 pstr
->valid_len
= (pstr
->mbs_allocated
|| dfa
->mb_cur_max
> 1) ? 0 : len
;
60 pstr
->valid_raw_len
= pstr
->valid_len
;
64 /* This function allocate the buffers, and initialize them. */
68 re_string_construct (re_string_t
*pstr
, const char *str
, int len
,
69 RE_TRANSLATE_TYPE trans
, int icase
, const re_dfa_t
*dfa
)
72 memset (pstr
, '\0', sizeof (re_string_t
));
73 re_string_construct_common (str
, len
, pstr
, trans
, icase
, dfa
);
77 ret
= re_string_realloc_buffers (pstr
, len
+ 1);
78 if (BE (ret
!= REG_NOERROR
, 0))
81 pstr
->mbs
= pstr
->mbs_allocated
? pstr
->mbs
: (unsigned char *) str
;
86 if (dfa
->mb_cur_max
> 1)
90 ret
= build_wcs_upper_buffer (pstr
);
91 if (BE (ret
!= REG_NOERROR
, 0))
93 if (pstr
->valid_raw_len
>= len
)
95 if (pstr
->bufs_len
> pstr
->valid_len
+ dfa
->mb_cur_max
)
97 ret
= re_string_realloc_buffers (pstr
, pstr
->bufs_len
* 2);
98 if (BE (ret
!= REG_NOERROR
, 0))
103 #endif /* RE_ENABLE_I18N */
104 build_upper_buffer (pstr
);
108 #ifdef RE_ENABLE_I18N
109 if (dfa
->mb_cur_max
> 1)
110 build_wcs_buffer (pstr
);
112 #endif /* RE_ENABLE_I18N */
115 re_string_translate_buffer (pstr
);
118 pstr
->valid_len
= pstr
->bufs_len
;
119 pstr
->valid_raw_len
= pstr
->bufs_len
;
127 /* Helper functions for re_string_allocate, and re_string_construct. */
131 re_string_realloc_buffers (re_string_t
*pstr
, int new_buf_len
)
133 #ifdef RE_ENABLE_I18N
134 if (pstr
->mb_cur_max
> 1)
136 wint_t *new_wcs
= re_realloc (pstr
->wcs
, wint_t, new_buf_len
);
137 if (BE (new_wcs
== NULL
, 0))
140 if (pstr
->offsets
!= NULL
)
142 int *new_offsets
= re_realloc (pstr
->offsets
, int, new_buf_len
);
143 if (BE (new_offsets
== NULL
, 0))
145 pstr
->offsets
= new_offsets
;
148 #endif /* RE_ENABLE_I18N */
149 if (pstr
->mbs_allocated
)
151 unsigned char *new_mbs
= re_realloc (pstr
->mbs
, unsigned char,
153 if (BE (new_mbs
== NULL
, 0))
157 pstr
->bufs_len
= new_buf_len
;
164 re_string_construct_common (const char *str
, int len
, re_string_t
*pstr
,
165 RE_TRANSLATE_TYPE trans
, int icase
,
168 pstr
->raw_mbs
= (const unsigned char *) str
;
172 pstr
->icase
= icase
? 1 : 0;
173 pstr
->mbs_allocated
= (trans
!= NULL
|| icase
);
174 pstr
->mb_cur_max
= dfa
->mb_cur_max
;
175 pstr
->is_utf8
= dfa
->is_utf8
;
176 pstr
->map_notascii
= dfa
->map_notascii
;
177 pstr
->stop
= pstr
->len
;
178 pstr
->raw_stop
= pstr
->stop
;
181 #ifdef RE_ENABLE_I18N
183 /* Build wide character buffer PSTR->WCS.
184 If the byte sequence of the string are:
185 <mb1>(0), <mb1>(1), <mb2>(0), <mb2>(1), <sb3>
186 Then wide character buffer will be:
187 <wc1> , WEOF , <wc2> , WEOF , <wc3>
188 We use WEOF for padding, they indicate that the position isn't
189 a first byte of a multibyte character.
191 Note that this function assumes PSTR->VALID_LEN elements are already
192 built and starts from PSTR->VALID_LEN. */
196 build_wcs_buffer (re_string_t
*pstr
)
199 unsigned char buf
[MB_LEN_MAX
];
200 assert (MB_LEN_MAX
>= pstr
->mb_cur_max
);
202 unsigned char buf
[64];
205 int byte_idx
, end_idx
, remain_len
;
208 /* Build the buffers from pstr->valid_len to either pstr->len or
210 end_idx
= (pstr
->bufs_len
> pstr
->len
) ? pstr
->len
: pstr
->bufs_len
;
211 for (byte_idx
= pstr
->valid_len
; byte_idx
< end_idx
;)
216 remain_len
= end_idx
- byte_idx
;
217 prev_st
= pstr
->cur_state
;
218 /* Apply the translation if we need. */
219 if (BE (pstr
->trans
!= NULL
, 0))
223 for (i
= 0; i
< pstr
->mb_cur_max
&& i
< remain_len
; ++i
)
225 ch
= pstr
->raw_mbs
[pstr
->raw_mbs_idx
+ byte_idx
+ i
];
226 buf
[i
] = pstr
->mbs
[byte_idx
+ i
] = pstr
->trans
[ch
];
228 p
= (const char *) buf
;
231 p
= (const char *) pstr
->raw_mbs
+ pstr
->raw_mbs_idx
+ byte_idx
;
232 mbclen
= mbrtowc (&wc
, p
, remain_len
, &pstr
->cur_state
);
233 if (BE (mbclen
== (size_t) -2, 0))
235 /* The buffer doesn't have enough space, finish to build. */
236 pstr
->cur_state
= prev_st
;
239 else if (BE (mbclen
== (size_t) -1 || mbclen
== 0, 0))
241 /* We treat these cases as a singlebyte character. */
243 wc
= (wchar_t) pstr
->raw_mbs
[pstr
->raw_mbs_idx
+ byte_idx
];
244 if (BE (pstr
->trans
!= NULL
, 0))
245 wc
= pstr
->trans
[wc
];
246 pstr
->cur_state
= prev_st
;
249 /* Write wide character and padding. */
250 pstr
->wcs
[byte_idx
++] = wc
;
251 /* Write paddings. */
252 for (remain_len
= byte_idx
+ mbclen
- 1; byte_idx
< remain_len
;)
253 pstr
->wcs
[byte_idx
++] = WEOF
;
255 pstr
->valid_len
= byte_idx
;
256 pstr
->valid_raw_len
= byte_idx
;
259 /* Build wide character buffer PSTR->WCS like build_wcs_buffer,
260 but for REG_ICASE. */
264 build_wcs_upper_buffer (re_string_t
*pstr
)
267 int src_idx
, byte_idx
, end_idx
, remain_len
;
270 char buf
[MB_LEN_MAX
];
271 assert (MB_LEN_MAX
>= pstr
->mb_cur_max
);
276 byte_idx
= pstr
->valid_len
;
277 end_idx
= (pstr
->bufs_len
> pstr
->len
) ? pstr
->len
: pstr
->bufs_len
;
279 /* The following optimization assumes that ASCII characters can be
280 mapped to wide characters with a simple cast. */
281 if (! pstr
->map_notascii
&& pstr
->trans
== NULL
&& !pstr
->offsets_needed
)
283 while (byte_idx
< end_idx
)
287 if (isascii (pstr
->raw_mbs
[pstr
->raw_mbs_idx
+ byte_idx
])
288 && mbsinit (&pstr
->cur_state
))
290 /* In case of a singlebyte character. */
292 = toupper (pstr
->raw_mbs
[pstr
->raw_mbs_idx
+ byte_idx
]);
293 /* The next step uses the assumption that wchar_t is encoded
294 ASCII-safe: all ASCII values can be converted like this. */
295 pstr
->wcs
[byte_idx
] = (wchar_t) pstr
->mbs
[byte_idx
];
300 remain_len
= end_idx
- byte_idx
;
301 prev_st
= pstr
->cur_state
;
302 mbclen
= mbrtowc (&wc
,
303 ((const char *) pstr
->raw_mbs
+ pstr
->raw_mbs_idx
304 + byte_idx
), remain_len
, &pstr
->cur_state
);
305 if (BE (mbclen
+ 2 > 2, 1))
313 mbcdlen
= wcrtomb (buf
, wcu
, &prev_st
);
314 if (BE (mbclen
== mbcdlen
, 1))
315 memcpy (pstr
->mbs
+ byte_idx
, buf
, mbclen
);
323 memcpy (pstr
->mbs
+ byte_idx
,
324 pstr
->raw_mbs
+ pstr
->raw_mbs_idx
+ byte_idx
, mbclen
);
325 pstr
->wcs
[byte_idx
++] = wcu
;
326 /* Write paddings. */
327 for (remain_len
= byte_idx
+ mbclen
- 1; byte_idx
< remain_len
;)
328 pstr
->wcs
[byte_idx
++] = WEOF
;
330 else if (mbclen
== (size_t) -1 || mbclen
== 0)
332 /* It is an invalid character or '\0'. Just use the byte. */
333 int ch
= pstr
->raw_mbs
[pstr
->raw_mbs_idx
+ byte_idx
];
334 pstr
->mbs
[byte_idx
] = ch
;
335 /* And also cast it to wide char. */
336 pstr
->wcs
[byte_idx
++] = (wchar_t) ch
;
337 if (BE (mbclen
== (size_t) -1, 0))
338 pstr
->cur_state
= prev_st
;
342 /* The buffer doesn't have enough space, finish to build. */
343 pstr
->cur_state
= prev_st
;
347 pstr
->valid_len
= byte_idx
;
348 pstr
->valid_raw_len
= byte_idx
;
352 for (src_idx
= pstr
->valid_raw_len
; byte_idx
< end_idx
;)
357 remain_len
= end_idx
- byte_idx
;
358 prev_st
= pstr
->cur_state
;
359 if (BE (pstr
->trans
!= NULL
, 0))
363 for (i
= 0; i
< pstr
->mb_cur_max
&& i
< remain_len
; ++i
)
365 ch
= pstr
->raw_mbs
[pstr
->raw_mbs_idx
+ src_idx
+ i
];
366 buf
[i
] = pstr
->trans
[ch
];
368 p
= (const char *) buf
;
371 p
= (const char *) pstr
->raw_mbs
+ pstr
->raw_mbs_idx
+ src_idx
;
372 mbclen
= mbrtowc (&wc
, p
, remain_len
, &pstr
->cur_state
);
373 if (BE (mbclen
+ 2 > 2, 1))
381 mbcdlen
= wcrtomb ((char *) buf
, wcu
, &prev_st
);
382 if (BE (mbclen
== mbcdlen
, 1))
383 memcpy (pstr
->mbs
+ byte_idx
, buf
, mbclen
);
384 else if (mbcdlen
!= (size_t) -1)
388 if (byte_idx
+ mbcdlen
> pstr
->bufs_len
)
390 pstr
->cur_state
= prev_st
;
394 if (pstr
->offsets
== NULL
)
396 pstr
->offsets
= re_malloc (int, pstr
->bufs_len
);
398 if (pstr
->offsets
== NULL
)
401 if (!pstr
->offsets_needed
)
403 for (i
= 0; i
< (size_t) byte_idx
; ++i
)
404 pstr
->offsets
[i
] = i
;
405 pstr
->offsets_needed
= 1;
408 memcpy (pstr
->mbs
+ byte_idx
, buf
, mbcdlen
);
409 pstr
->wcs
[byte_idx
] = wcu
;
410 pstr
->offsets
[byte_idx
] = src_idx
;
411 for (i
= 1; i
< mbcdlen
; ++i
)
413 pstr
->offsets
[byte_idx
+ i
]
414 = src_idx
+ (i
< mbclen
? i
: mbclen
- 1);
415 pstr
->wcs
[byte_idx
+ i
] = WEOF
;
417 pstr
->len
+= mbcdlen
- mbclen
;
418 if (pstr
->raw_stop
> src_idx
)
419 pstr
->stop
+= mbcdlen
- mbclen
;
420 end_idx
= (pstr
->bufs_len
> pstr
->len
)
421 ? pstr
->len
: pstr
->bufs_len
;
427 memcpy (pstr
->mbs
+ byte_idx
, p
, mbclen
);
430 memcpy (pstr
->mbs
+ byte_idx
, p
, mbclen
);
432 if (BE (pstr
->offsets_needed
!= 0, 0))
435 for (i
= 0; i
< mbclen
; ++i
)
436 pstr
->offsets
[byte_idx
+ i
] = src_idx
+ i
;
440 pstr
->wcs
[byte_idx
++] = wcu
;
441 /* Write paddings. */
442 for (remain_len
= byte_idx
+ mbclen
- 1; byte_idx
< remain_len
;)
443 pstr
->wcs
[byte_idx
++] = WEOF
;
445 else if (mbclen
== (size_t) -1 || mbclen
== 0)
447 /* It is an invalid character or '\0'. Just use the byte. */
448 int ch
= pstr
->raw_mbs
[pstr
->raw_mbs_idx
+ src_idx
];
450 if (BE (pstr
->trans
!= NULL
, 0))
451 ch
= pstr
->trans
[ch
];
452 pstr
->mbs
[byte_idx
] = ch
;
454 if (BE (pstr
->offsets_needed
!= 0, 0))
455 pstr
->offsets
[byte_idx
] = src_idx
;
458 /* And also cast it to wide char. */
459 pstr
->wcs
[byte_idx
++] = (wchar_t) ch
;
460 if (BE (mbclen
== (size_t) -1, 0))
461 pstr
->cur_state
= prev_st
;
465 /* The buffer doesn't have enough space, finish to build. */
466 pstr
->cur_state
= prev_st
;
470 pstr
->valid_len
= byte_idx
;
471 pstr
->valid_raw_len
= src_idx
;
475 /* Skip characters until the index becomes greater than NEW_RAW_IDX.
480 re_string_skip_chars (re_string_t
*pstr
, int new_raw_idx
, wint_t *last_wc
)
487 /* Skip the characters which are not necessary to check. */
488 for (rawbuf_idx
= pstr
->raw_mbs_idx
+ pstr
->valid_raw_len
;
489 rawbuf_idx
< new_raw_idx
;)
492 remain_len
= pstr
->len
- rawbuf_idx
;
493 prev_st
= pstr
->cur_state
;
494 mbclen
= mbrtowc (&wc
, (const char *) pstr
->raw_mbs
+ rawbuf_idx
,
495 remain_len
, &pstr
->cur_state
);
496 if (BE (mbclen
== (size_t) -2 || mbclen
== (size_t) -1 || mbclen
== 0, 0))
498 /* We treat these cases as a singlebyte character. */
500 pstr
->cur_state
= prev_st
;
502 /* Then proceed the next character. */
503 rawbuf_idx
+= mbclen
;
505 *last_wc
= (wint_t) wc
;
508 #endif /* RE_ENABLE_I18N */
510 /* Build the buffer PSTR->MBS, and apply the translation if we need.
511 This function is used in case of REG_ICASE. */
515 build_upper_buffer (re_string_t
*pstr
)
517 int char_idx
, end_idx
;
518 end_idx
= (pstr
->bufs_len
> pstr
->len
) ? pstr
->len
: pstr
->bufs_len
;
520 for (char_idx
= pstr
->valid_len
; char_idx
< end_idx
; ++char_idx
)
522 int ch
= pstr
->raw_mbs
[pstr
->raw_mbs_idx
+ char_idx
];
523 if (BE (pstr
->trans
!= NULL
, 0))
524 ch
= pstr
->trans
[ch
];
526 pstr
->mbs
[char_idx
] = toupper (ch
);
528 pstr
->mbs
[char_idx
] = ch
;
530 pstr
->valid_len
= char_idx
;
531 pstr
->valid_raw_len
= char_idx
;
534 /* Apply TRANS to the buffer in PSTR. */
538 re_string_translate_buffer (re_string_t
*pstr
)
540 int buf_idx
, end_idx
;
541 end_idx
= (pstr
->bufs_len
> pstr
->len
) ? pstr
->len
: pstr
->bufs_len
;
543 for (buf_idx
= pstr
->valid_len
; buf_idx
< end_idx
; ++buf_idx
)
545 int ch
= pstr
->raw_mbs
[pstr
->raw_mbs_idx
+ buf_idx
];
546 pstr
->mbs
[buf_idx
] = pstr
->trans
[ch
];
549 pstr
->valid_len
= buf_idx
;
550 pstr
->valid_raw_len
= buf_idx
;
553 /* This function re-construct the buffers.
554 Concretely, convert to wide character in case of pstr->mb_cur_max > 1,
555 convert to upper case in case of REG_ICASE, apply translation. */
559 re_string_reconstruct (re_string_t
*pstr
, int idx
, int eflags
)
561 int offset
= idx
- pstr
->raw_mbs_idx
;
562 if (BE (offset
< 0, 0))
565 #ifdef RE_ENABLE_I18N
566 if (pstr
->mb_cur_max
> 1)
567 memset (&pstr
->cur_state
, '\0', sizeof (mbstate_t));
568 #endif /* RE_ENABLE_I18N */
569 pstr
->len
= pstr
->raw_len
;
570 pstr
->stop
= pstr
->raw_stop
;
572 pstr
->raw_mbs_idx
= 0;
573 pstr
->valid_raw_len
= 0;
574 pstr
->offsets_needed
= 0;
575 pstr
->tip_context
= ((eflags
& REG_NOTBOL
) ? CONTEXT_BEGBUF
576 : CONTEXT_NEWLINE
| CONTEXT_BEGBUF
);
577 if (!pstr
->mbs_allocated
)
578 pstr
->mbs
= (unsigned char *) pstr
->raw_mbs
;
582 if (BE (offset
!= 0, 1))
584 /* Are the characters which are already checked remain? */
585 if (BE (offset
< pstr
->valid_raw_len
, 1)
586 #ifdef RE_ENABLE_I18N
587 /* Handling this would enlarge the code too much.
588 Accept a slowdown in that case. */
589 && pstr
->offsets_needed
== 0
593 /* Yes, move them to the front of the buffer. */
594 pstr
->tip_context
= re_string_context_at (pstr
, offset
- 1, eflags
);
595 #ifdef RE_ENABLE_I18N
596 if (pstr
->mb_cur_max
> 1)
597 memmove (pstr
->wcs
, pstr
->wcs
+ offset
,
598 (pstr
->valid_len
- offset
) * sizeof (wint_t));
599 #endif /* RE_ENABLE_I18N */
600 if (BE (pstr
->mbs_allocated
, 0))
601 memmove (pstr
->mbs
, pstr
->mbs
+ offset
,
602 pstr
->valid_len
- offset
);
603 pstr
->valid_len
-= offset
;
604 pstr
->valid_raw_len
-= offset
;
606 assert (pstr
->valid_len
> 0);
611 /* No, skip all characters until IDX. */
612 #ifdef RE_ENABLE_I18N
613 if (BE (pstr
->offsets_needed
, 0))
615 pstr
->len
= pstr
->raw_len
- idx
+ offset
;
616 pstr
->stop
= pstr
->raw_stop
- idx
+ offset
;
617 pstr
->offsets_needed
= 0;
621 pstr
->valid_raw_len
= 0;
622 #ifdef RE_ENABLE_I18N
623 if (pstr
->mb_cur_max
> 1)
630 const unsigned char *raw
, *p
, *q
, *end
;
632 /* Special case UTF-8. Multi-byte chars start with any
633 byte other than 0x80 - 0xbf. */
634 raw
= pstr
->raw_mbs
+ pstr
->raw_mbs_idx
;
635 end
= raw
+ (offset
- pstr
->mb_cur_max
);
636 p
= raw
+ offset
- 1;
638 /* We know the wchar_t encoding is UCS4, so for the simple
639 case, ASCII characters, skip the conversion step. */
640 if (isascii (*p
) && BE (pstr
->trans
== NULL
, 1))
642 memset (&pstr
->cur_state
, '\0', sizeof (mbstate_t));
648 for (; p
>= end
; --p
)
649 if ((*p
& 0xc0) != 0x80)
653 int mlen
= raw
+ pstr
->len
- p
;
654 unsigned char buf
[6];
658 if (BE (pstr
->trans
!= NULL
, 0))
660 int i
= mlen
< 6 ? mlen
: 6;
662 buf
[i
] = pstr
->trans
[p
[i
]];
665 /* XXX Don't use mbrtowc, we know which conversion
666 to use (UTF-8 -> UCS4). */
667 memset (&cur_state
, 0, sizeof (cur_state
));
668 mbclen
= mbrtowc (&wc2
, (const char *) p
, mlen
,
670 if (raw
+ offset
- p
<= mbclen
671 && mbclen
< (size_t) -2)
673 memset (&pstr
->cur_state
, '\0',
675 pstr
->valid_len
= mbclen
- (raw
+ offset
- p
);
683 pstr
->valid_len
= re_string_skip_chars (pstr
, idx
, &wc
) - idx
;
684 if (BE (pstr
->valid_len
, 0))
686 for (wcs_idx
= 0; wcs_idx
< pstr
->valid_len
; ++wcs_idx
)
687 pstr
->wcs
[wcs_idx
] = WEOF
;
688 if (pstr
->mbs_allocated
)
689 memset (pstr
->mbs
, 255, pstr
->valid_len
);
691 pstr
->valid_raw_len
= pstr
->valid_len
;
692 pstr
->tip_context
= ((BE (pstr
->word_ops_used
!= 0, 0)
693 && IS_WIDE_WORD_CHAR (wc
))
695 : ((IS_WIDE_NEWLINE (wc
)
696 && pstr
->newline_anchor
)
697 ? CONTEXT_NEWLINE
: 0));
700 #endif /* RE_ENABLE_I18N */
702 int c
= pstr
->raw_mbs
[pstr
->raw_mbs_idx
+ offset
- 1];
705 pstr
->tip_context
= (bitset_contain (pstr
->word_char
, c
)
707 : ((IS_NEWLINE (c
) && pstr
->newline_anchor
)
708 ? CONTEXT_NEWLINE
: 0));
711 if (!BE (pstr
->mbs_allocated
, 0))
714 pstr
->raw_mbs_idx
= idx
;
716 pstr
->stop
-= offset
;
718 /* Then build the buffers. */
719 #ifdef RE_ENABLE_I18N
720 if (pstr
->mb_cur_max
> 1)
724 reg_errcode_t ret
= build_wcs_upper_buffer (pstr
);
725 if (BE (ret
!= REG_NOERROR
, 0))
729 build_wcs_buffer (pstr
);
732 #endif /* RE_ENABLE_I18N */
733 if (BE (pstr
->mbs_allocated
, 0))
736 build_upper_buffer (pstr
);
737 else if (pstr
->trans
!= NULL
)
738 re_string_translate_buffer (pstr
);
741 pstr
->valid_len
= pstr
->len
;
748 internal_function
__attribute ((pure
))
749 re_string_peek_byte_case (const re_string_t
*pstr
, int idx
)
753 /* Handle the common (easiest) cases first. */
754 if (BE (!pstr
->mbs_allocated
, 1))
755 return re_string_peek_byte (pstr
, idx
);
757 #ifdef RE_ENABLE_I18N
758 if (pstr
->mb_cur_max
> 1
759 && ! re_string_is_single_byte_char (pstr
, pstr
->cur_idx
+ idx
))
760 return re_string_peek_byte (pstr
, idx
);
763 off
= pstr
->cur_idx
+ idx
;
764 #ifdef RE_ENABLE_I18N
765 if (pstr
->offsets_needed
)
766 off
= pstr
->offsets
[off
];
769 ch
= pstr
->raw_mbs
[pstr
->raw_mbs_idx
+ off
];
771 #ifdef RE_ENABLE_I18N
772 /* Ensure that e.g. for tr_TR.UTF-8 BACKSLASH DOTLESS SMALL LETTER I
773 this function returns CAPITAL LETTER I instead of first byte of
774 DOTLESS SMALL LETTER I. The latter would confuse the parser,
775 since peek_byte_case doesn't advance cur_idx in any way. */
776 if (pstr
->offsets_needed
&& !isascii (ch
))
777 return re_string_peek_byte (pstr
, idx
);
784 internal_function
__attribute ((pure
))
785 re_string_fetch_byte_case (re_string_t
*pstr
)
787 if (BE (!pstr
->mbs_allocated
, 1))
788 return re_string_fetch_byte (pstr
);
790 #ifdef RE_ENABLE_I18N
791 if (pstr
->offsets_needed
)
795 /* For tr_TR.UTF-8 [[:islower:]] there is
796 [[: CAPITAL LETTER I WITH DOT lower:]] in mbs. Skip
797 in that case the whole multi-byte character and return
798 the original letter. On the other side, with
799 [[: DOTLESS SMALL LETTER I return [[:I, as doing
800 anything else would complicate things too much. */
802 if (!re_string_first_byte (pstr
, pstr
->cur_idx
))
803 return re_string_fetch_byte (pstr
);
805 off
= pstr
->offsets
[pstr
->cur_idx
];
806 ch
= pstr
->raw_mbs
[pstr
->raw_mbs_idx
+ off
];
809 return re_string_fetch_byte (pstr
);
811 re_string_skip_bytes (pstr
,
812 re_string_char_size_at (pstr
, pstr
->cur_idx
));
817 return pstr
->raw_mbs
[pstr
->raw_mbs_idx
+ pstr
->cur_idx
++];
822 re_string_destruct (re_string_t
*pstr
)
824 #ifdef RE_ENABLE_I18N
826 re_free (pstr
->offsets
);
827 #endif /* RE_ENABLE_I18N */
828 if (pstr
->mbs_allocated
)
832 /* Return the context at IDX in INPUT. */
836 re_string_context_at (const re_string_t
*input
, int idx
, int eflags
)
840 /* In this case, we use the value stored in input->tip_context,
841 since we can't know the character in input->mbs[-1] here. */
842 return input
->tip_context
;
843 if (BE (idx
== input
->len
, 0))
844 return ((eflags
& REG_NOTEOL
) ? CONTEXT_ENDBUF
845 : CONTEXT_NEWLINE
| CONTEXT_ENDBUF
);
846 #ifdef RE_ENABLE_I18N
847 if (input
->mb_cur_max
> 1)
851 while(input
->wcs
[wc_idx
] == WEOF
)
854 /* It must not happen. */
855 assert (wc_idx
>= 0);
859 return input
->tip_context
;
861 wc
= input
->wcs
[wc_idx
];
862 if (BE (input
->word_ops_used
!= 0, 0) && IS_WIDE_WORD_CHAR (wc
))
864 return (IS_WIDE_NEWLINE (wc
) && input
->newline_anchor
865 ? CONTEXT_NEWLINE
: 0);
870 c
= re_string_byte_at (input
, idx
);
871 if (bitset_contain (input
->word_char
, c
))
873 return IS_NEWLINE (c
) && input
->newline_anchor
? CONTEXT_NEWLINE
: 0;
877 /* Functions for set operation. */
881 re_node_set_alloc (re_node_set
*set
, int size
)
885 set
->elems
= re_malloc (int, size
);
886 if (BE (set
->elems
== NULL
, 0))
893 re_node_set_init_1 (re_node_set
*set
, int elem
)
897 set
->elems
= re_malloc (int, 1);
898 if (BE (set
->elems
== NULL
, 0))
900 set
->alloc
= set
->nelem
= 0;
903 set
->elems
[0] = elem
;
909 re_node_set_init_2 (re_node_set
*set
, int elem1
, int elem2
)
912 set
->elems
= re_malloc (int, 2);
913 if (BE (set
->elems
== NULL
, 0))
918 set
->elems
[0] = elem1
;
925 set
->elems
[0] = elem1
;
926 set
->elems
[1] = elem2
;
930 set
->elems
[0] = elem2
;
931 set
->elems
[1] = elem1
;
939 re_node_set_init_copy (re_node_set
*dest
, const re_node_set
*src
)
941 dest
->nelem
= src
->nelem
;
944 dest
->alloc
= dest
->nelem
;
945 dest
->elems
= re_malloc (int, dest
->alloc
);
946 if (BE (dest
->elems
== NULL
, 0))
948 dest
->alloc
= dest
->nelem
= 0;
951 memcpy (dest
->elems
, src
->elems
, src
->nelem
* sizeof (int));
954 re_node_set_init_empty (dest
);
958 /* Calculate the intersection of the sets SRC1 and SRC2. And merge it to
959 DEST. Return value indicate the error code or REG_NOERROR if succeeded.
960 Note: We assume dest->elems is NULL, when dest->alloc is 0. */
964 re_node_set_add_intersect (re_node_set
*dest
, const re_node_set
*src1
,
965 const re_node_set
*src2
)
967 int i1
, i2
, is
, id
, delta
, sbase
;
968 if (src1
->nelem
== 0 || src2
->nelem
== 0)
971 /* We need dest->nelem + 2 * elems_in_intersection; this is a
972 conservative estimate. */
973 if (src1
->nelem
+ src2
->nelem
+ dest
->nelem
> dest
->alloc
)
975 int new_alloc
= src1
->nelem
+ src2
->nelem
+ dest
->alloc
;
976 int *new_elems
= re_realloc (dest
->elems
, int, new_alloc
);
977 if (BE (new_elems
== NULL
, 0))
979 dest
->elems
= new_elems
;
980 dest
->alloc
= new_alloc
;
983 /* Find the items in the intersection of SRC1 and SRC2, and copy
984 into the top of DEST those that are not already in DEST itself. */
985 sbase
= dest
->nelem
+ src1
->nelem
+ src2
->nelem
;
986 i1
= src1
->nelem
- 1;
987 i2
= src2
->nelem
- 1;
988 id
= dest
->nelem
- 1;
991 if (src1
->elems
[i1
] == src2
->elems
[i2
])
993 /* Try to find the item in DEST. Maybe we could binary search? */
994 while (id
>= 0 && dest
->elems
[id
] > src1
->elems
[i1
])
997 if (id
< 0 || dest
->elems
[id
] != src1
->elems
[i1
])
998 dest
->elems
[--sbase
] = src1
->elems
[i1
];
1000 if (--i1
< 0 || --i2
< 0)
1004 /* Lower the highest of the two items. */
1005 else if (src1
->elems
[i1
] < src2
->elems
[i2
])
1017 id
= dest
->nelem
- 1;
1018 is
= dest
->nelem
+ src1
->nelem
+ src2
->nelem
- 1;
1019 delta
= is
- sbase
+ 1;
1021 /* Now copy. When DELTA becomes zero, the remaining
1022 DEST elements are already in place; this is more or
1023 less the same loop that is in re_node_set_merge. */
1024 dest
->nelem
+= delta
;
1025 if (delta
> 0 && id
>= 0)
1028 if (dest
->elems
[is
] > dest
->elems
[id
])
1030 /* Copy from the top. */
1031 dest
->elems
[id
+ delta
--] = dest
->elems
[is
--];
1037 /* Slide from the bottom. */
1038 dest
->elems
[id
+ delta
] = dest
->elems
[id
];
1044 /* Copy remaining SRC elements. */
1045 memcpy (dest
->elems
, dest
->elems
+ sbase
, delta
* sizeof (int));
1050 /* Calculate the union set of the sets SRC1 and SRC2. And store it to
1051 DEST. Return value indicate the error code or REG_NOERROR if succeeded. */
1053 static reg_errcode_t
1055 re_node_set_init_union (re_node_set
*dest
, const re_node_set
*src1
,
1056 const re_node_set
*src2
)
1059 if (src1
!= NULL
&& src1
->nelem
> 0 && src2
!= NULL
&& src2
->nelem
> 0)
1061 dest
->alloc
= src1
->nelem
+ src2
->nelem
;
1062 dest
->elems
= re_malloc (int, dest
->alloc
);
1063 if (BE (dest
->elems
== NULL
, 0))
1068 if (src1
!= NULL
&& src1
->nelem
> 0)
1069 return re_node_set_init_copy (dest
, src1
);
1070 else if (src2
!= NULL
&& src2
->nelem
> 0)
1071 return re_node_set_init_copy (dest
, src2
);
1073 re_node_set_init_empty (dest
);
1076 for (i1
= i2
= id
= 0 ; i1
< src1
->nelem
&& i2
< src2
->nelem
;)
1078 if (src1
->elems
[i1
] > src2
->elems
[i2
])
1080 dest
->elems
[id
++] = src2
->elems
[i2
++];
1083 if (src1
->elems
[i1
] == src2
->elems
[i2
])
1085 dest
->elems
[id
++] = src1
->elems
[i1
++];
1087 if (i1
< src1
->nelem
)
1089 memcpy (dest
->elems
+ id
, src1
->elems
+ i1
,
1090 (src1
->nelem
- i1
) * sizeof (int));
1091 id
+= src1
->nelem
- i1
;
1093 else if (i2
< src2
->nelem
)
1095 memcpy (dest
->elems
+ id
, src2
->elems
+ i2
,
1096 (src2
->nelem
- i2
) * sizeof (int));
1097 id
+= src2
->nelem
- i2
;
1103 /* Calculate the union set of the sets DEST and SRC. And store it to
1104 DEST. Return value indicate the error code or REG_NOERROR if succeeded. */
1106 static reg_errcode_t
1108 re_node_set_merge (re_node_set
*dest
, const re_node_set
*src
)
1110 int is
, id
, sbase
, delta
;
1111 if (src
== NULL
|| src
->nelem
== 0)
1113 if (dest
->alloc
< 2 * src
->nelem
+ dest
->nelem
)
1115 int new_alloc
= 2 * (src
->nelem
+ dest
->alloc
);
1116 int *new_buffer
= re_realloc (dest
->elems
, int, new_alloc
);
1117 if (BE (new_buffer
== NULL
, 0))
1119 dest
->elems
= new_buffer
;
1120 dest
->alloc
= new_alloc
;
1123 if (BE (dest
->nelem
== 0, 0))
1125 dest
->nelem
= src
->nelem
;
1126 memcpy (dest
->elems
, src
->elems
, src
->nelem
* sizeof (int));
1130 /* Copy into the top of DEST the items of SRC that are not
1131 found in DEST. Maybe we could binary search in DEST? */
1132 for (sbase
= dest
->nelem
+ 2 * src
->nelem
,
1133 is
= src
->nelem
- 1, id
= dest
->nelem
- 1; is
>= 0 && id
>= 0; )
1135 if (dest
->elems
[id
] == src
->elems
[is
])
1137 else if (dest
->elems
[id
] < src
->elems
[is
])
1138 dest
->elems
[--sbase
] = src
->elems
[is
--];
1139 else /* if (dest->elems[id] > src->elems[is]) */
1145 /* If DEST is exhausted, the remaining items of SRC must be unique. */
1147 memcpy (dest
->elems
+ sbase
, src
->elems
, (is
+ 1) * sizeof (int));
1150 id
= dest
->nelem
- 1;
1151 is
= dest
->nelem
+ 2 * src
->nelem
- 1;
1152 delta
= is
- sbase
+ 1;
1156 /* Now copy. When DELTA becomes zero, the remaining
1157 DEST elements are already in place. */
1158 dest
->nelem
+= delta
;
1161 if (dest
->elems
[is
] > dest
->elems
[id
])
1163 /* Copy from the top. */
1164 dest
->elems
[id
+ delta
--] = dest
->elems
[is
--];
1170 /* Slide from the bottom. */
1171 dest
->elems
[id
+ delta
] = dest
->elems
[id
];
1174 /* Copy remaining SRC elements. */
1175 memcpy (dest
->elems
, dest
->elems
+ sbase
,
1176 delta
* sizeof (int));
1185 /* Insert the new element ELEM to the re_node_set* SET.
1186 SET should not already have ELEM.
1187 return -1 if an error is occured, return 1 otherwise. */
1191 re_node_set_insert (re_node_set
*set
, int elem
)
1194 /* In case the set is empty. */
1195 if (set
->alloc
== 0)
1197 if (BE (re_node_set_init_1 (set
, elem
) == REG_NOERROR
, 1))
1203 if (BE (set
->nelem
, 0) == 0)
1205 /* We already guaranteed above that set->alloc != 0. */
1206 set
->elems
[0] = elem
;
1211 /* Realloc if we need. */
1212 if (set
->alloc
== set
->nelem
)
1215 set
->alloc
= set
->alloc
* 2;
1216 new_elems
= re_realloc (set
->elems
, int, set
->alloc
);
1217 if (BE (new_elems
== NULL
, 0))
1219 set
->elems
= new_elems
;
1222 /* Move the elements which follows the new element. Test the
1223 first element separately to skip a check in the inner loop. */
1224 if (elem
< set
->elems
[0])
1227 for (idx
= set
->nelem
; idx
> 0; idx
--)
1228 set
->elems
[idx
] = set
->elems
[idx
- 1];
1232 for (idx
= set
->nelem
; set
->elems
[idx
- 1] > elem
; idx
--)
1233 set
->elems
[idx
] = set
->elems
[idx
- 1];
1236 /* Insert the new element. */
1237 set
->elems
[idx
] = elem
;
1242 /* Insert the new element ELEM to the re_node_set* SET.
1243 SET should not already have any element greater than or equal to ELEM.
1244 Return -1 if an error is occured, return 1 otherwise. */
1248 re_node_set_insert_last (re_node_set
*set
, int elem
)
1250 /* Realloc if we need. */
1251 if (set
->alloc
== set
->nelem
)
1254 set
->alloc
= (set
->alloc
+ 1) * 2;
1255 new_elems
= re_realloc (set
->elems
, int, set
->alloc
);
1256 if (BE (new_elems
== NULL
, 0))
1258 set
->elems
= new_elems
;
1261 /* Insert the new element. */
1262 set
->elems
[set
->nelem
++] = elem
;
1266 /* Compare two node sets SET1 and SET2.
1267 return 1 if SET1 and SET2 are equivalent, return 0 otherwise. */
1270 internal_function
__attribute ((pure
))
1271 re_node_set_compare (const re_node_set
*set1
, const re_node_set
*set2
)
1274 if (set1
== NULL
|| set2
== NULL
|| set1
->nelem
!= set2
->nelem
)
1276 for (i
= set1
->nelem
; --i
>= 0 ; )
1277 if (set1
->elems
[i
] != set2
->elems
[i
])
1282 /* Return (idx + 1) if SET contains the element ELEM, return 0 otherwise. */
1285 internal_function
__attribute ((pure
))
1286 re_node_set_contains (const re_node_set
*set
, int elem
)
1288 unsigned int idx
, right
, mid
;
1289 if (set
->nelem
<= 0)
1292 /* Binary search the element. */
1294 right
= set
->nelem
- 1;
1297 mid
= (idx
+ right
) / 2;
1298 if (set
->elems
[mid
] < elem
)
1303 return set
->elems
[idx
] == elem
? idx
+ 1 : 0;
1308 re_node_set_remove_at (re_node_set
*set
, int idx
)
1310 if (idx
< 0 || idx
>= set
->nelem
)
1313 for (; idx
< set
->nelem
; idx
++)
1314 set
->elems
[idx
] = set
->elems
[idx
+ 1];
1318 /* Add the token TOKEN to dfa->nodes, and return the index of the token.
1319 Or return -1, if an error will be occured. */
1323 re_dfa_add_node (re_dfa_t
*dfa
, re_token_t token
)
1325 int type
= token
.type
;
1326 if (BE (dfa
->nodes_len
>= dfa
->nodes_alloc
, 0))
1328 size_t new_nodes_alloc
= dfa
->nodes_alloc
* 2;
1329 int *new_nexts
, *new_indices
;
1330 re_node_set
*new_edests
, *new_eclosures
;
1331 re_token_t
*new_nodes
;
1333 /* Avoid overflows. */
1334 if (BE (new_nodes_alloc
< dfa
->nodes_alloc
, 0))
1337 new_nodes
= re_realloc (dfa
->nodes
, re_token_t
, new_nodes_alloc
);
1338 if (BE (new_nodes
== NULL
, 0))
1340 dfa
->nodes
= new_nodes
;
1341 new_nexts
= re_realloc (dfa
->nexts
, int, new_nodes_alloc
);
1342 new_indices
= re_realloc (dfa
->org_indices
, int, new_nodes_alloc
);
1343 new_edests
= re_realloc (dfa
->edests
, re_node_set
, new_nodes_alloc
);
1344 new_eclosures
= re_realloc (dfa
->eclosures
, re_node_set
, new_nodes_alloc
);
1345 if (BE (new_nexts
== NULL
|| new_indices
== NULL
1346 || new_edests
== NULL
|| new_eclosures
== NULL
, 0))
1348 dfa
->nexts
= new_nexts
;
1349 dfa
->org_indices
= new_indices
;
1350 dfa
->edests
= new_edests
;
1351 dfa
->eclosures
= new_eclosures
;
1352 dfa
->nodes_alloc
= new_nodes_alloc
;
1354 dfa
->nodes
[dfa
->nodes_len
] = token
;
1355 dfa
->nodes
[dfa
->nodes_len
].constraint
= 0;
1356 #ifdef RE_ENABLE_I18N
1357 dfa
->nodes
[dfa
->nodes_len
].accept_mb
=
1358 (type
== OP_PERIOD
&& dfa
->mb_cur_max
> 1) || type
== COMPLEX_BRACKET
;
1360 dfa
->nexts
[dfa
->nodes_len
] = -1;
1361 re_node_set_init_empty (dfa
->edests
+ dfa
->nodes_len
);
1362 re_node_set_init_empty (dfa
->eclosures
+ dfa
->nodes_len
);
1363 return dfa
->nodes_len
++;
1366 static inline unsigned int
1368 calc_state_hash (const re_node_set
*nodes
, unsigned int context
)
1370 unsigned int hash
= nodes
->nelem
+ context
;
1372 for (i
= 0 ; i
< nodes
->nelem
; i
++)
1373 hash
+= nodes
->elems
[i
];
1377 /* Search for the state whose node_set is equivalent to NODES.
1378 Return the pointer to the state, if we found it in the DFA.
1379 Otherwise create the new one and return it. In case of an error
1380 return NULL and set the error code in ERR.
1381 Note: - We assume NULL as the invalid state, then it is possible that
1382 return value is NULL and ERR is REG_NOERROR.
1383 - We never return non-NULL value in case of any errors, it is for
1386 static re_dfastate_t
*
1388 re_acquire_state (reg_errcode_t
*err
, const re_dfa_t
*dfa
,
1389 const re_node_set
*nodes
)
1392 re_dfastate_t
*new_state
;
1393 struct re_state_table_entry
*spot
;
1395 if (BE (nodes
->nelem
== 0, 0))
1400 hash
= calc_state_hash (nodes
, 0);
1401 spot
= dfa
->state_table
+ (hash
& dfa
->state_hash_mask
);
1403 for (i
= 0 ; i
< spot
->num
; i
++)
1405 re_dfastate_t
*state
= spot
->array
[i
];
1406 if (hash
!= state
->hash
)
1408 if (re_node_set_compare (&state
->nodes
, nodes
))
1412 /* There are no appropriate state in the dfa, create the new one. */
1413 new_state
= create_ci_newstate (dfa
, nodes
, hash
);
1414 if (BE (new_state
== NULL
, 0))
1420 /* Search for the state whose node_set is equivalent to NODES and
1421 whose context is equivalent to CONTEXT.
1422 Return the pointer to the state, if we found it in the DFA.
1423 Otherwise create the new one and return it. In case of an error
1424 return NULL and set the error code in ERR.
1425 Note: - We assume NULL as the invalid state, then it is possible that
1426 return value is NULL and ERR is REG_NOERROR.
1427 - We never return non-NULL value in case of any errors, it is for
1430 static re_dfastate_t
*
1432 re_acquire_state_context (reg_errcode_t
*err
, const re_dfa_t
*dfa
,
1433 const re_node_set
*nodes
, unsigned int context
)
1436 re_dfastate_t
*new_state
;
1437 struct re_state_table_entry
*spot
;
1439 if (nodes
->nelem
== 0)
1444 hash
= calc_state_hash (nodes
, context
);
1445 spot
= dfa
->state_table
+ (hash
& dfa
->state_hash_mask
);
1447 for (i
= 0 ; i
< spot
->num
; i
++)
1449 re_dfastate_t
*state
= spot
->array
[i
];
1450 if (state
->hash
== hash
1451 && state
->context
== context
1452 && re_node_set_compare (state
->entrance_nodes
, nodes
))
1455 /* There are no appropriate state in `dfa', create the new one. */
1456 new_state
= create_cd_newstate (dfa
, nodes
, context
, hash
);
1457 if (BE (new_state
== NULL
, 0))
1463 /* Finish initialization of the new state NEWSTATE, and using its hash value
1464 HASH put in the appropriate bucket of DFA's state table. Return value
1465 indicates the error code if failed. */
1467 static reg_errcode_t
1468 register_state (const re_dfa_t
*dfa
, re_dfastate_t
*newstate
,
1471 struct re_state_table_entry
*spot
;
1475 newstate
->hash
= hash
;
1476 err
= re_node_set_alloc (&newstate
->non_eps_nodes
, newstate
->nodes
.nelem
);
1477 if (BE (err
!= REG_NOERROR
, 0))
1479 for (i
= 0; i
< newstate
->nodes
.nelem
; i
++)
1481 int elem
= newstate
->nodes
.elems
[i
];
1482 if (!IS_EPSILON_NODE (dfa
->nodes
[elem
].type
))
1483 re_node_set_insert_last (&newstate
->non_eps_nodes
, elem
);
1486 spot
= dfa
->state_table
+ (hash
& dfa
->state_hash_mask
);
1487 if (BE (spot
->alloc
<= spot
->num
, 0))
1489 int new_alloc
= 2 * spot
->num
+ 2;
1490 re_dfastate_t
**new_array
= re_realloc (spot
->array
, re_dfastate_t
*,
1492 if (BE (new_array
== NULL
, 0))
1494 spot
->array
= new_array
;
1495 spot
->alloc
= new_alloc
;
1497 spot
->array
[spot
->num
++] = newstate
;
1502 free_state (re_dfastate_t
*state
)
1504 re_node_set_free (&state
->non_eps_nodes
);
1505 re_node_set_free (&state
->inveclosure
);
1506 if (state
->entrance_nodes
!= &state
->nodes
)
1508 re_node_set_free (state
->entrance_nodes
);
1509 re_free (state
->entrance_nodes
);
1511 re_node_set_free (&state
->nodes
);
1512 re_free (state
->word_trtable
);
1513 re_free (state
->trtable
);
1517 /* Create the new state which is independ of contexts.
1518 Return the new state if succeeded, otherwise return NULL. */
1520 static re_dfastate_t
*
1522 create_ci_newstate (const re_dfa_t
*dfa
, const re_node_set
*nodes
,
1527 re_dfastate_t
*newstate
;
1529 newstate
= (re_dfastate_t
*) calloc (sizeof (re_dfastate_t
), 1);
1530 if (BE (newstate
== NULL
, 0))
1532 err
= re_node_set_init_copy (&newstate
->nodes
, nodes
);
1533 if (BE (err
!= REG_NOERROR
, 0))
1539 newstate
->entrance_nodes
= &newstate
->nodes
;
1540 for (i
= 0 ; i
< nodes
->nelem
; i
++)
1542 re_token_t
*node
= dfa
->nodes
+ nodes
->elems
[i
];
1543 re_token_type_t type
= node
->type
;
1544 if (type
== CHARACTER
&& !node
->constraint
)
1546 #ifdef RE_ENABLE_I18N
1547 newstate
->accept_mb
|= node
->accept_mb
;
1548 #endif /* RE_ENABLE_I18N */
1550 /* If the state has the halt node, the state is a halt state. */
1551 if (type
== END_OF_RE
)
1553 else if (type
== OP_BACK_REF
)
1554 newstate
->has_backref
= 1;
1555 else if (type
== ANCHOR
|| node
->constraint
)
1556 newstate
->has_constraint
= 1;
1558 err
= register_state (dfa
, newstate
, hash
);
1559 if (BE (err
!= REG_NOERROR
, 0))
1561 free_state (newstate
);
1567 /* Create the new state which is depend on the context CONTEXT.
1568 Return the new state if succeeded, otherwise return NULL. */
1570 static re_dfastate_t
*
1572 create_cd_newstate (const re_dfa_t
*dfa
, const re_node_set
*nodes
,
1573 unsigned int context
, unsigned int hash
)
1575 int i
, nctx_nodes
= 0;
1577 re_dfastate_t
*newstate
;
1579 newstate
= (re_dfastate_t
*) calloc (sizeof (re_dfastate_t
), 1);
1580 if (BE (newstate
== NULL
, 0))
1582 err
= re_node_set_init_copy (&newstate
->nodes
, nodes
);
1583 if (BE (err
!= REG_NOERROR
, 0))
1589 newstate
->context
= context
;
1590 newstate
->entrance_nodes
= &newstate
->nodes
;
1592 for (i
= 0 ; i
< nodes
->nelem
; i
++)
1594 unsigned int constraint
= 0;
1595 re_token_t
*node
= dfa
->nodes
+ nodes
->elems
[i
];
1596 re_token_type_t type
= node
->type
;
1597 if (node
->constraint
)
1598 constraint
= node
->constraint
;
1600 if (type
== CHARACTER
&& !constraint
)
1602 #ifdef RE_ENABLE_I18N
1603 newstate
->accept_mb
|= node
->accept_mb
;
1604 #endif /* RE_ENABLE_I18N */
1606 /* If the state has the halt node, the state is a halt state. */
1607 if (type
== END_OF_RE
)
1609 else if (type
== OP_BACK_REF
)
1610 newstate
->has_backref
= 1;
1611 else if (type
== ANCHOR
)
1612 constraint
= node
->opr
.ctx_type
;
1616 if (newstate
->entrance_nodes
== &newstate
->nodes
)
1618 newstate
->entrance_nodes
= re_malloc (re_node_set
, 1);
1619 if (BE (newstate
->entrance_nodes
== NULL
, 0))
1621 free_state (newstate
);
1624 re_node_set_init_copy (newstate
->entrance_nodes
, nodes
);
1626 newstate
->has_constraint
= 1;
1629 if (NOT_SATISFY_PREV_CONSTRAINT (constraint
,context
))
1631 re_node_set_remove_at (&newstate
->nodes
, i
- nctx_nodes
);
1636 err
= register_state (dfa
, newstate
, hash
);
1637 if (BE (err
!= REG_NOERROR
, 0))
1639 free_state (newstate
);