1 /* Extended regular expression matching and search library.
2 Copyright (C) 2002 Free Software Foundation, Inc.
3 This file is part of the GNU C Library.
4 Contributed by Isamu Hasegawa <isamu@yamato.ibm.com>.
6 The GNU C Library is free software; you can redistribute it and/or
7 modify it under the terms of the GNU Lesser General Public
8 License as published by the Free Software Foundation; either
9 version 2.1 of the License, or (at your option) any later version.
11 The GNU C Library is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 Lesser General Public License for more details.
16 You should have received a copy of the GNU Lesser General Public
17 License along with the GNU C Library; if not, write to the Free
18 Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
31 # ifndef _RE_DEFINE_LOCALE_FUNCTIONS
32 # define _RE_DEFINE_LOCALE_FUNCTIONS 1
33 # include <locale/localeinfo.h>
34 # include <locale/elem-hash.h>
35 # include <locale/coll-lookup.h>
39 /* This is for other GNU distributions with internationalized messages. */
40 #if HAVE_LIBINTL_H || defined _LIBC
44 # define gettext(msgid) \
45 INTUSE(__dcgettext) (_libc_intl_domainname_internal, msgid, LC_MESSAGES)
48 # define gettext(msgid) (msgid)
52 /* This define is so xgettext can find the internationalizable
54 # define gettext_noop(String) String
58 #include "regex_internal.h"
60 static void re_string_construct_common (const unsigned char *str
,
61 int len
, re_string_t
*pstr
,
62 RE_TRANSLATE_TYPE trans
, int icase
);
64 static int re_string_skip_chars (re_string_t
*pstr
, int new_raw_idx
);
65 #endif /* RE_ENABLE_I18N */
66 static re_dfastate_t
*create_newstate_common (re_dfa_t
*dfa
,
67 const re_node_set
*nodes
,
69 static reg_errcode_t
register_state (re_dfa_t
*dfa
, re_dfastate_t
*newstate
,
71 static re_dfastate_t
*create_ci_newstate (re_dfa_t
*dfa
,
72 const re_node_set
*nodes
,
74 static re_dfastate_t
*create_cd_newstate (re_dfa_t
*dfa
,
75 const re_node_set
*nodes
,
78 static unsigned int inline calc_state_hash (const re_node_set
*nodes
,
79 unsigned int context
);
81 /* Functions for string operation. */
83 /* This function allocate the buffers. It is necessary to call
84 re_string_reconstruct before using the object. */
87 re_string_allocate (pstr
, str
, len
, init_len
, trans
, icase
)
89 const unsigned char *str
;
90 int len
, init_len
, icase
;
91 RE_TRANSLATE_TYPE trans
;
94 int init_buf_len
= (len
+ 1 < init_len
) ? len
+ 1: init_len
;
95 re_string_construct_common (str
, len
, pstr
, trans
, icase
);
96 pstr
->stop
= pstr
->len
;
98 ret
= re_string_realloc_buffers (pstr
, init_buf_len
);
99 if (BE (ret
!= REG_NOERROR
, 0))
102 pstr
->mbs_case
= (MBS_CASE_ALLOCATED (pstr
) ? pstr
->mbs_case
103 : (unsigned char *)str
);
104 pstr
->mbs
= MBS_ALLOCATED (pstr
) ? pstr
->mbs
: pstr
->mbs_case
;
105 pstr
->valid_len
= (MBS_CASE_ALLOCATED (pstr
) || MBS_ALLOCATED (pstr
)
106 || MB_CUR_MAX
> 1) ? pstr
->valid_len
: len
;
110 /* This function allocate the buffers, and initialize them. */
113 re_string_construct (pstr
, str
, len
, trans
, icase
)
115 const unsigned char *str
;
117 RE_TRANSLATE_TYPE trans
;
120 re_string_construct_common (str
, len
, pstr
, trans
, icase
);
121 pstr
->stop
= pstr
->len
;
122 /* Set 0 so that this function can initialize whole buffers. */
127 ret
= re_string_realloc_buffers (pstr
, len
+ 1);
128 if (BE (ret
!= REG_NOERROR
, 0))
131 pstr
->mbs_case
= (MBS_CASE_ALLOCATED (pstr
) ? pstr
->mbs_case
132 : (unsigned char *)str
);
133 pstr
->mbs
= MBS_ALLOCATED (pstr
) ? pstr
->mbs
: pstr
->mbs_case
;
137 #ifdef RE_ENABLE_I18N
139 build_wcs_upper_buffer (pstr
);
141 #endif /* RE_ENABLE_I18N */
142 build_upper_buffer (pstr
);
146 #ifdef RE_ENABLE_I18N
148 build_wcs_buffer (pstr
);
150 #endif /* RE_ENABLE_I18N */
153 re_string_translate_buffer (pstr
);
155 pstr
->valid_len
= len
;
159 /* Initialized whole buffers, then valid_len == bufs_len. */
160 pstr
->valid_len
= pstr
->bufs_len
;
164 /* Helper functions for re_string_allocate, and re_string_construct. */
167 re_string_realloc_buffers (pstr
, new_buf_len
)
171 #ifdef RE_ENABLE_I18N
174 pstr
->wcs
= re_realloc (pstr
->wcs
, wint_t, new_buf_len
);
175 if (BE (pstr
->wcs
== NULL
, 0))
178 #endif /* RE_ENABLE_I18N */
179 if (MBS_ALLOCATED (pstr
))
181 pstr
->mbs
= re_realloc (pstr
->mbs
, unsigned char, new_buf_len
);
182 if (BE (pstr
->mbs
== NULL
, 0))
185 if (MBS_CASE_ALLOCATED (pstr
))
187 pstr
->mbs_case
= re_realloc (pstr
->mbs_case
, unsigned char, new_buf_len
);
188 if (BE (pstr
->mbs_case
== NULL
, 0))
190 if (!MBS_ALLOCATED (pstr
))
191 pstr
->mbs
= pstr
->mbs_case
;
193 pstr
->bufs_len
= new_buf_len
;
199 re_string_construct_common (str
, len
, pstr
, trans
, icase
)
200 const unsigned char *str
;
203 RE_TRANSLATE_TYPE trans
;
206 memset (pstr
, '\0', sizeof (re_string_t
));
210 pstr
->icase
= icase
? 1 : 0;
213 #ifdef RE_ENABLE_I18N
215 /* Build wide character buffer PSTR->WCS.
216 If the byte sequence of the string are:
217 <mb1>(0), <mb1>(1), <mb2>(0), <mb2>(1), <sb3>
218 Then wide character buffer will be:
219 <wc1> , WEOF , <wc2> , WEOF , <wc3>
220 We use WEOF for padding, they indicate that the position isn't
221 a first byte of a multibyte character.
223 Note that this function assumes PSTR->VALID_LEN elements are already
224 built and starts from PSTR->VALID_LEN. */
227 build_wcs_buffer (pstr
)
231 int byte_idx
, end_idx
, mbclen
, remain_len
;
232 /* Build the buffers from pstr->valid_len to either pstr->len or
234 end_idx
= (pstr
->bufs_len
> pstr
->len
)? pstr
->len
: pstr
->bufs_len
;
235 for (byte_idx
= pstr
->valid_len
; byte_idx
< end_idx
;)
238 remain_len
= end_idx
- byte_idx
;
239 prev_st
= pstr
->cur_state
;
240 mbclen
= mbrtowc (&wc
, pstr
->raw_mbs
+ pstr
->raw_mbs_idx
+ byte_idx
,
241 remain_len
, &pstr
->cur_state
);
242 if (BE (mbclen
== (size_t) -2, 0))
244 /* The buffer doesn't have enough space, finish to build. */
245 pstr
->cur_state
= prev_st
;
248 else if (BE (mbclen
== (size_t) -1 || mbclen
== 0, 0))
250 /* We treat these cases as a singlebyte character. */
252 wc
= (wchar_t) pstr
->raw_mbs
[pstr
->raw_mbs_idx
+ byte_idx
];
253 pstr
->cur_state
= prev_st
;
256 /* Apply the translateion if we need. */
257 if (pstr
->trans
!= NULL
&& mbclen
== 1)
259 int ch
= pstr
->trans
[pstr
->raw_mbs
[pstr
->raw_mbs_idx
+ byte_idx
]];
260 pstr
->mbs_case
[byte_idx
] = ch
;
262 /* Write wide character and padding. */
263 pstr
->wcs
[byte_idx
++] = wc
;
264 /* Write paddings. */
265 for (remain_len
= byte_idx
+ mbclen
- 1; byte_idx
< remain_len
;)
266 pstr
->wcs
[byte_idx
++] = WEOF
;
268 pstr
->valid_len
= byte_idx
;
271 /* Build wide character buffer PSTR->WCS like build_wcs_buffer,
272 but for REG_ICASE. */
275 build_wcs_upper_buffer (pstr
)
279 int byte_idx
, end_idx
, mbclen
, remain_len
;
280 /* Build the buffers from pstr->valid_len to either pstr->len or
282 end_idx
= (pstr
->bufs_len
> pstr
->len
)? pstr
->len
: pstr
->bufs_len
;
283 for (byte_idx
= pstr
->valid_len
; byte_idx
< end_idx
;)
286 remain_len
= end_idx
- byte_idx
;
287 prev_st
= pstr
->cur_state
;
288 mbclen
= mbrtowc (&wc
, pstr
->raw_mbs
+ pstr
->raw_mbs_idx
+ byte_idx
,
289 remain_len
, &pstr
->cur_state
);
290 if (BE (mbclen
== (size_t) -2, 0))
292 /* The buffer doesn't have enough space, finish to build. */
293 pstr
->cur_state
= prev_st
;
296 else if (mbclen
== 1 || mbclen
== (size_t) -1 || mbclen
== 0)
298 /* In case of a singlebyte character. */
299 int ch
= pstr
->raw_mbs
[pstr
->raw_mbs_idx
+ byte_idx
];
300 /* Apply the translateion if we need. */
301 if (pstr
->trans
!= NULL
&& mbclen
== 1)
303 ch
= pstr
->trans
[ch
];
304 pstr
->mbs_case
[byte_idx
] = ch
;
306 pstr
->wcs
[byte_idx
] = iswlower (wc
) ? toupper (wc
) : wc
;
307 pstr
->mbs
[byte_idx
++] = islower (ch
) ? toupper (ch
) : ch
;
308 if (BE (mbclen
== (size_t) -1, 0))
309 pstr
->cur_state
= prev_st
;
311 else /* mbclen > 1 */
314 wcrtomb (pstr
->mbs
+ byte_idx
, towupper (wc
), &prev_st
);
316 memcpy (pstr
->mbs
+ byte_idx
,
317 pstr
->raw_mbs
+ pstr
->raw_mbs_idx
+ byte_idx
, mbclen
);
318 pstr
->wcs
[byte_idx
++] = iswlower (wc
) ? toupper (wc
) : wc
;
319 /* Write paddings. */
320 for (remain_len
= byte_idx
+ mbclen
- 1; byte_idx
< remain_len
;)
321 pstr
->wcs
[byte_idx
++] = WEOF
;
324 pstr
->valid_len
= byte_idx
;
327 /* Skip characters until the index becomes greater than NEW_RAW_IDX.
331 re_string_skip_chars (pstr
, new_raw_idx
)
336 int rawbuf_idx
, mbclen
;
338 /* Skip the characters which are not necessary to check. */
339 for (rawbuf_idx
= pstr
->raw_mbs_idx
+ pstr
->valid_len
;
340 rawbuf_idx
< new_raw_idx
;)
342 int remain_len
= pstr
->len
- rawbuf_idx
;
343 prev_st
= pstr
->cur_state
;
344 mbclen
= mbrlen (pstr
->raw_mbs
+ rawbuf_idx
, remain_len
,
346 if (BE (mbclen
== (size_t) -2 || mbclen
== (size_t) -1 || mbclen
== 0, 0))
348 /* We treat these cases as a singlebyte character. */
350 pstr
->cur_state
= prev_st
;
352 /* Then proceed the next character. */
353 rawbuf_idx
+= mbclen
;
357 #endif /* RE_ENABLE_I18N */
359 /* Build the buffer PSTR->MBS, and apply the translation if we need.
360 This function is used in case of REG_ICASE. */
363 build_upper_buffer (pstr
)
366 int char_idx
, end_idx
;
367 end_idx
= (pstr
->bufs_len
> pstr
->len
) ? pstr
->len
: pstr
->bufs_len
;
369 for (char_idx
= pstr
->valid_len
; char_idx
< end_idx
; ++char_idx
)
371 int ch
= pstr
->raw_mbs
[pstr
->raw_mbs_idx
+ char_idx
];
372 if (pstr
->trans
!= NULL
)
374 ch
= pstr
->trans
[ch
];
375 pstr
->mbs_case
[char_idx
] = ch
;
378 pstr
->mbs
[char_idx
] = toupper (ch
);
380 pstr
->mbs
[char_idx
] = ch
;
382 pstr
->valid_len
= char_idx
;
385 /* Apply TRANS to the buffer in PSTR. */
388 re_string_translate_buffer (pstr
)
391 int buf_idx
, end_idx
;
392 end_idx
= (pstr
->bufs_len
> pstr
->len
) ? pstr
->len
: pstr
->bufs_len
;
394 for (buf_idx
= pstr
->valid_len
; buf_idx
< end_idx
; ++buf_idx
)
396 int ch
= pstr
->raw_mbs
[pstr
->raw_mbs_idx
+ buf_idx
];
397 pstr
->mbs_case
[buf_idx
] = pstr
->trans
[ch
];
400 pstr
->valid_len
= buf_idx
;
403 /* This function re-construct the buffers.
404 Concretely, convert to wide character in case of MB_CUR_MAX > 1,
405 convert to upper case in case of REG_ICASE, apply translation. */
408 re_string_reconstruct (pstr
, idx
, eflags
, newline
)
410 int idx
, eflags
, newline
;
412 int offset
= idx
- pstr
->raw_mbs_idx
;
416 #ifdef RE_ENABLE_I18N
418 memset (&pstr
->cur_state
, '\0', sizeof (mbstate_t));
419 #endif /* RE_ENABLE_I18N */
420 pstr
->valid_len
= pstr
->raw_mbs_idx
= 0;
421 pstr
->tip_context
= ((eflags
& REG_NOTBOL
) ? CONTEXT_BEGBUF
422 : CONTEXT_NEWLINE
| CONTEXT_BEGBUF
);
423 if (!MBS_CASE_ALLOCATED (pstr
))
424 pstr
->mbs_case
= (unsigned char *)pstr
->raw_mbs
;
425 if (!MBS_ALLOCATED (pstr
) && !MBS_CASE_ALLOCATED (pstr
))
426 pstr
->mbs
= (unsigned char *)pstr
->raw_mbs
;
432 pstr
->tip_context
= re_string_context_at (pstr
, offset
- 1, eflags
,
434 /* Are the characters which are already checked remain? */
435 if (offset
< pstr
->valid_len
)
437 /* Yes, move them to the front of the buffer. */
438 #ifdef RE_ENABLE_I18N
440 memmove (pstr
->wcs
, pstr
->wcs
+ offset
,
441 (pstr
->valid_len
- offset
) * sizeof (wint_t));
442 #endif /* RE_ENABLE_I18N */
443 if (MBS_ALLOCATED (pstr
))
444 memmove (pstr
->mbs
, pstr
->mbs
+ offset
,
445 pstr
->valid_len
- offset
);
446 if (MBS_CASE_ALLOCATED (pstr
))
447 memmove (pstr
->mbs_case
, pstr
->mbs_case
+ offset
,
448 pstr
->valid_len
- offset
);
449 pstr
->valid_len
-= offset
;
451 assert (pstr
->valid_len
> 0);
456 /* No, skip all characters until IDX. */
458 #ifdef RE_ENABLE_I18N
462 pstr
->valid_len
= re_string_skip_chars (pstr
, idx
) - idx
;
463 for (wcs_idx
= 0; wcs_idx
< pstr
->valid_len
; ++wcs_idx
)
464 pstr
->wcs
[wcs_idx
] = WEOF
;
466 #endif /* RE_ENABLE_I18N */
468 if (!MBS_CASE_ALLOCATED (pstr
))
470 pstr
->mbs_case
+= offset
;
471 /* In case of !MBS_ALLOCATED && !MBS_CASE_ALLOCATED. */
472 if (!MBS_ALLOCATED (pstr
))
476 pstr
->raw_mbs_idx
= idx
;
478 pstr
->stop
-= offset
;
480 /* Then build the buffers. */
481 #ifdef RE_ENABLE_I18N
485 build_wcs_upper_buffer (pstr
);
487 build_wcs_buffer (pstr
);
490 #endif /* RE_ENABLE_I18N */
493 build_upper_buffer (pstr
);
494 else if (pstr
->trans
!= NULL
)
495 re_string_translate_buffer (pstr
);
503 re_string_destruct (pstr
)
506 #ifdef RE_ENABLE_I18N
508 #endif /* RE_ENABLE_I18N */
509 if (MBS_ALLOCATED (pstr
))
511 if (MBS_CASE_ALLOCATED (pstr
))
512 re_free (pstr
->mbs_case
);
515 /* Return the context at IDX in INPUT. */
518 re_string_context_at (input
, idx
, eflags
, newline_anchor
)
519 const re_string_t
*input
;
520 int idx
, eflags
, newline_anchor
;
523 if (idx
< 0 || idx
== input
->len
)
526 /* In this case, we use the value stored in input->tip_context,
527 since we can't know the character in input->mbs[-1] here. */
528 return input
->tip_context
;
529 else /* (idx == input->len) */
530 return ((eflags
& REG_NOTEOL
) ? CONTEXT_ENDBUF
531 : CONTEXT_NEWLINE
| CONTEXT_ENDBUF
);
533 c
= re_string_byte_at (input
, idx
);
534 if (IS_WORD_CHAR (c
))
536 return (newline_anchor
&& IS_NEWLINE (c
)) ? CONTEXT_NEWLINE
: 0;
539 /* Functions for set operation. */
542 re_node_set_alloc (set
, size
)
548 set
->elems
= re_malloc (int, size
);
549 if (BE (set
->elems
== NULL
, 0))
555 re_node_set_init_1 (set
, elem
)
561 set
->elems
= re_malloc (int, 1);
562 if (BE (set
->elems
== NULL
, 0))
564 set
->elems
[0] = elem
;
569 re_node_set_init_2 (set
, elem1
, elem2
)
574 set
->elems
= re_malloc (int, 2);
575 if (BE (set
->elems
== NULL
, 0))
580 set
->elems
[0] = elem1
;
587 set
->elems
[0] = elem1
;
588 set
->elems
[1] = elem2
;
592 set
->elems
[0] = elem2
;
593 set
->elems
[1] = elem1
;
600 re_node_set_init_copy (dest
, src
)
602 const re_node_set
*src
;
604 dest
->nelem
= src
->nelem
;
607 dest
->alloc
= dest
->nelem
;
608 dest
->elems
= re_malloc (int, dest
->alloc
);
609 if (BE (dest
->elems
== NULL
, 0))
611 memcpy (dest
->elems
, src
->elems
, src
->nelem
* sizeof (int));
614 re_node_set_init_empty (dest
);
618 /* Calculate the intersection of the sets SRC1 and SRC2. And store it in
619 DEST. Return value indicate the error code or REG_NOERROR if succeeded.
620 Note: We assume dest->elems is NULL, when dest->alloc is 0. */
623 re_node_set_intersect (dest
, src1
, src2
)
625 const re_node_set
*src1
, *src2
;
628 if (src1
->nelem
> 0 && src2
->nelem
> 0)
630 if (src1
->nelem
+ src2
->nelem
> dest
->alloc
)
632 dest
->alloc
= src1
->nelem
+ src2
->nelem
;
633 dest
->elems
= re_realloc (dest
->elems
, int, dest
->alloc
);
634 if (BE (dest
->elems
== NULL
, 0))
640 /* The intersection of empty sets is also empty set. */
645 for (i1
= i2
= id
= 0; i1
< src1
->nelem
&& i2
< src2
->nelem
; )
647 if (src1
->elems
[i1
] > src2
->elems
[i2
])
652 /* The intersection must have the elements which are in both of
654 if (src1
->elems
[i1
] == src2
->elems
[i2
])
655 dest
->elems
[id
++] = src2
->elems
[i2
++];
662 /* Calculate the intersection of the sets SRC1 and SRC2. And merge it to
663 DEST. Return value indicate the error code or REG_NOERROR if succeeded.
664 Note: We assume dest->elems is NULL, when dest->alloc is 0. */
667 re_node_set_add_intersect (dest
, src1
, src2
)
669 const re_node_set
*src1
, *src2
;
672 if (src1
->nelem
> 0 && src2
->nelem
> 0)
674 if (src1
->nelem
+ src2
->nelem
+ dest
->nelem
> dest
->alloc
)
676 dest
->alloc
= src1
->nelem
+ src2
->nelem
+ dest
->nelem
;
677 dest
->elems
= re_realloc (dest
->elems
, int, dest
->alloc
);
678 if (BE (dest
->elems
== NULL
, 0))
685 for (i1
= i2
= id
= 0 ; i1
< src1
->nelem
&& i2
< src2
->nelem
;)
687 if (src1
->elems
[i1
] > src2
->elems
[i2
])
692 if (src1
->elems
[i1
] == src2
->elems
[i2
])
694 while (id
< dest
->nelem
&& dest
->elems
[id
] < src2
->elems
[i2
])
696 if (id
< dest
->nelem
&& dest
->elems
[id
] == src2
->elems
[i2
])
700 memmove (dest
->elems
+ id
+ 1, dest
->elems
+ id
,
701 sizeof (int) * (dest
->nelem
- id
));
702 dest
->elems
[id
++] = src2
->elems
[i2
++];
711 /* Calculate the union set of the sets SRC1 and SRC2. And store it to
712 DEST. Return value indicate the error code or REG_NOERROR if succeeded. */
715 re_node_set_init_union (dest
, src1
, src2
)
717 const re_node_set
*src1
, *src2
;
720 if (src1
!= NULL
&& src1
->nelem
> 0 && src2
!= NULL
&& src2
->nelem
> 0)
722 dest
->alloc
= src1
->nelem
+ src2
->nelem
;
723 dest
->elems
= re_malloc (int, dest
->alloc
);
724 if (BE (dest
->elems
== NULL
, 0))
729 if (src1
!= NULL
&& src1
->nelem
> 0)
730 return re_node_set_init_copy (dest
, src1
);
731 else if (src2
!= NULL
&& src2
->nelem
> 0)
732 return re_node_set_init_copy (dest
, src2
);
734 re_node_set_init_empty (dest
);
737 for (i1
= i2
= id
= 0 ; i1
< src1
->nelem
&& i2
< src2
->nelem
;)
739 if (src1
->elems
[i1
] > src2
->elems
[i2
])
741 dest
->elems
[id
++] = src2
->elems
[i2
++];
744 if (src1
->elems
[i1
] == src2
->elems
[i2
])
746 dest
->elems
[id
++] = src1
->elems
[i1
++];
748 if (i1
< src1
->nelem
)
750 memcpy (dest
->elems
+ id
, src1
->elems
+ i1
,
751 (src1
->nelem
- i1
) * sizeof (int));
752 id
+= src1
->nelem
- i1
;
754 else if (i2
< src2
->nelem
)
756 memcpy (dest
->elems
+ id
, src2
->elems
+ i2
,
757 (src2
->nelem
- i2
) * sizeof (int));
758 id
+= src2
->nelem
- i2
;
764 /* Calculate the union set of the sets DEST and SRC. And store it to
765 DEST. Return value indicate the error code or REG_NOERROR if succeeded. */
768 re_node_set_merge (dest
, src
)
770 const re_node_set
*src
;
773 if (src
== NULL
|| src
->nelem
== 0)
775 if (dest
->alloc
< src
->nelem
+ dest
->nelem
)
777 dest
->alloc
= 2 * (src
->nelem
+ dest
->alloc
);
778 dest
->elems
= re_realloc (dest
->elems
, int, dest
->alloc
);
779 if (BE (dest
->elems
== NULL
, 0))
783 for (si
= 0, di
= 0 ; si
< src
->nelem
&& di
< dest
->nelem
;)
785 int cp_from
, ncp
, mid
, right
, src_elem
= src
->elems
[si
];
786 /* Binary search the spot we will add the new element. */
790 mid
= (di
+ right
) / 2;
791 if (dest
->elems
[mid
] < src_elem
)
796 if (di
>= dest
->nelem
)
799 if (dest
->elems
[di
] == src_elem
)
801 /* Skip since, DEST already has the element. */
807 /* Skip the src elements which are less than dest->elems[di]. */
809 while (si
< src
->nelem
&& src
->elems
[si
] < dest
->elems
[di
])
811 /* Copy these src elements. */
813 memmove (dest
->elems
+ di
+ ncp
, dest
->elems
+ di
,
814 sizeof (int) * (dest
->nelem
- di
));
815 memcpy (dest
->elems
+ di
, src
->elems
+ cp_from
,
817 /* Update counters. */
822 /* Copy remaining src elements. */
825 memcpy (dest
->elems
+ di
, src
->elems
+ si
,
826 sizeof (int) * (src
->nelem
- si
));
827 dest
->nelem
+= src
->nelem
- si
;
832 /* Insert the new element ELEM to the re_node_set* SET.
833 return 0 if SET already has ELEM,
834 return -1 if an error is occured, return 1 otherwise. */
837 re_node_set_insert (set
, elem
)
842 /* In case of the set is empty. */
843 if (set
->elems
== NULL
|| set
->alloc
== 0)
845 if (BE (re_node_set_init_1 (set
, elem
) == REG_NOERROR
, 1))
851 /* Binary search the spot we will add the new element. */
856 mid
= (idx
+ right
) / 2;
857 if (set
->elems
[mid
] < elem
)
863 /* Realloc if we need. */
864 if (set
->alloc
< set
->nelem
+ 1)
867 set
->alloc
= set
->alloc
* 2;
868 new_array
= re_malloc (int, set
->alloc
);
869 if (BE (new_array
== NULL
, 0))
871 /* Copy the elements they are followed by the new element. */
873 memcpy (new_array
, set
->elems
, sizeof (int) * (idx
));
874 /* Copy the elements which follows the new element. */
875 if (set
->nelem
- idx
> 0)
876 memcpy (new_array
+ idx
+ 1, set
->elems
+ idx
,
877 sizeof (int) * (set
->nelem
- idx
));
878 re_free (set
->elems
);
879 set
->elems
= new_array
;
883 /* Move the elements which follows the new element. */
884 if (set
->nelem
- idx
> 0)
885 memmove (set
->elems
+ idx
+ 1, set
->elems
+ idx
,
886 sizeof (int) * (set
->nelem
- idx
));
888 /* Insert the new element. */
889 set
->elems
[idx
] = elem
;
894 /* Compare two node sets SET1 and SET2.
895 return 1 if SET1 and SET2 are equivalent, retrun 0 otherwise. */
898 re_node_set_compare (set1
, set2
)
899 const re_node_set
*set1
, *set2
;
902 if (set1
== NULL
|| set2
== NULL
|| set1
->nelem
!= set2
->nelem
)
904 for (i
= 0 ; i
< set1
->nelem
; i
++)
905 if (set1
->elems
[i
] != set2
->elems
[i
])
910 /* Return 1 if SET contains the element ELEM, return 0 otherwise. */
913 re_node_set_contains (set
, elem
)
914 const re_node_set
*set
;
921 /* Binary search the element. */
923 right
= set
->nelem
- 1;
926 mid
= (idx
+ right
) / 2;
927 if (set
->elems
[mid
] < elem
)
932 return set
->elems
[idx
] == elem
;
936 re_node_set_remove_at (set
, idx
)
940 if (idx
< 0 || idx
>= set
->nelem
)
942 if (idx
< set
->nelem
- 1)
943 memmove (set
->elems
+ idx
, set
->elems
+ idx
+ 1,
944 sizeof (int) * (set
->nelem
- idx
- 1));
949 /* Add the token TOKEN to dfa->nodes, and return the index of the token.
950 Or return -1, if an error will be occured. */
953 re_dfa_add_node (dfa
, token
, mode
)
958 if (dfa
->nodes_len
>= dfa
->nodes_alloc
)
960 re_token_t
*new_array
;
961 dfa
->nodes_alloc
*= 2;
962 new_array
= re_realloc (dfa
->nodes
, re_token_t
, dfa
->nodes_alloc
);
963 if (BE (new_array
== NULL
, 0))
966 dfa
->nodes
= new_array
;
969 int *new_firsts
, *new_nexts
;
970 re_node_set
*new_edests
, *new_eclosures
, *new_inveclosures
;
972 new_firsts
= re_realloc (dfa
->firsts
, int, dfa
->nodes_alloc
);
973 new_nexts
= re_realloc (dfa
->nexts
, int, dfa
->nodes_alloc
);
974 new_edests
= re_realloc (dfa
->edests
, re_node_set
, dfa
->nodes_alloc
);
975 new_eclosures
= re_realloc (dfa
->eclosures
, re_node_set
,
977 new_inveclosures
= re_realloc (dfa
->inveclosures
, re_node_set
,
979 if (BE (new_firsts
== NULL
|| new_nexts
== NULL
|| new_edests
== NULL
980 || new_eclosures
== NULL
|| new_inveclosures
== NULL
, 0))
982 dfa
->firsts
= new_firsts
;
983 dfa
->nexts
= new_nexts
;
984 dfa
->edests
= new_edests
;
985 dfa
->eclosures
= new_eclosures
;
986 dfa
->inveclosures
= new_inveclosures
;
989 dfa
->nodes
[dfa
->nodes_len
] = token
;
990 dfa
->nodes
[dfa
->nodes_len
].duplicated
= 0;
991 return dfa
->nodes_len
++;
994 static unsigned int inline
995 calc_state_hash (nodes
, context
)
996 const re_node_set
*nodes
;
997 unsigned int context
;
999 unsigned int hash
= nodes
->nelem
+ context
;
1001 for (i
= 0 ; i
< nodes
->nelem
; i
++)
1002 hash
+= nodes
->elems
[i
];
1006 /* Search for the state whose node_set is equivalent to NODES.
1007 Return the pointer to the state, if we found it in the DFA.
1008 Otherwise create the new one and return it. In case of an error
1009 return NULL and set the error code in ERR.
1010 Note: - We assume NULL as the invalid state, then it is possible that
1011 return value is NULL and ERR is REG_NOERROR.
1012 - We never return non-NULL value in case of any errors, it is for
1015 static re_dfastate_t
*
1016 re_acquire_state (err
, dfa
, nodes
)
1019 const re_node_set
*nodes
;
1022 re_dfastate_t
*new_state
;
1023 struct re_state_table_entry
*spot
;
1025 if (BE (nodes
->nelem
== 0, 0))
1030 hash
= calc_state_hash (nodes
, 0);
1031 spot
= dfa
->state_table
+ (hash
& dfa
->state_hash_mask
);
1033 for (i
= 0 ; i
< spot
->num
; i
++)
1035 re_dfastate_t
*state
= spot
->array
[i
];
1036 if (hash
!= state
->hash
)
1038 if (re_node_set_compare (&state
->nodes
, nodes
))
1042 /* There are no appropriate state in the dfa, create the new one. */
1043 new_state
= create_ci_newstate (dfa
, nodes
, hash
);
1044 if (BE (new_state
!= NULL
, 1))
1053 /* Search for the state whose node_set is equivalent to NODES and
1054 whose context is equivalent to CONTEXT.
1055 Return the pointer to the state, if we found it in the DFA.
1056 Otherwise create the new one and return it. In case of an error
1057 return NULL and set the error code in ERR.
1058 Note: - We assume NULL as the invalid state, then it is possible that
1059 return value is NULL and ERR is REG_NOERROR.
1060 - We never return non-NULL value in case of any errors, it is for
1063 static re_dfastate_t
*
1064 re_acquire_state_context (err
, dfa
, nodes
, context
)
1067 const re_node_set
*nodes
;
1068 unsigned int context
;
1071 re_dfastate_t
*new_state
;
1072 struct re_state_table_entry
*spot
;
1074 if (nodes
->nelem
== 0)
1079 hash
= calc_state_hash (nodes
, context
);
1080 spot
= dfa
->state_table
+ (hash
& dfa
->state_hash_mask
);
1082 for (i
= 0 ; i
< spot
->num
; i
++)
1084 re_dfastate_t
*state
= spot
->array
[i
];
1085 if (hash
!= state
->hash
)
1087 if (re_node_set_compare (state
->entrance_nodes
, nodes
)
1088 && state
->context
== context
)
1091 /* There are no appropriate state in `dfa', create the new one. */
1092 new_state
= create_cd_newstate (dfa
, nodes
, context
, hash
);
1093 if (BE (new_state
!= NULL
, 1))
1102 /* Allocate memory for DFA state and initialize common properties.
1103 Return the new state if succeeded, otherwise return NULL. */
1105 static re_dfastate_t
*
1106 create_newstate_common (dfa
, nodes
, hash
)
1108 const re_node_set
*nodes
;
1111 re_dfastate_t
*newstate
;
1112 newstate
= (re_dfastate_t
*) calloc (sizeof (re_dfastate_t
), 1);
1113 if (BE (newstate
== NULL
, 0))
1115 re_node_set_init_copy (&newstate
->nodes
, nodes
);
1116 newstate
->trtable
= NULL
;
1117 newstate
->trtable_search
= NULL
;
1118 newstate
->hash
= hash
;
1122 /* Store the new state NEWSTATE whose hash value is HASH in appropriate
1123 position. Return value indicate the error code if failed. */
1125 static reg_errcode_t
1126 register_state (dfa
, newstate
, hash
)
1128 re_dfastate_t
*newstate
;
1131 struct re_state_table_entry
*spot
;
1132 spot
= dfa
->state_table
+ (hash
& dfa
->state_hash_mask
);
1134 if (spot
->alloc
<= spot
->num
)
1136 spot
->alloc
= 2 * spot
->num
+ 2;
1137 spot
->array
= re_realloc (spot
->array
, re_dfastate_t
*, spot
->alloc
);
1138 if (BE (spot
->array
== NULL
, 0))
1141 spot
->array
[spot
->num
++] = newstate
;
1145 /* Create the new state which is independ of contexts.
1146 Return the new state if succeeded, otherwise return NULL. */
1148 static re_dfastate_t
*
1149 create_ci_newstate (dfa
, nodes
, hash
)
1151 const re_node_set
*nodes
;
1156 re_dfastate_t
*newstate
;
1157 newstate
= create_newstate_common (dfa
, nodes
, hash
);
1158 if (BE (newstate
== NULL
, 0))
1160 newstate
->entrance_nodes
= &newstate
->nodes
;
1162 for (i
= 0 ; i
< nodes
->nelem
; i
++)
1164 re_token_t
*node
= dfa
->nodes
+ nodes
->elems
[i
];
1165 re_token_type_t type
= node
->type
;
1166 if (type
== CHARACTER
)
1169 /* If the state has the halt node, the state is a halt state. */
1170 else if (type
== END_OF_RE
)
1172 #ifdef RE_ENABLE_I18N
1173 else if (type
== COMPLEX_BRACKET
1174 || (type
== OP_PERIOD
&& MB_CUR_MAX
> 1))
1175 newstate
->accept_mb
= 1;
1176 #endif /* RE_ENABLE_I18N */
1177 else if (type
== OP_BACK_REF
)
1178 newstate
->has_backref
= 1;
1179 else if (type
== ANCHOR
|| OP_CONTEXT_NODE
)
1181 newstate
->has_constraint
= 1;
1182 if (type
== OP_CONTEXT_NODE
1183 && dfa
->nodes
[node
->opr
.ctx_info
->entity
].type
== END_OF_RE
)
1187 err
= register_state (dfa
, newstate
, hash
);
1188 return (err
!= REG_NOERROR
) ? NULL
: newstate
;
1191 /* Create the new state which is depend on the context CONTEXT.
1192 Return the new state if succeeded, otherwise return NULL. */
1194 static re_dfastate_t
*
1195 create_cd_newstate (dfa
, nodes
, context
, hash
)
1197 const re_node_set
*nodes
;
1198 unsigned int context
, hash
;
1200 int i
, nctx_nodes
= 0;
1202 re_dfastate_t
*newstate
;
1204 newstate
= create_newstate_common (dfa
, nodes
, hash
);
1205 if (BE (newstate
== NULL
, 0))
1207 newstate
->context
= context
;
1208 newstate
->entrance_nodes
= &newstate
->nodes
;
1210 for (i
= 0 ; i
< nodes
->nelem
; i
++)
1212 unsigned int constraint
= 0;
1213 re_token_t
*node
= dfa
->nodes
+ nodes
->elems
[i
];
1214 re_token_type_t type
= node
->type
;
1215 if (type
== CHARACTER
)
1218 /* If the state has the halt node, the state is a halt state. */
1219 else if (type
== END_OF_RE
)
1221 #ifdef RE_ENABLE_I18N
1222 else if (type
== COMPLEX_BRACKET
1223 || (type
== OP_PERIOD
&& MB_CUR_MAX
> 1))
1224 newstate
->accept_mb
= 1;
1225 #endif /* RE_ENABLE_I18N */
1226 else if (type
== OP_BACK_REF
)
1227 newstate
->has_backref
= 1;
1228 else if (type
== ANCHOR
)
1229 constraint
= node
->opr
.ctx_type
;
1230 else if (type
== OP_CONTEXT_NODE
)
1232 re_token_type_t ctype
= dfa
->nodes
[node
->opr
.ctx_info
->entity
].type
;
1233 constraint
= node
->constraint
;
1234 if (ctype
== END_OF_RE
)
1236 else if (ctype
== OP_BACK_REF
)
1237 newstate
->has_backref
= 1;
1238 #ifdef RE_ENABLE_I18N
1239 else if (ctype
== COMPLEX_BRACKET
1240 || (type
== OP_PERIOD
&& MB_CUR_MAX
> 1))
1241 newstate
->accept_mb
= 1;
1242 #endif /* RE_ENABLE_I18N */
1247 if (newstate
->entrance_nodes
== &newstate
->nodes
)
1249 newstate
->entrance_nodes
= re_malloc (re_node_set
, 1);
1250 if (BE (newstate
->entrance_nodes
== NULL
, 0))
1252 re_node_set_init_copy (newstate
->entrance_nodes
, nodes
);
1254 newstate
->has_constraint
= 1;
1257 if (NOT_SATISFY_PREV_CONSTRAINT (constraint
,context
))
1259 re_node_set_remove_at (&newstate
->nodes
, i
- nctx_nodes
);
1264 err
= register_state (dfa
, newstate
, hash
);
1265 return (err
!= REG_NOERROR
) ? NULL
: newstate
;