1 /* Substring test for UTF-8/UTF-16/UTF-32 strings. -*- coding: utf-8 -*-
2 Copyright (C) 1999, 2002, 2006, 2010-2021 Free Software Foundation, Inc.
3 Written by Bruno Haible <bruno@clisp.org>, 2002, 2005.
5 This program is free software: you can redistribute it and/or modify it
6 under the terms of the GNU Lesser General Public License as published
7 by the Free Software Foundation; either version 3 of the License, or
8 (at your option) any later version.
10 This program is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 Lesser General Public License for more details.
15 You should have received a copy of the GNU Lesser General Public License
16 along with this program. If not, see <https://www.gnu.org/licenses/>. */
19 FUNC (const UNIT
*haystack
, const UNIT
*needle
)
21 UNIT first
= needle
[0];
23 /* Is needle empty? */
25 return (UNIT
*) haystack
;
27 /* Is needle nearly empty (only one unit)? */
29 return U_STRCHR (haystack
, first
);
32 /* Is needle nearly empty (only one character)? */
35 int count
= U_STRMBTOUC (&first_uc
, needle
);
36 if (count
> 0 && needle
[count
] == 0)
37 return U_STRCHR (haystack
, first_uc
);
42 return (uint8_t *) strstr ((const char *) haystack
, (const char *) needle
);
45 /* Minimizing the worst-case complexity:
46 Let n = U_STRLEN(haystack), m = U_STRLEN(needle).
47 The naïve algorithm is O(n*m) worst-case.
48 The Knuth-Morris-Pratt algorithm is O(n) worst-case but it needs a
50 To achieve linear complexity and yet amortize the cost of the
51 memory allocation, we activate the Knuth-Morris-Pratt algorithm
52 only once the naïve algorithm has already run for some time; more
54 - the outer loop count is >= 10,
55 - the average number of comparisons per outer loop is >= 5,
56 - the total number of comparisons is >= m.
57 But we try it only once. If the memory allocation attempt failed,
60 size_t outer_loop_count
= 0;
61 size_t comparison_count
= 0;
62 size_t last_ccount
= 0; /* last comparison count */
63 const UNIT
*needle_last_ccount
= needle
; /* = needle + last_ccount */
65 /* Speed up the following searches of needle by caching its first
75 /* See whether it's advisable to use an asymptotically faster
78 && outer_loop_count
>= 10
79 && comparison_count
>= 5 * outer_loop_count
)
81 /* See if needle + comparison_count now reaches the end of
83 if (needle_last_ccount
!= NULL
)
86 U_STRNLEN (needle_last_ccount
,
87 comparison_count
- last_ccount
);
88 if (*needle_last_ccount
== 0)
89 needle_last_ccount
= NULL
;
90 last_ccount
= comparison_count
;
92 if (needle_last_ccount
== NULL
)
94 /* Try the Knuth-Morris-Pratt algorithm. */
97 knuth_morris_pratt (haystack
,
98 needle
- 1, U_STRLEN (needle
- 1),
101 return (UNIT
*) result
;
109 /* The first character matches. */
111 const UNIT
*rhaystack
= haystack
+ 1;
112 const UNIT
*rneedle
= needle
;
114 for (;; rhaystack
++, rneedle
++)
118 return (UNIT
*) haystack
;
123 if (*rhaystack
!= *rneedle
)
124 /* Nothing in this round. */