dirent: Fix compilation error in C++ mode on OS/2 kLIBC.
[gnulib.git] / lib / unistr / u-strstr.h
blobdb7e33b961a0245a66bf0f18839956ce3a4473bc
1 /* Substring test for UTF-8/UTF-16/UTF-32 strings. -*- coding: utf-8 -*-
2 Copyright (C) 1999, 2002, 2006, 2010-2021 Free Software Foundation, Inc.
3 Written by Bruno Haible <bruno@clisp.org>, 2002, 2005.
5 This program is free software: you can redistribute it and/or modify it
6 under the terms of the GNU Lesser General Public License as published
7 by the Free Software Foundation; either version 3 of the License, or
8 (at your option) any later version.
10 This program is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 Lesser General Public License for more details.
15 You should have received a copy of the GNU Lesser General Public License
16 along with this program. If not, see <https://www.gnu.org/licenses/>. */
18 UNIT *
19 FUNC (const UNIT *haystack, const UNIT *needle)
21 UNIT first = needle[0];
23 /* Is needle empty? */
24 if (first == 0)
25 return (UNIT *) haystack;
27 /* Is needle nearly empty (only one unit)? */
28 if (needle[1] == 0)
29 return U_STRCHR (haystack, first);
31 #ifdef U_STRMBTOUC
32 /* Is needle nearly empty (only one character)? */
34 ucs4_t first_uc;
35 int count = U_STRMBTOUC (&first_uc, needle);
36 if (count > 0 && needle[count] == 0)
37 return U_STRCHR (haystack, first_uc);
39 #endif
41 #if UNIT_IS_UINT8_T
42 return (uint8_t *) strstr ((const char *) haystack, (const char *) needle);
43 #else
45 /* Minimizing the worst-case complexity:
46 Let n = U_STRLEN(haystack), m = U_STRLEN(needle).
47 The naïve algorithm is O(n*m) worst-case.
48 The Knuth-Morris-Pratt algorithm is O(n) worst-case but it needs a
49 memory allocation.
50 To achieve linear complexity and yet amortize the cost of the
51 memory allocation, we activate the Knuth-Morris-Pratt algorithm
52 only once the naïve algorithm has already run for some time; more
53 precisely, when
54 - the outer loop count is >= 10,
55 - the average number of comparisons per outer loop is >= 5,
56 - the total number of comparisons is >= m.
57 But we try it only once. If the memory allocation attempt failed,
58 we don't retry it. */
59 bool try_kmp = true;
60 size_t outer_loop_count = 0;
61 size_t comparison_count = 0;
62 size_t last_ccount = 0; /* last comparison count */
63 const UNIT *needle_last_ccount = needle; /* = needle + last_ccount */
65 /* Speed up the following searches of needle by caching its first
66 character. */
67 UNIT b = *needle++;
69 for (;; haystack++)
71 if (*haystack == 0)
72 /* No match. */
73 return NULL;
75 /* See whether it's advisable to use an asymptotically faster
76 algorithm. */
77 if (try_kmp
78 && outer_loop_count >= 10
79 && comparison_count >= 5 * outer_loop_count)
81 /* See if needle + comparison_count now reaches the end of
82 needle. */
83 if (needle_last_ccount != NULL)
85 needle_last_ccount +=
86 U_STRNLEN (needle_last_ccount,
87 comparison_count - last_ccount);
88 if (*needle_last_ccount == 0)
89 needle_last_ccount = NULL;
90 last_ccount = comparison_count;
92 if (needle_last_ccount == NULL)
94 /* Try the Knuth-Morris-Pratt algorithm. */
95 const UNIT *result;
96 bool success =
97 knuth_morris_pratt (haystack,
98 needle - 1, U_STRLEN (needle - 1),
99 &result);
100 if (success)
101 return (UNIT *) result;
102 try_kmp = false;
106 outer_loop_count++;
107 comparison_count++;
108 if (*haystack == b)
109 /* The first character matches. */
111 const UNIT *rhaystack = haystack + 1;
112 const UNIT *rneedle = needle;
114 for (;; rhaystack++, rneedle++)
116 if (*rneedle == 0)
117 /* Found a match. */
118 return (UNIT *) haystack;
119 if (*rhaystack == 0)
120 /* No match. */
121 return NULL;
122 comparison_count++;
123 if (*rhaystack != *rneedle)
124 /* Nothing in this round. */
125 break;
130 #endif