Fix testcase unsupportedcheck1 for --disable-backend-remote
[xapian.git] / xapian-core / common / stringutils.h
bloba52ce29d3d87eed0d3d8669b807a2f0120b61878
1 /** @file
2 * @brief Various handy string-related helpers
3 */
4 /* Copyright (C) 2004-2023 Olly Betts
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License, or
9 * (at your option) any later version.
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
21 #ifndef XAPIAN_INCLUDED_STRINGUTILS_H
22 #define XAPIAN_INCLUDED_STRINGUTILS_H
24 // Hack to allow inclusion from xapian-omega.
25 // FIXME: Move C_isalpha(), etc to the public API?
26 #define XAPIAN_IN_XAPIAN_H
27 #include <xapian/constinfo.h>
28 #undef XAPIAN_IN_XAPIAN_H
30 #include <algorithm>
31 #include <string>
32 #include <string_view>
33 #include <cstring>
/** Implementation detail of STRINGIZE - do not use directly.
 *
 *  The preprocessor's # operator turns its operand into a string literal
 *  without macro-expanding it first, so STRINGIZE forwards its argument
 *  through this second macro in order to get it expanded before it is
 *  converted.
 */
#define STRINGIZE_(ARG) #ARG

/// Convert the (macro-expanded) argument into a string constant.
#define STRINGIZE(ARG) STRINGIZE_(ARG)
/** Give the length of a string literal, not counting the terminating nul.
 *
 *  The argument is pasted next to an empty string literal, so that the
 *  adjacent-literal concatenation fails to compile if the macro is applied to
 *  anything which isn't a string literal.
 */
#define CONST_STRLEN(S) (sizeof(S "") - sizeof(""))
/* C++20 added starts_with() and ends_with() methods to std::string and
 * std::string_view, and C++23 added contains(), which provide this
 * functionality, but we don't yet require C++20.
 */
/** Test whether @a s begins with the character @a pfx.
 *
 *  An empty @a s never matches.
 */
inline bool
startswith(std::string_view s, char pfx)
{
    if (s.empty()) return false;
    return s.front() == pfx;
}
/** Test whether @a s begins with the @a len bytes starting at @a pfx.
 *
 *  The comparison is bytewise, so embedded zero bytes are compared like any
 *  other byte.  A zero-length prefix matches every @a s.
 *
 *  @param s	The string to test.
 *  @param pfx	Pointer to the prefix (only dereferenced if @a len > 0).
 *  @param len	Length of the prefix in bytes.
 */
inline bool
startswith(std::string_view s, const char* pfx, size_t len)
{
    if (len > s.size()) return false;
    // Passing a null pointer to memcmp() is undefined behaviour even when the
    // length is zero, and both a default-constructed string_view and an empty
    // prefix can legitimately have a null data pointer, so answer the
    // zero-length case without calling memcmp().
    return len == 0 || std::memcmp(s.data(), pfx, len) == 0;
}
/** Test whether @a s begins with the nul-terminated string @a pfx.
 *
 *  An empty @a pfx matches every @a s.
 */
inline bool
startswith(std::string_view s, const char* pfx)
{
    const std::string_view prefix(pfx);
    // substr() clamps to the length of s, so a string shorter than the prefix
    // yields a shorter view which can't compare equal.
    return s.substr(0, prefix.size()) == prefix;
}
/** Test whether @a s begins with @a pfx.
 *
 *  The comparison is bytewise; an empty @a pfx matches every @a s.
 */
inline bool
startswith(std::string_view s, std::string_view pfx)
{
    if (pfx.size() > s.size()) return false;
    return s.compare(0, pfx.size(), pfx) == 0;
}
/** Test whether @a s ends with the character @a sfx.
 *
 *  An empty @a s never matches.
 */
inline bool
endswith(std::string_view s, char sfx)
{
    if (s.empty()) return false;
    return s.back() == sfx;
}
/** Test whether @a s ends with the @a len bytes starting at @a sfx.
 *
 *  The comparison is bytewise, so embedded zero bytes are compared like any
 *  other byte.  A zero-length suffix matches every @a s.
 *
 *  @param s	The string to test.
 *  @param sfx	Pointer to the suffix (only dereferenced if @a len > 0).
 *  @param len	Length of the suffix in bytes.
 */
inline bool
endswith(std::string_view s, const char* sfx, size_t len)
{
    if (len > s.size()) return false;
    // Passing a null pointer to memcmp() is undefined behaviour even when the
    // length is zero, and both a default-constructed string_view and an empty
    // suffix can legitimately have a null data pointer, so answer the
    // zero-length case without calling memcmp().
    if (len == 0) return true;
    const char* tail = s.data() + (s.size() - len);
    return std::memcmp(tail, sfx, len) == 0;
}
/** Test whether @a s ends with the nul-terminated string @a sfx.
 *
 *  An empty @a sfx matches every @a s.
 */
inline bool
endswith(std::string_view s, const char* sfx)
{
    const std::string_view suffix(sfx);
    if (suffix.size() > s.size()) return false;
    return s.substr(s.size() - suffix.size()) == suffix;
}
/** Test whether @a s ends with @a sfx.
 *
 *  The comparison is bytewise; an empty @a sfx matches every @a s.
 */
inline bool
endswith(std::string_view s, std::string_view sfx)
{
    if (sfx.size() > s.size()) return false;
    const std::string_view::size_type start = s.size() - sfx.size();
    return s.compare(start, sfx.size(), sfx) == 0;
}
/// Test whether the character @a substring occurs anywhere in @a s.
inline bool
contains(std::string_view s, char substring)
{
    return std::find(s.begin(), s.end(), substring) != s.end();
}
/** Test whether the @a len bytes starting at @a substring occur in @a s.
 *
 *  A zero-length substring is found in every @a s.
 */
inline bool
contains(std::string_view s, const char* substring, size_t len)
{
    const std::string_view needle(substring, len);
    return s.find(needle) != std::string_view::npos;
}
/** Test whether the nul-terminated string @a substring occurs in @a s.
 *
 *  An empty @a substring is found in every @a s.
 */
inline bool
contains(std::string_view s, const char* substring)
{
    const std::string_view needle(substring);
    return std::string_view::npos != s.find(needle);
}
/** Test whether @a substring occurs anywhere in @a s.
 *
 *  An empty @a substring is found in every @a s.
 */
inline bool
contains(std::string_view s, std::string_view substring)
{
    const std::string_view::size_type where = s.find(substring);
    return where != std::string_view::npos;
}
/** Count the leading bytes which @a a and @a b have in common.
 *
 *  @return The number of bytes at the start of @a a which equal the bytes at
 *	    the same positions in @a b (at most the length of the shorter
 *	    string).
 */
inline std::string::size_type
common_prefix_length(std::string_view a, std::string_view b)
{
    // The four-iterator form of mismatch() stops at the end of whichever
    // range is shorter.
    const auto diverge = std::mismatch(a.begin(), a.end(), b.begin(), b.end());
    return static_cast<std::string::size_type>(diverge.first - a.begin());
}
/** Count the leading bytes which @a a and @a b have in common, up to a limit.
 *
 *  @param a		    First string.
 *  @param b		    Second string.
 *  @param max_prefix_len   Upper bound on the value returned.
 *
 *  @return The number of matching leading bytes, which is at most the
 *	    smallest of the length of @a a, the length of @a b and
 *	    @a max_prefix_len.
 */
inline std::string::size_type
common_prefix_length(std::string_view a, std::string_view b,
		     std::string::size_type max_prefix_len)
{
    const std::string::size_type limit =
	std::min(std::min(a.size(), b.size()), max_prefix_len);
    std::string::size_type matched = 0;
    while (matched != limit && a[matched] == b[matched]) {
	++matched;
    }
    return matched;
}
152 // Like C's isXXXXX() but:
153 // (a) always work in the C locale
154 // (b) handle signed char as well as unsigned char
155 // (c) have a suitable signature for use as predicates with find_if()
156 // (d) add negated versions isnotXXXXX() which are useful as predicates
namespace Xapian::Internal {

// Bit layout of the per-character table entries returned by C_tab_():
// the low four bits are used as a hex digit value and the remaining bits
// are character class flags.

/// Mask selecting the hex digit value bits.
constexpr unsigned char HEX_MASK = 0x0f;

/// Flag tested by C_isupper().
constexpr unsigned char IS_UPPER = 0x10;

/// Flag tested by C_isalpha().  NB Same as ASCII "case bit".
constexpr unsigned char IS_ALPHA = 0x20;

/// Flag tested by C_isdigit().
constexpr unsigned char IS_DIGIT = 0x40;

/// Flag tested by C_isspace().
constexpr unsigned char IS_SPACE = 0x80;

}
// FIXME: The character classification and case conversion functions in this
// file assume ASCII or an ASCII compatible character set such as ISO-8859-N
// or UTF-8.  EBCDIC would need some work (patches welcome!)
//
// Fail the build rather than silently misbehave if space doesn't have its
// ASCII code.
static_assert(' ' == '\x20', "character set isn't a superset of ASCII");
173 // Add explicit conversion to bool to prevent compiler warning from "aCC +w":
174 // Warning (suggestion) 818: [...] # Type `int' is larger than type `bool',
175 // truncation in value may result.
177 inline unsigned char C_tab_(char ch) {
178 const unsigned char * C_tab = Xapian::Internal::get_constinfo_()->C_tab;
179 return C_tab[static_cast<unsigned char>(ch)];
182 inline bool C_isdigit(char ch) {
183 using namespace Xapian::Internal;
184 return bool(C_tab_(ch) & IS_DIGIT);
187 inline bool C_isxdigit(char ch) {
188 using namespace Xapian::Internal;
189 // Include IS_DIGIT so '0' gives true.
190 return bool(C_tab_(ch) & (HEX_MASK|IS_DIGIT));
193 inline bool C_isupper(char ch) {
194 using namespace Xapian::Internal;
195 return bool(C_tab_(ch) & IS_UPPER);
198 inline bool C_islower(char ch) {
199 using namespace Xapian::Internal;
200 return (C_tab_(ch) & (IS_ALPHA|IS_UPPER)) == IS_ALPHA;
203 inline bool C_isalpha(char ch) {
204 using namespace Xapian::Internal;
205 return bool(C_tab_(ch) & IS_ALPHA);
208 inline bool C_isalnum(char ch) {
209 using namespace Xapian::Internal;
210 return bool(C_tab_(ch) & (IS_ALPHA|IS_DIGIT));
213 inline bool C_isspace(char ch) {
214 using namespace Xapian::Internal;
215 return bool(C_tab_(ch) & IS_SPACE);
218 inline bool C_isnotdigit(char ch) { return !C_isdigit(ch); }
219 inline bool C_isnotxdigit(char ch) { return !C_isxdigit(ch); }
220 inline bool C_isnotupper(char ch) { return !C_isupper(ch); }
221 inline bool C_isnotlower(char ch) { return !C_islower(ch); }
222 inline bool C_isnotalpha(char ch) { return !C_isalpha(ch); }
223 inline bool C_isnotalnum(char ch) { return !C_isalnum(ch); }
224 inline bool C_isnotspace(char ch) { return !C_isspace(ch); }
226 inline char C_tolower(char ch) {
227 using namespace Xapian::Internal;
228 return ch | (C_tab_(ch) & IS_ALPHA);
231 inline char C_toupper(char ch) {
232 using namespace Xapian::Internal;
233 return ch &~ (C_tab_(ch) & IS_ALPHA);
236 inline int hex_digit(char ch) {
237 using namespace Xapian::Internal;
238 return C_tab_(ch) & HEX_MASK;
241 /** Decode a pair of ASCII hex digits.
243 * E.g. hex_decode('4', 'A') gives 'J'.
245 * If C_isxdigit(ch1) isn't true then ch1 is treated as '0', and similarly for
246 * ch2.
248 inline char hex_decode(char ch1, char ch2) {
249 return char(hex_digit(ch1) << 4 | hex_digit(ch2));
252 #endif // XAPIAN_INCLUDED_STRINGUTILS_H