1 /*@ Anything (locale agnostic: ASCII only) around char and char*.
3 * Copyright (c) 2001 - 2020 Steffen (Daode) Nurpmeso <steffen@sdaoden.eu>.
4 * SPDX-License-Identifier: ISC
6 * Permission to use, copy, modify, and/or distribute this software for any
7 * purpose with or without fee is hereby granted, provided that the above
8 * copyright notice and this permission notice appear in all copies.
10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
11 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
13 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
24 * \brief \r{CS} tools and heap
30 #include <su/code-in.h>
34 * \defgroup CS Byte character data
36 * \brief Byte character data, locale agnostic: ASCII only (\r{su/cs.h})
38 * Oh, the vivid part this is!
44 su_CS_CTYPE_NONE
, /*!< \_ */
45 su_CS_CTYPE_ALNUM
= 1u<<0, /*!< \_ */
46 su_CS_CTYPE_ALPHA
= 1u<<1, /*!< \_ */
47 su_CS_CTYPE_BLANK
= 1u<<2, /*!< \_ */
48 su_CS_CTYPE_CNTRL
= 1u<<3, /*!< \_ */
49 su_CS_CTYPE_DIGIT
= 1u<<4, /*!< \_ */
50 su_CS_CTYPE_GRAPH
= 1u<<5, /*!< \_ */
51 su_CS_CTYPE_LOWER
= 1u<<6, /*!< \_ */
52 su_CS_CTYPE_PRINT
= 1u<<7, /*!< \_ */
53 su_CS_CTYPE_PUNCT
= 1u<<8, /*!< \_ */
54 su_CS_CTYPE_SPACE
= 1u<<9, /*!< \_ */
55 su_CS_CTYPE_UPPER
= 1u<<10, /*!< \_ */
56 su_CS_CTYPE_WHITE
= 1u<<11, /*!< SPACE, HT or LF */
57 su_CS_CTYPE_XDIGIT
= 1u<<12, /*!< \_ */
59 su__CS_CTYPE_MAXSHIFT
= 13u,
60 su__CS_CTYPE_MASK
= (1u<<su__CS_CTYPE_MAXSHIFT
) - 1
/* Character class table with S8_MAX + 1 u16 entries; the classification
 * helpers of this file index it with a value that passed su_cs_is_ascii()
 * and mask the entry with su_CS_CTYPE_* bits. */
63 EXPORT_DATA u16
const su__cs_ctype
[S8_MAX
+ 1];
/* Lowercase mapping table with S8_MAX + 1 u8 entries; su_cs_to_lower() reads
 * it for values which are not greater than S8_MAX. */
64 EXPORT_DATA u8
const su__cs_tolower
[S8_MAX
+ 1];
/* Uppercase mapping table with S8_MAX + 1 u8 entries; su_cs_to_upper() reads
 * it for values which are not greater than S8_MAX. */
65 EXPORT_DATA u8
const su__cs_toupper
[S8_MAX
+ 1];
/* Constant struct su_toolbox instance of this group.
 * NOTE(review): its documentation line is not visible here; presumably the
 * case-sensitive counterpart of su_cs_toolbox_case -- confirm. */
68 EXPORT_DATA
struct su_toolbox
const su_cs_toolbox
;
/* Constant struct su_toolbox instance of this group.
 * NOTE(review): its documentation line is not visible here; presumably the
 * case-insensitive counterpart of su_cs_toolbox -- confirm. */
71 EXPORT_DATA
struct su_toolbox
const su_cs_toolbox_case
;
73 /*! This actually tests for 7-bit cleanliness.
 * \a{x} is passed through S(u32,x) and then compared against S8_MAX; assuming
 * S() is a plain cast, negative values therefore fail the test. */
74 INLINE boole
su_cs_is_ascii(s32 x
) {return (S(u32
,x
) <= S8_MAX
);}
78 return (su_cs_is_ascii(X) &&\
79 (su__cs_ctype[S(u32,X)] & su_CONCAT(su_CS_CTYPE_,F)) != 0)
81 /*! \r{su_CS_CTYPE_ALNUM}.
 * Body is a_X(): expected to hold only if \a{x} passes \r{su_cs_is_ascii()}
 * and its su__cs_ctype entry has this bit set. */
82 INLINE boole
su_cs_is_alnum(s32 x
) {a_X(x
, ALNUM
);}
84 /*! \r{su_CS_CTYPE_ALPHA}.
 * Body is a_X(): expected to hold only if \a{x} passes \r{su_cs_is_ascii()}
 * and its su__cs_ctype entry has this bit set. */
85 INLINE boole
su_cs_is_alpha(s32 x
) {a_X(x
, ALPHA
);}
87 /*! \r{su_CS_CTYPE_BLANK}.
 * Body is a_X(): expected to hold only if \a{x} passes \r{su_cs_is_ascii()}
 * and its su__cs_ctype entry has this bit set. */
88 INLINE boole
su_cs_is_blank(s32 x
) {a_X(x
, BLANK
);}
90 /*! \r{su_CS_CTYPE_CNTRL}.
 * Body is a_X(): expected to hold only if \a{x} passes \r{su_cs_is_ascii()}
 * and its su__cs_ctype entry has this bit set. */
91 INLINE boole
su_cs_is_cntrl(s32 x
) {a_X(x
, CNTRL
);}
93 /*! \r{su_CS_CTYPE_DIGIT}.
 * Body is a_X(): expected to hold only if \a{x} passes \r{su_cs_is_ascii()}
 * and its su__cs_ctype entry has this bit set. */
94 INLINE boole
su_cs_is_digit(s32 x
) {a_X(x
, DIGIT
);}
96 /*! \r{su_CS_CTYPE_GRAPH}.
 * Body is a_X(): expected to hold only if \a{x} passes \r{su_cs_is_ascii()}
 * and its su__cs_ctype entry has this bit set. */
97 INLINE boole
su_cs_is_graph(s32 x
) {a_X(x
, GRAPH
);}
99 /*! \r{su_CS_CTYPE_LOWER}.
 * Body is a_X(): expected to hold only if \a{x} passes \r{su_cs_is_ascii()}
 * and its su__cs_ctype entry has this bit set. */
100 INLINE boole
su_cs_is_lower(s32 x
) {a_X(x
, LOWER
);}
102 /*! \r{su_CS_CTYPE_PRINT}.
 * Body is a_X(): expected to hold only if \a{x} passes \r{su_cs_is_ascii()}
 * and its su__cs_ctype entry has this bit set. */
103 INLINE boole
su_cs_is_print(s32 x
) {a_X(x
, PRINT
);}
105 /*! \r{su_CS_CTYPE_PUNCT}.
 * Body is a_X(): expected to hold only if \a{x} passes \r{su_cs_is_ascii()}
 * and its su__cs_ctype entry has this bit set. */
106 INLINE boole
su_cs_is_punct(s32 x
) {a_X(x
, PUNCT
);}
108 /*! \r{su_CS_CTYPE_SPACE}.
 * Body is a_X(): expected to hold only if \a{x} passes \r{su_cs_is_ascii()}
 * and its su__cs_ctype entry has this bit set. */
109 INLINE boole
su_cs_is_space(s32 x
) {a_X(x
, SPACE
);}
111 /*! \r{su_CS_CTYPE_UPPER}.
 * Body is a_X(): expected to hold only if \a{x} passes \r{su_cs_is_ascii()}
 * and its su__cs_ctype entry has this bit set. */
112 INLINE boole
su_cs_is_upper(s32 x
) {a_X(x
, UPPER
);}
114 /*! \r{su_CS_CTYPE_WHITE} (documented there as: SPACE, HT or LF).
 * Body is a_X(): expected to hold only if \a{x} passes \r{su_cs_is_ascii()}
 * and its su__cs_ctype entry has this bit set. */
115 INLINE boole
su_cs_is_white(s32 x
) {a_X(x
, WHITE
);}
117 /*! \r{su_CS_CTYPE_XDIGIT}.
 * Body is a_X(): expected to hold only if \a{x} passes \r{su_cs_is_ascii()}
 * and its su__cs_ctype entry has this bit set. */
118 INLINE boole
su_cs_is_xdigit(s32 x
) {a_X(x
, XDIGIT
);}
122 /*! Test \a{x} for any of the \r{su_cs_ctype} bits given in \a{csct}. */
123 INLINE boole
su_cs_is_ctype(s32 x
, u32 csct
){
124 return (su_cs_is_ascii(x
) && (su__cs_ctype
[x
] & csct
) != 0);
127 /*! String comparison, byte-based, case-sensitive.
 * NOTE(review): the meaning of the \c{sz} result (presumably less than, equal
 * to or greater than 0) is not stated here -- confirm with the
 * implementation. */
128 EXPORT sz
su_cs_cmp(char const *cp1
, char const *cp2
);
130 /*! \r{su_cs_cmp()}, size-cramped: \a{n} bounds the comparison (presumably at
 * most \a{n} bytes are looked at -- confirm).
131 * \remarks{A \a{n} of 0 compares equal.} */
132 EXPORT sz
su_cs_cmp_n(char const *cp1
, char const *cp2
, uz n
);
134 /*! String comparison, byte-based, case-insensitive.
 * As documented for the entire group, this is locale agnostic: ASCII only. */
135 EXPORT sz
su_cs_cmp_case(char const *cp1
, char const *cp2
);
137 /*! \r{su_cs_cmp_case()}, size-cramped: \a{n} bounds the comparison
 * (presumably at most \a{n} bytes are looked at -- confirm).
138 * \remarks{A \a{n} of 0 compares equal.} */
139 EXPORT sz
su_cs_cmp_case_n(char const *cp1
, char const *cp2
, uz n
);
141 /*! Copy at most \a{n} bytes of \a{src} to \a{dst}, and return \a{dst} again.
142 * Returns \NIL if \a{dst} is not large enough; \a{dst} will always be
143 * terminated unless \a{n} was 0 on entry.
 * NOTE(review): \a{n} thus appears to be the capacity of \a{dst} including
 * the terminating NUL -- confirm.
144 * Also see \r{su_cs_pcopy_n()}. */
145 EXPORT
char *su_cs_copy_n(char *dst
, char const *src
, uz n
);
147 /*! Duplicate a buffer into \r{su_MEM_TALLOC()}ated memory.
148 * Unless \a{len} was \r{su_UZ_MAX} and thus detected by searching NUL,
149 * embedded NUL bytes will be included in the result.
150 * \copydoc{su_clone_fun}. */
151 EXPORT
char *su_cs_dup_cbuf(char const *buf
, uz len
, u32 estate
);
153 /*! \r{su_cs_dup_cbuf()} without an explicit length (presumably that of the
 * NUL terminated \a{cp} is used -- confirm). */
154 EXPORT
char *su_cs_dup(char const *cp
, u32 estate
);
157 /*! Is \a{x} the ending (sub)string of \a{cp}?
 * NOTE(review): the name suggests a case-insensitive test in the style of
 * \r{su_cs_starts_with_case()}, which this text does not state -- confirm. */
158 EXPORT boole
su_cs_ends_with_case(char const *cp
, char const *x
);
161 /*! Search \a{xp} within \a{cp}, return pointer to location or \NIL.
162 * Returns \a{cp} if \a{xp} is the empty buffer.
 * The result is not \c{const} qualified even though \a{cp} is.
 * \r{su_cs_find_case()} is the case-insensitive variant. */
163 EXPORT
char *su_cs_find(char const *cp
, char const *xp
);
165 /*! Search \a{xc} within \a{cp}, return pointer to location or \NIL.
 * The result is not \c{const} qualified even though \a{cp} is. */
166 EXPORT
char *su_cs_find_c(char const *cp
, char xc
);
168 /*! Like \r{su_cs_find()}, but case-insensitive.
 * As documented for the entire group, this is locale agnostic: ASCII only. */
169 EXPORT
char *su_cs_find_case(char const *cp
, char const *xp
);
171 /*! Returns offset to first character of \a{xp} in \a{cp}, or \r{su_UZ_MAX}.
 * NOTE(review): presumably this is the offset of the first byte in \a{cp}
 * which equals any byte of \a{xp} -- confirm.
 * The inline su_cs_first_of() passes \c{UZ_MAX} for both \a{cplen} and
 * \a{xlen}.
172 * \remarks{Will not find NUL.} */
173 EXPORT uz
su_cs_first_of_cbuf_cbuf(char const *cp
, uz cplen
,
174 char const *xp
, uz xlen
);
177 INLINE uz
su_cs_first_of(char const *cp
, char const *xp
){
178 ASSERT_RET(cp
!= NIL
, UZ_MAX
);
179 ASSERT_RET(xp
!= NIL
, UZ_MAX
);
180 return su_cs_first_of_cbuf_cbuf(cp
, UZ_MAX
, xp
, UZ_MAX
);
183 /*! Hash a string (buffer).
184 * This should be considered an attackable hash, for now Chris Torek's hash
185 * algorithm is used, the resulting hash is stirred as shown by Bret Mulvey.
 * The inline su_cs_hash() passes \c{UZ_MAX} as \a{len}.
186 * TODO Add _strong_hash (with, e.g., siphash algo)
187 * Also see \r{su_cs_hash_case_cbuf()}. */
188 EXPORT uz
su_cs_hash_cbuf(char const *buf
, uz len
);
190 /*! \r{su_cs_hash_cbuf()}. */
191 INLINE uz
su_cs_hash(char const *cp
){
192 ASSERT_RET(cp
!= NIL
, 0);
193 return su_cs_hash_cbuf(cp
, UZ_MAX
);
196 /*! Hash a string (buffer), case-insensitively, otherwise identical to
197 * \r{su_cs_hash_cbuf()}.
 * The inline su_cs_hash_case() passes \c{UZ_MAX} as \a{len}.
198 * As usual, if \a{len} is 0 \a{buf} may be \NIL. */
199 EXPORT uz
su_cs_hash_case_cbuf(char const *buf
, uz len
);
201 /*! \r{su_cs_hash_case_cbuf()}. */
202 INLINE uz
su_cs_hash_case(char const *cp
){
203 ASSERT_RET(cp
!= NIL
, 0);
204 return su_cs_hash_case_cbuf(cp
, UZ_MAX
);
/* NOTE(review): no documentation is visible for this declaration; presumably
 * it returns the length of the NUL terminated \a{cp}, not counting the NUL --
 * confirm with the implementation. */
208 EXPORT uz
su_cs_len(char const *cp
);
210 /*! Copy \a{src} to \a{dst}, return pointer to NUL in \a{dst}.
 * No size is passed for \a{dst}; \r{su_cs_pcopy_n()} is the variant which
 * takes one. */
211 EXPORT
char *su_cs_pcopy(char *dst
, char const *src
);
213 /*! Copy \a{src} to \a{dst}, return pointer to NUL in \a{dst}.
214 * Returns \NIL if \a{dst} is not large enough; \a{dst} will always be
215 * terminated unless \a{n} was 0 on entry.
 * NOTE(review): \a{n} thus appears to be the capacity of \a{dst} including
 * the terminating NUL -- confirm. */
216 EXPORT
char *su_cs_pcopy_n(char *dst
, char const *src
, uz n
);
218 /*! Search \a{x} within \a{cp}, starting at end, return pointer to location
 * or \NIL.
 * The result is not \c{const} qualified even though \a{cp} is. */
220 EXPORT
char *su_cs_rfind_c(char const *cp
, char x
);
222 /*! Find the next \a{sep}arator in *\a{iolist}, terminate the resulting
223 * substring and return it.
 * The input buffer is thus modified in place.
224 * \r{su_cs_is_space()} surrounding the result will be trimmed away.
225 * If \a{ignore_empty} is set, empty results will be skipped over.
226 * \a{iolist} will be updated for the next round, \NIL will be placed if the
227 * input string is exhausted.
228 * If called with an exhausted string, \NIL is returned.
229 * (\r{su_cs_sep_escable_c()} supports separator escaping.) */
230 EXPORT
char *su_cs_sep_c(char **iolist
, char sep
, boole ignore_empty
);
232 /*! Like \r{su_cs_sep_c()}, but supports escaping of \a{sep}arators via reverse
233 * solidus (backslash) characters.
234 * \remarks{Whereas reverse solidus characters are supposed to escape the next
235 * character, including reverse solidus itself, only those which escape \a{sep}
236 * characters will be stripped from the result string.} */
237 EXPORT
char *su_cs_sep_escable_c(char **iolist
, char sep
, boole ignore_empty
);
239 /*! Is \a{x} the starting (sub)string of \a{cp}?
 * \r{su_cs_starts_with_case()} is the case-insensitive variant. */
240 EXPORT boole
su_cs_starts_with(char const *cp
, char const *x
);
242 /*! Is \a{x} the starting (sub)string of \a{cp}?
 * NOTE(review): the role of \a{n} is not documented; presumably it bounds the
 * number of bytes of \a{x} which are compared -- confirm. */
243 EXPORT boole
su_cs_starts_with_n(char const *cp
, char const *x
, uz n
);
245 /*! Is \a{x} the starting (sub)string of \a{cp}, case-insensitively?
 * As documented for the entire group, this is locale agnostic: ASCII only. */
246 EXPORT boole
su_cs_starts_with_case(char const *cp
, char const *x
);
248 /*! Is \a{x} the starting (sub)string of \a{cp}, case-insensitively?
 * NOTE(review): the role of \a{n} is not documented; presumably it bounds the
 * number of bytes of \a{x} which are compared -- confirm. */
249 EXPORT boole
su_cs_starts_with_case_n(char const *cp
, char const *x
, uz n
);
251 /*! Map to lowercase equivalent, or return unchanged.
252 * For convenience values beyond \c{char} are supported (e.g., \c{EOF}), they
253 * are returned unchanged. */
254 INLINE s32
su_cs_to_lower(s32 x
){
255 return (S(u32
,x
) <= S8_MAX
? su__cs_tolower
[x
] : x
);
258 /*! Uppercasing variant of \r{su_cs_to_lower()}. */
259 INLINE s32
su_cs_to_upper(s32 x
){
260 return (S(u32
,x
) <= S8_MAX
? su__cs_toupper
[x
] : x
);
265 #include <su/code-ou.h>
266 #if !su_C_LANG || defined CXX_DOXYGEN
267 # define su_A_T_T_DECL_ONLY
268 # include <su/a-t-t.h>
270 # define su_CXX_HEADER
271 # include <su/code-in.h>
278 * C++ variant of \r{CS} (\r{su/cs.h})
282 /*! \copydoc{su_cs_ctype} */
284 /*! \copydoc{su_CS_CTYPE_NONE} */
285 ctype_none
= su_CS_CTYPE_NONE
,
286 /*! \copydoc{su_CS_CTYPE_ALNUM} */
287 ctype_alnum
= su_CS_CTYPE_ALNUM
,
288 /*! \copydoc{su_CS_CTYPE_ALPHA} */
289 ctype_alpha
= su_CS_CTYPE_ALPHA
,
290 /*! \copydoc{su_CS_CTYPE_BLANK} */
291 ctype_blank
= su_CS_CTYPE_BLANK
,
292 /*! \copydoc{su_CS_CTYPE_CNTRL} */
293 ctype_cntrl
= su_CS_CTYPE_CNTRL
,
294 /*! \copydoc{su_CS_CTYPE_DIGIT} */
295 ctype_digit
= su_CS_CTYPE_DIGIT
,
296 /*! \copydoc{su_CS_CTYPE_GRAPH} */
297 ctype_graph
= su_CS_CTYPE_GRAPH
,
298 /*! \copydoc{su_CS_CTYPE_LOWER} */
299 ctype_lower
= su_CS_CTYPE_LOWER
,
300 /*! \copydoc{su_CS_CTYPE_PRINT} */
301 ctype_print
= su_CS_CTYPE_PRINT
,
302 /*! \copydoc{su_CS_CTYPE_PUNCT} */
303 ctype_punct
= su_CS_CTYPE_PUNCT
,
304 /*! \copydoc{su_CS_CTYPE_SPACE} */
305 ctype_space
= su_CS_CTYPE_SPACE
,
306 /*! \copydoc{su_CS_CTYPE_UPPER} */
307 ctype_upper
= su_CS_CTYPE_UPPER
,
308 /*! \copydoc{su_CS_CTYPE_WHITE} */
309 ctype_white
= su_CS_CTYPE_WHITE
,
310 /*! \copydoc{su_CS_CTYPE_XDIGIT} */
311 ctype_xdigit
= su_CS_CTYPE_XDIGIT
314 /*! \copydoc{su_cs_toolbox} */
/* The \c{char*} flavour; auto_type_toolbox<char*>::get_instance() returns
 * this pointer. */
315 static NSPC(su
)type_toolbox
<char*> const * const type_toolbox
;
316 /*! \copydoc{su_cs_toolbox} */
/* The \c{char const*} flavour; auto_type_toolbox<char const*>::get_instance()
 * returns this pointer. */
317 static NSPC(su
)type_toolbox
<char const*> const * const const_type_toolbox
;
319 /*! \copydoc{su_cs_toolbox_case} */
/* The \c{char*} flavour of the _case toolbox. */
320 static NSPC(su
)type_toolbox
<char*> const * const type_toolbox_case
;
321 /*! \copydoc{su_cs_toolbox_case} */
/* The \c{char const*} flavour of the _case toolbox. */
322 static NSPC(su
)type_toolbox
<char const*> const * const
323 const_type_toolbox_case
;
325 /*! \copydoc{su_cs_is_ascii()} */
/* Plain forwarder to su_cs_is_ascii(). */
326 static boole
is_ascii(s32 x
) {return su_cs_is_ascii(x
);}
328 /*! \copydoc{su_cs_is_alnum()} */
/* Plain forwarder to su_cs_is_alnum(). */
329 static boole
is_alnum(s32 x
) {return su_cs_is_alnum(x
);}
331 /*! \copydoc{su_cs_is_alpha()} */
/* Plain forwarder to su_cs_is_alpha(). */
332 static boole
is_alpha(s32 x
) {return su_cs_is_alpha(x
);}
334 /*! \copydoc{su_cs_is_blank()} */
/* Plain forwarder to su_cs_is_blank(). */
335 static boole
is_blank(s32 x
) {return su_cs_is_blank(x
);}
337 /*! \copydoc{su_cs_is_cntrl()} */
/* Plain forwarder to su_cs_is_cntrl(). */
338 static boole
is_cntrl(s32 x
) {return su_cs_is_cntrl(x
);}
340 /*! \copydoc{su_cs_is_digit()} */
/* Plain forwarder to su_cs_is_digit(). */
341 static boole
is_digit(s32 x
) {return su_cs_is_digit(x
);}
343 /*! \copydoc{su_cs_is_graph()} */
/* Plain forwarder to su_cs_is_graph(). */
344 static boole
is_graph(s32 x
) {return su_cs_is_graph(x
);}
346 /*! \copydoc{su_cs_is_lower()} */
/* Plain forwarder to su_cs_is_lower(). */
347 static boole
is_lower(s32 x
) {return su_cs_is_lower(x
);}
349 /*! \copydoc{su_cs_is_print()} */
/* Plain forwarder to su_cs_is_print(). */
350 static boole
is_print(s32 x
) {return su_cs_is_print(x
);}
352 /*! \copydoc{su_cs_is_punct()} */
/* Plain forwarder to su_cs_is_punct(). */
353 static boole
is_punct(s32 x
) {return su_cs_is_punct(x
);}
355 /*! \copydoc{su_cs_is_space()} */
/* Plain forwarder to su_cs_is_space(). */
356 static boole
is_space(s32 x
) {return su_cs_is_space(x
);}
358 /*! \copydoc{su_cs_is_upper()} */
/* Plain forwarder to su_cs_is_upper(). */
359 static boole
is_upper(s32 x
) {return su_cs_is_upper(x
);}
361 /*! \copydoc{su_cs_is_white()} */
/* Plain forwarder to su_cs_is_white(). */
362 static boole
is_white(s32 x
) {return su_cs_is_white(x
);}
364 /*! \copydoc{su_cs_is_xdigit()} */
/* Plain forwarder to su_cs_is_xdigit(). */
365 static boole
is_xdigit(s32 x
) {return su_cs_is_xdigit(x
);}
367 /*! \copydoc{su_cs_is_ctype()} */
/* Plain forwarder; \a{ct} is handed over as the \c{csct} bit set argument of
 * su_cs_is_ctype(). */
368 static boole
is_ctype(s32 x
, u32 ct
) {return su_cs_is_ctype(x
, ct
);}
370 /*! \copydoc{su_cs_cmp()} */
371 static sz
cmp(char const *cp1
, char const *cp2
){
372 return su_cs_cmp(cp1
, cp2
);
375 /*! \copydoc{su_cs_cmp_n()} */
376 static sz
cmp(char const *cp1
, char const *cp2
, uz n
){
377 return su_cs_cmp_n(cp1
, cp2
, n
);
380 /*! \copydoc{su_cs_cmp_case()} */
381 static sz
cmp_case(char const *cp1
, char const *cp2
){
382 return su_cs_cmp_case(cp1
, cp2
);
385 /*! \copydoc{su_cs_cmp_case_n()} */
386 static sz
cmp_case(char const *cp1
, char const *cp2
, uz n
){
387 return su_cs_cmp_case_n(cp1
, cp2
, n
);
390 /*! \copydoc{su_cs_copy_n()} */
391 static char *copy(char *dst
, char const *src
, uz n
){
392 return su_cs_copy_n(dst
, src
, n
);
395 /*! \copydoc{su_cs_dup_cbuf()} */
396 static char *dup(char const *buf
, uz len
, u32 estate
=state::none
){
397 return su_cs_dup_cbuf(buf
, len
, estate
);
400 /*! \copydoc{su_cs_dup()} */
401 static char *dup(char const *cp
, u32 estate
=state::none
){
402 return su_cs_dup(cp
, estate
);
405 /*! \copydoc{su_cs_find()} */
/* Substring overload: plain forwarder to su_cs_find(). */
406 static char *find(char const *cp
, char const *x
) {return su_cs_find(cp
, x
);}
408 /*! \copydoc{su_cs_find_c()} */
/* Single character overload: plain forwarder to su_cs_find_c(). */
409 static char *find(char const *cp
, char x
) {return su_cs_find_c(cp
, x
);}
411 /*! \copydoc{su_cs_hash_cbuf()} */
/* Buffer plus length overload: plain forwarder to su_cs_hash_cbuf(). */
412 static uz
hash(char const *buf
, uz len
) {return su_cs_hash_cbuf(buf
, len
);}
414 /*! \copydoc{su_cs_hash()} */
/* String overload: plain forwarder to su_cs_hash(). */
415 static uz
hash(char const *cp
) {return su_cs_hash(cp
);}
417 /*! \copydoc{su_cs_hash_case_cbuf()} */
418 static uz
hash_case(char const *buf
, uz len
){
419 return su_cs_hash_case_cbuf(buf
, len
);
422 /*! \copydoc{su_cs_hash_case()} */
/* String overload: plain forwarder to su_cs_hash_case(). */
423 static uz
hash_case(char const *cp
) {return su_cs_hash_case(cp
);}
425 /*! \copydoc{su_cs_len()} */
/* Plain forwarder to su_cs_len(). */
426 static uz
len(char const *cp
) {return su_cs_len(cp
);}
428 /*! \copydoc{su_cs_pcopy()} */
429 static char *pcopy(char *dst
, char const *src
){
430 return su_cs_pcopy(dst
, src
);
433 /*! \copydoc{su_cs_pcopy_n()} */
434 static char *pcopy(char *dst
, char const *src
, uz n
){
435 return su_cs_pcopy_n(dst
, src
, n
);
438 /*! \copydoc{su_cs_rfind_c()} */
/* Plain forwarder to su_cs_rfind_c(). */
439 static char *rfind(char const *cp
, char x
) {return su_cs_rfind_c(cp
, x
);}
441 /*! \copydoc{su_cs_sep_c()} */
442 static char *sep(char **iolist
, char sep
, boole ignore_empty
){
443 return su_cs_sep_c(iolist
, sep
, ignore_empty
);
446 /*! \copydoc{su_cs_sep_escable_c()} */
447 static char *sep_escable(char **iolist
, char sep
, boole ignore_empty
){
448 return su_cs_sep_escable_c(iolist
, sep
, ignore_empty
);
451 /*! \copydoc{su_cs_starts_with()} */
452 static boole
starts_with(char const *cp
, char const *x
){
453 return su_cs_starts_with(cp
, x
);
456 /*! \copydoc{su_cs_to_lower()} */
/* Plain forwarder to su_cs_to_lower(). */
457 static s32
to_lower(s32 c
) {return su_cs_to_lower(c
);}
459 /*! \copydoc{su_cs_to_upper()} */
/* Plain forwarder to su_cs_to_upper(). */
460 static s32
to_upper(s32 c
) {return su_cs_to_upper(c
);}
465 * \r{auto_type_toolbox} specialization for \c{char*} (also \r{cs::type_toolbox}; \r{su/cs.h})
468 class auto_type_toolbox
<char*>{
471 static type_toolbox
<char*> const *get_instance(void){
472 return cs::type_toolbox
;
478 * \r{auto_type_toolbox} specialization for \c{char const*} (also \r{cs::const_type_toolbox}; \r{su/cs.h})
481 class auto_type_toolbox
<char const*>{
484 static type_toolbox
<char const*> const *get_instance(void){
485 return cs::const_type_toolbox
;
490 # include <su/code-ou.h>
491 #endif /* !C_LANG || CXX_DOXYGEN */