1 #ifndef _library__utf8__hpp__included__
2 #define _library__utf8__hpp__included__
/**
 * Initial state for UTF-8 parser.
 *
 * A parser state variable is seeded with this value before the first byte is
 * handed to utf8_parse_byte().
 */
extern const uint16_t utf8_initial_state;
/**
 * Feed one byte to the UTF-8 parser.
 *
 * Parameter ch: The character to parse. -1 for end of string.
 * Parameter state: The parser state. Mutated by the call.
 * Returns: The codepoint, or -1 if no codepoint emitted.
 *
 * Note: When called with EOF, max 1 codepoint can be emitted.
 * Note: Declared with an empty exception specification (does not throw).
 */
int32_t utf8_parse_byte(int ch, uint16_t& state) throw();
/**
 * Return length of string in UTF-8 codepoints.
 *
 * Parameter str: The string.
 * Returns: The length in codepoints.
 *
 * Note: Declared with an empty exception specification (does not throw).
 */
size_t utf8_strlen(const std::string& str) throw();
31 * Iterator copy from UTF-8 to UTF-32
33 template<typename srcitr
, typename dstitr
>
34 inline void copy_from_utf8(srcitr begin
, srcitr end
, dstitr target
)
36 uint16_t state
= utf8_initial_state
;
37 for(srcitr i
= begin
; i
!= end
; i
++) {
38 int32_t x
= utf8_parse_byte(*i
, state
);
44 int32_t x
= utf8_parse_byte(-1, state
);