1 #ifndef _library__utf8__hpp__included__
2 #define _library__utf8__hpp__included__
/**
 * Initial state for UTF-8 parser.
 *
 * Use this value to seed the `state` variable handed to parse_byte() before
 * the first byte of a string is parsed (to32i() starts from it).
 */
extern const uint16_t initial_state;
/**
 * Feed one byte (or the end-of-string marker) to the UTF-8 parser.
 *
 * Parameter ch: The character to parse. -1 for end of string.
 * Parameter state: The state. Mutated.
 * Returns: The codepoint, or -1 if no codepoint emitted.
 *
 * Note: When called with EOF, max 1 codepoint can be emitted.
 * Note: Declared noexcept; this replaces the deprecated (and, as of C++20,
 *       removed) `throw()` specification with the equivalent modern spelling.
 */
int32_t parse_byte(int ch, uint16_t& state) noexcept;
/**
 * Return length of string in UTF-8 codepoints.
 *
 * Parameter str: The string.
 * Returns: The length in codepoints.
 *
 * Note: Declared noexcept; this replaces the deprecated (and, as of C++20,
 *       removed) `throw()` specification with the equivalent modern spelling.
 */
size_t strlen(const std::string& str) noexcept;
/**
 * Transform UTF-8 into UTF-32.
 *
 * Parameter utf8: The UTF-8 encoded string.
 * Returns: The string as UTF-32.
 */
std::u32string to32(const std::string& utf8);
/**
 * Transform UTF-32 into UTF-8.
 *
 * Parameter utf32: The UTF-32 string.
 * Returns: The string encoded as UTF-8.
 */
std::string to8(const std::u32string& utf32);
45 * Iterator copy from UTF-8 to UTF-32
47 template<typename srcitr
, typename dstitr
>
48 inline void to32i(srcitr begin
, srcitr end
, dstitr target
)
50 uint16_t state
= initial_state
;
51 for(srcitr i
= begin
; i
!= end
; i
++) {
52 int32_t x
= parse_byte((unsigned char)*i
, state
);
58 int32_t x
= parse_byte(-1, state
);