Refactoring: copy_from_utf8()
[lsnes.git] / include / library / utf8.hpp
bloba050b8c5cecb77dbd31c2b6432ee2c34e8055535
1 #ifndef _library__utf8__hpp__included__
2 #define _library__utf8__hpp__included__
4 #include <cstdint>
5 #include <cstdlib>
6 #include <string>
/**
 * Initial state for UTF-8 parser.
 *
 * A parser state variable is set to this value before the first call to utf8_parse_byte()
 * for a new string (copy_from_utf8() does exactly that).
 */
extern const uint16_t utf8_initial_state;
/**
 * Parse a byte.
 *
 * Parameter ch: The character to parse. -1 for end of string.
 * Parameter state: The state. Mutated. Start from utf8_initial_state for a new string.
 * Returns: The codepoint, or -1 if no codepoint emitted.
 *
 * Note: When called with EOF, max 1 codepoint can be emitted.
 * Note: Declared throw(), i.e. does not throw.
 * NOTE(review): behaviour for values of ch that are negative but not -1 is not described
 * here -- confirm against the implementation before passing sign-extended chars.
 */
int32_t utf8_parse_byte(int ch, uint16_t& state) throw();
/**
 * Return length of string in UTF-8 codepoints.
 *
 * Parameter str: The string.
 * Returns: The length in codepoints.
 *
 * Note: Declared throw(), i.e. does not throw.
 */
size_t utf8_strlen(const std::string& str) throw();
30 /**
31 * Iterator copy from UTF-8 to UTF-32
33 template<typename srcitr, typename dstitr>
34 inline void copy_from_utf8(srcitr begin, srcitr end, dstitr target)
36 uint16_t state = utf8_initial_state;
37 for(srcitr i = begin; i != end; i++) {
38 int32_t x = utf8_parse_byte(*i, state);
39 if(x >= 0) {
40 *target = x;
41 ++target;
44 int32_t x = utf8_parse_byte(-1, state);
45 if(x >= 0) {
46 *target = x;
47 ++target;
51 #endif