lsnes rr2-β23
[lsnes.git] / include / library / utf8.hpp
blob7239fccda31253bdc36969b4861decc323a1a584
1 #ifndef _library__utf8__hpp__included__
2 #define _library__utf8__hpp__included__
4 #include <iostream>
5 #include <cstdint>
6 #include <cstdlib>
7 #include <string>
8 #include <functional>
namespace utf8
{
/**
 * Initial state for UTF-8 parser.
 *
 * Copy this into a local state variable before feeding the first byte of a
 * string to parse_byte().
 */
extern const uint16_t initial_state;

/**
 * Parse a byte.
 *
 * Parameter ch: The character to parse. -1 for end of string.
 * Parameter state: The state. Mutated.
 * Returns: The codepoint, or -1 if no codepoint emitted.
 *
 * Note: When called with EOF, max 1 codepoint can be emitted.
 */
int32_t parse_byte(int ch, uint16_t& state) noexcept;

/**
 * Return length of string in UTF-8 codepoints.
 *
 * Parameter str: The string.
 * Returns: The length in codepoints.
 */
size_t strlen(const std::string& str) noexcept;

/**
 * Transform UTF-8 into UTF-32.
 *
 * Parameter utf8: The UTF-8 encoded string.
 * Returns: The string as UTF-32.
 */
std::u32string to32(const std::string& utf8);

/**
 * Transform UTF-32 into UTF-8.
 *
 * Parameter utf32: The UTF-32 encoded string.
 * Returns: The string as UTF-8.
 */
std::string to8(const std::u32string& utf32);

/**
 * Iterator copy from UTF-8 to UTF-32
 *
 * Each element of [begin, end) is converted to unsigned char and fed to
 * parse_byte(), starting from initial_state. Every non-negative result is
 * written through target, which is then advanced. After the last element,
 * parse_byte() is called once more with -1 (end of string) so that a final
 * codepoint, if one is emitted, is written as well.
 *
 * Parameter begin: Iterator to the first UTF-8 byte.
 * Parameter end: Iterator one past the last UTF-8 byte.
 * Parameter target: Output iterator receiving the codepoints.
 */
template<typename srcitr, typename dstitr>
inline void to32i(srcitr begin, srcitr end, dstitr target)
{
	uint16_t state = initial_state;
	for(srcitr i = begin; i != end; ++i) {
		// Go through unsigned char so that bytes >= 0x80 are never passed as
		// negative values; -1 is reserved for the end-of-string marker.
		const int32_t codepoint = parse_byte(static_cast<unsigned char>(*i), state);
		if(codepoint >= 0) {
			*target = codepoint;
			++target;
		}
	}
	// Signal end of string; this may emit at most one more codepoint.
	const int32_t last = parse_byte(-1, state);
	if(last >= 0) {
		*target = last;
		++target;
	}
}
}
66 #endif