Fix integer overflow in ft_rendered_size_line
[ilaris-y4m-tools.git] / parseval.cpp
bloba787c3b3839389185290d078a1db6cffde187104
1 #include <boost/regex.hpp>
2 #include "parseval.hpp"
//Parse a human-readable boolean.
//
//The comparison is case-insensitive.
//
//x: The string to parse.
//Returns: 1 for "on"/"true"/"yes"/"1"/"enable"/"enabled", 0 for "off"/"false"/"no"/"0"/"disable"/"disabled",
//	-1 if the string is none of these.
int string_to_bool(const std::string& x)
{
	std::string y = x;
	//tolower() is only defined for values representable as unsigned char (or EOF), so bytes >= 0x80 must
	//not be passed as (possibly negative) plain char.
	for(char& c : y)
		c = static_cast<char>(tolower(static_cast<unsigned char>(c)));
	if(y == "on" || y == "true" || y == "yes" || y == "1" || y == "enable" || y == "enabled")
		return 1;
	if(y == "off" || y == "false" || y == "no" || y == "0" || y == "disable" || y == "disabled")
		return 0;
	return -1;
}
16 regex_results::regex_results()
18 matched = false;
21 regex_results::regex_results(std::vector<std::string> res)
23 matched = true;
24 results = res;
27 regex_results::operator bool() const
29 return matched;
32 bool regex_results::operator!() const
34 return !matched;
37 size_t regex_results::size() const
39 return results.size();
41 const std::string& regex_results::operator[](size_t i) const
43 return results[i];
46 regex_results regex(const std::string& regexp, const std::string& str, const char* ex) throw(std::bad_alloc,
47 std::runtime_error)
49 static std::map<std::string, boost::regex*> regexps;
50 if(!regexps.count(regexp)) {
51 boost::regex* y = NULL;
52 try {
53 y = new boost::regex(regexp, boost::regex::extended & ~boost::regex::collate);
54 regexps[regexp] = y;
55 } catch(std::bad_alloc& e) {
56 delete y;
57 throw;
58 } catch(std::exception& e) {
59 throw std::runtime_error(e.what());
63 boost::smatch matches;
64 bool x = boost::regex_match(str.begin(), str.end(), matches, *(regexps[regexp]));
65 if(x) {
66 std::vector<std::string> res;
67 for(size_t i = 0; i < matches.size(); i++)
68 res.push_back(matches.str(i));
69 return regex_results(res);
70 } else if(ex)
71 throw std::runtime_error(ex);
72 else
73 return regex_results();
76 bool regex_match(const std::string& regexp, const std::string& str) throw(std::bad_alloc, std::runtime_error)
78 return regex(regexp, str);
namespace
{
	//Transition table of the incremental UTF-8 decoder. Each entry is one byte: the first (high) nibble is
	//the state to transition to, the second (low) nibble is the action to perform.
	//
	//First nibble values:
	//0 => INITIAL
	//1 => S_2_2
	//2 => S_2_3
	//3 => S_2_4
	//4 => S_3_3
	//5 => S_3_4
	//6 => INIT_RE
	//7 => (unused)
	//8 => S_4_4
	//
	//The parser clamps the top nibble of the state to 7 before indexing, so S_4_4 (whose memorized bits
	//spill into the top nibble) is looked up in the last row of the table.
	//
	//Second nibble values:
	//0 => Return NO CHARACTER and transition to another state with substate 0.
	//1 => Return the character and transition to another state with substate 0.
	//2 => Return invalid character and transition to another state with substate 0.
	//3 => Memorize character minus 192, return NO CHARACTER and transition to another state.
	//4 => Memorize character minus 224, return NO CHARACTER and transition to another state.
	//5 => Memorize character minus 240, return NO CHARACTER and transition to another state.
	//6 => Memorize byte, return invalid character and transition to another state.
	//7 => Return 2-byte value and transition to another state.
	//8 => Combine memorized, return NO CHARACTER and transition to another state.
	//9 => Return 3-byte value and transition to another state.
	//A => Return 4-byte value and transition to another state.
	//B => Handle memorized character and EOF.
	//C => Handle memorized character and continuation.
	//
	//Columns (input classes): E = end of input (negative), 1 = 0x00-0x7F, C = 0x80-0xBF (continuation),
	//2 = 0xC0-0xDF, 3 = 0xE0-0xEF, 4 = 0xF0-0xF7, I = anything else (invalid).
	//
	//NOTE(review): In the mid-sequence rows, column E uses action 1, which returns the (negative) input, so
	//a sequence truncated by end of input produces no replacement character. Confirm this is intended.
	const unsigned char transitions[] = {
		//E	//1	//C	//2	//3	//4	//I
		0x00,	0x01,	0x02,	0x13,	0x24,	0x35,	0x02,	//INITIAL
		0x01,	0x66,	0x07,	0x66,	0x66,	0x66,	0x66,	//S_2_2
		0x01,	0x66,	0x48,	0x66,	0x66,	0x66,	0x66,	//S_2_3
		0x01,	0x66,	0x58,	0x66,	0x66,	0x66,	0x66,	//S_2_4
		0x01,	0x66,	0x09,	0x66,	0x66,	0x66,	0x66,	//S_3_3
		0x01,	0x66,	0x88,	0x66,	0x66,	0x66,	0x66,	//S_3_4
		0x0B,	0x6C,	0x6C,	0x6C,	0x6C,	0x6C,	0x6C,	//INIT_RE
		0x01,	0x66,	0x0A,	0x66,	0x66,	0x66,	0x66	//S_4_4
	};
}

//State value to start decoding with.
extern const uint16_t utf8_initial_state = 0;

//Feed one byte (or end of input) to the incremental UTF-8 decoder.
//
//ch: The next byte (0-255), or a negative value to signal end of input.
//state: Decoder state. Initialize to utf8_initial_state before the first byte; updated by each call.
//Returns: The decoded code point, 0xFFFD for invalid input, or -1 if this call produced no character.
int32_t utf8_parse_byte(int ch, uint16_t& state) throw()
{
	//Byte as it is memorized for resynchronization; everything from 248 up is folded into 248.
	unsigned char mch = (ch < 248) ? ch : 248;
	uint32_t astate = state >> 12;
	uint32_t iclass;
	uint32_t tmp;
	//The low bits of the top nibble may carry data (S_4_4), so everything from 7 up selects the last row.
	if(astate > 7) astate = 7;
	if(ch < 0) iclass = 0;
	else if(ch < 128) iclass = 1;
	else if(ch < 192) iclass = 2;
	else if(ch < 224) iclass = 3;
	else if(ch < 240) iclass = 4;
	else if(ch < 248) iclass = 5;
	else iclass = 6;
	unsigned char ctrl = transitions[astate * 7 + iclass];
	//Next state with an empty substate.
	const uint16_t next = static_cast<uint16_t>((ctrl & 0xF0) * 256);

	switch(ctrl & 0xF) {
	case 0x0:
		state = next;
		return -1;
	case 0x1:
		state = next;
		return ch;
	case 0x2:
		state = next;
		return 0xFFFD;
	case 0x3:
		state = next + ch - 192;
		return -1;
	case 0x4:
		state = next + ch - 224;
		return -1;
	case 0x5:
		state = next + ch - 240;
		return -1;
	case 0x6:
		state = next + mch;
		return 0xFFFD;
	case 0x7:
		tmp = (state & 0xFFF) * 64 + ch - 128;
		//Reject overlong encodings.
		if(tmp < 0x80)
			tmp = 0xFFFD;
		state = next;
		return tmp;
	case 0x8:
		state = next + (state & 0xFFF) * 64 + ch - 128;
		return -1;
	case 0x9:
		tmp = (state & 0xFFF) * 64 + ch - 128;
		//Reject overlong encodings, surrogates and the values 0xFFFE/0xFFFF.
		if(tmp < 0x800 || (tmp & 0xF800) == 0xD800 || (tmp & 0xFFFE) == 0xFFFE)
			tmp = 0xFFFD;
		state = next;
		return tmp;
	case 0xA:
		tmp = (state & 0x7FFF) * 64 + ch - 128;
		//Reject overlong encodings, out-of-range values and values ending in 0xFFFE/0xFFFF.
		if(tmp < 0x10000 || tmp > 0x10FFFD || (tmp & 0xFFFE) == 0xFFFE)
			tmp = 0xFFFD;
		state = next;
		return tmp;
	case 0xB:
		//End of input with a memorized byte: emit it if it is ASCII, otherwise report it as invalid.
		if(state & 0x80)
			tmp = 0xFFFD;
		else
			tmp = state & 0x7F;
		state = next;
		return tmp;
	case 0xC:
		//This is nasty.
		if((state & 0x80) == 0) {
			//Memorized byte is ASCII: emit it and memorize the current byte instead.
			tmp = state & 0x7F;
			state = 0x6000 + mch;
			return tmp;
		} else if((state & 0xF8) == 0xF8 || (state & 0xC0) == 0x80) {
			//Continuation or invalid. Continuation bytes are 0x80-0xBF, so the test has to mask
			//with 0xC0; masking with 0xF8 would only recognize 0x80-0x87 and let the rest fall
			//through without reporting anything.
			state = 0x6000 + mch;
			return 0xFFFD;
		} else if(iclass == 0) {
			//Incomplete.
			state = 0;
			return 0xFFFD;
		} else if(iclass != 2) {
			//Bad sequence.
			state = 0x6000 + mch;
			return 0xFFFD;
		} else if((state & 0xE0) == 0xC0) {
			//Complete 2-byte sequence.
			tmp = (state & 0x1F) * 64 + (ch & 0x3F);
			state = 0;
			if(tmp < 0x80)
				tmp = 0xFFFD;
			return tmp;
		} else if((state & 0xF0) == 0xE0) {
			//First 2 bytes of 3-byte sequence.
			state = 0x4000 + (state & 0x0F) * 64 + (ch & 0x3F);
			return -1;
		} else if((state & 0xF8) == 0xF0) {
			//First 2 bytes of 4-byte sequence.
			state = 0x5000 + (state & 0x07) * 64 + (ch & 0x3F);
			return -1;
		}
	}
	return -1;
}
226 size_t utf8_strlen(const std::string& str) throw()
228 uint16_t s = utf8_initial_state;
229 size_t r = 0;
230 for(size_t i = 0; i < str.length(); i++)
231 if(utf8_parse_byte(static_cast<uint8_t>(str[i]), s) >= 0)
232 r++;
233 if(utf8_parse_byte(-1, s) >= 0)
234 r++;
235 return r;
238 std::u32string to_u32string(const std::string& utf8)
240 std::u32string x;
241 x.resize(utf8_strlen(utf8));
242 copy_from_utf8(utf8.begin(), utf8.end(), x.begin());
243 return x;
//Convert an UTF-32 string to UTF-8.
//
//Each element is encoded using 1 to 4 bytes depending on its value. Elements above 0x10FFFF cannot be
//encoded and are skipped. No other validation (e.g. of surrogates) is performed.
//
//utf32: The string to convert.
//Returns: The converted string.
std::string to_u8string(const std::u32string& utf32)
{
	std::string out;
	//Every element produces at least one byte unless it is skipped.
	out.reserve(utf32.size());
	for(const char32_t cp : utf32) {
		if(cp < 0x80) {
			out += static_cast<char>(cp);
		} else if(cp < 0x800) {
			out += static_cast<char>(0xC0 + (cp >> 6));
			out += static_cast<char>(0x80 + (cp & 0x3F));
		} else if(cp < 0x10000) {
			out += static_cast<char>(0xE0 + (cp >> 12));
			out += static_cast<char>(0x80 + ((cp >> 6) & 0x3F));
			out += static_cast<char>(0x80 + (cp & 0x3F));
		} else if(cp <= 0x10FFFF) {
			//The bound is inclusive: 0x10FFFF is the last code point.
			out += static_cast<char>(0xF0 + (cp >> 18));
			out += static_cast<char>(0x80 + ((cp >> 12) & 0x3F));
			out += static_cast<char>(0x80 + ((cp >> 6) & 0x3F));
			out += static_cast<char>(0x80 + (cp & 0x3F));
		}
	}
	return out;
}