1 #include <boost/regex.hpp>
2 #include "parseval.hpp"
//Interpret a human-readable boolean keyword.
//
//The comparison ignores ASCII letter case. Case folding is done by hand rather than through the C locale so
//that the result does not depend on the process locale and no character is ever passed to a <cctype>
//function as a negative value.
//
//Parameters:
//  x: The text to interpret.
//
//Returns: 1 for "on", "true", "yes", "1", "enable" or "enabled"; 0 for "off", "false", "no", "0", "disable"
//  or "disabled"; -1 for anything else (including the empty string).
int string_to_bool(const std::string& x)
{
	std::string y = x;
	for(size_t i = 0; i < y.length(); i++)
		if(y[i] >= 'A' && y[i] <= 'Z')
			y[i] = static_cast<char>(y[i] - 'A' + 'a');
	if(y == "on" || y == "true" || y == "yes" || y == "1" || y == "enable" || y == "enabled")
		return 1;
	if(y == "off" || y == "false" || y == "no" || y == "0" || y == "disable" || y == "disabled")
		return 0;
	//Neither a recognized "true" nor a recognized "false" keyword.
	return -1;
}
16 regex_results::regex_results()
21 regex_results::regex_results(std::vector
<std::string
> res
)
27 regex_results::operator bool() const
32 bool regex_results::operator!() const
37 size_t regex_results::size() const
39 return results
.size();
41 const std::string
& regex_results::operator[](size_t i
) const
46 regex_results
regex(const std::string
& regexp
, const std::string
& str
, const char* ex
) throw(std::bad_alloc
,
49 static std::map
<std::string
, boost::regex
*> regexps
;
50 if(!regexps
.count(regexp
)) {
51 boost::regex
* y
= NULL
;
53 y
= new boost::regex(regexp
, boost::regex::extended
& ~boost::regex::collate
);
55 } catch(std::bad_alloc
& e
) {
58 } catch(std::exception
& e
) {
59 throw std::runtime_error(e
.what());
63 boost::smatch matches
;
64 bool x
= boost::regex_match(str
.begin(), str
.end(), matches
, *(regexps
[regexp
]));
66 std::vector
<std::string
> res
;
67 for(size_t i
= 0; i
< matches
.size(); i
++)
68 res
.push_back(matches
.str(i
));
69 return regex_results(res
);
71 throw std::runtime_error(ex
);
73 return regex_results();
76 bool regex_match(const std::string
& regexp
, const std::string
& str
) throw(std::bad_alloc
, std::runtime_error
)
78 return regex(regexp
, str
);
83 //First nibble values:
93 //Second nibble values:
94 //0 => Return NO CHARACTER and transition to another state with substate 0.
95 //1 => Return the character and transition to another state with substate 0.
96 //2 => Return invalid character and transition to another state with substate 0.
97 //3 => Memorize character minus 192, return NO CHARACTER and transition to another state.
98 //4 => Memorize character minus 224, return NO CHARACTER and transition to another state.
99 //5 => Memorize character minus 240, return NO CHARACTER and transition to another state.
100 //6 => Memorize byte, return invalid character and transition to another state.
101 //7 => Return 2-byte value and transition to another state.
102 //8 => Combine memorized, return NO CHARACTER and transition to another state.
103 //9 => Return 3-byte value and transition to another state.
104 //A => Return 4-byte value and transition to another state.
105 //B => Handle memorized character and EOF.
106 //C => Handle memorized character and continuation.
//State transition table of the UTF-8 decoder: 8 rows (decoder states) of 7 columns (input classes),
//indexed as transitions[row * 7 + column].
//
//Each entry packs two nibbles: the high nibble is the row to move to, the low nibble is the action to
//perform (the "second nibble values" listed above).
//
//Columns follow the input classification done by utf8_parse_byte():
//  E = ch < 0, 1 = ch < 128, C = ch < 192, 2 = ch < 224, 3 = ch < 240, 4 = ch < 248, I = everything else.
const unsigned char transitions[] = {
	//E	//1	//C	//2	//3	//4	//I
	0x00,	0x01,	0x02,	0x13,	0x24,	0x35,	0x02,	//INITIAL
	0x01,	0x66,	0x07,	0x66,	0x66,	0x66,	0x66,	//S_2_2
	0x01,	0x66,	0x48,	0x66,	0x66,	0x66,	0x66,	//S_2_3
	0x01,	0x66,	0x58,	0x66,	0x66,	0x66,	0x66,	//S_2_4
	0x01,	0x66,	0x09,	0x66,	0x66,	0x66,	0x66,	//S_3_3
	0x01,	0x66,	0x88,	0x66,	0x66,	0x66,	0x66,	//S_3_4
	0x0B,	0x6C,	0x6C,	0x6C,	0x6C,	0x6C,	0x6C,	//INIT_RE
	0x01,	0x66,	0x0A,	0x66,	0x66,	0x66,	0x66	//S_4_4
};
//State value to start decoding a new UTF-8 stream with (what utf8_strlen() seeds its decoder state from).
extern const uint16_t utf8_initial_state = 0;
//Feed one input value to the table-driven UTF-8 decoder.
//
//Parameters:
//  ch: The input value. It is sorted into an input class by range: negative => 0, below 128 => 1,
//      below 192 => 2, below 224 => 3, below 240 => 4, below 248 => 5.
//  state: The decoder state, read and updated in place. Its top 4 bits select the row of transitions
//      (values above 7 use row 7); the lower bits carry data memorized between calls.
//
//Returns: NOTE(review): the return statements are not visible in this excerpt. utf8_strlen() in this file
//  counts every call whose result is >= 0 as one decoded character.
122 int32_t utf8_parse_byte(int ch
, uint16_t& state
) throw()
//Copy of ch that fits a byte; anything from 248 up is stored as 248.
124 unsigned char mch
= (ch
< 248) ? ch
: 248;
125 uint32_t astate
= state
>> 12;
//The table has only rows 0-7, so every larger value shares row 7.
128 if(astate
> 7) astate
= 7;
//Pick the table column from the range ch falls in.
129 if(ch
< 0) iclass
= 0;
130 else if(ch
< 128) iclass
= 1;
131 else if(ch
< 192) iclass
= 2;
132 else if(ch
< 224) iclass
= 3;
133 else if(ch
< 240) iclass
= 4;
134 else if(ch
< 248) iclass
= 5;
//High nibble of ctrl is the next row; (ctrl & 0xF0) * 256 moves it into the top 4 bits of state.
136 unsigned char ctrl
= transitions
[astate
* 7 + iclass
];
140 state
= (ctrl
& 0xF0) * 256;
143 state
= (ctrl
& 0xF0) * 256;
146 state
= (ctrl
& 0xF0) * 256;
//Next row plus the memorized lead-byte payload (ch with the lead marker subtracted).
149 state
= (ctrl
& 0xF0) * 256 + ch
- 192;
152 state
= (ctrl
& 0xF0) * 256 + ch
- 224;
155 state
= (ctrl
& 0xF0) * 256 + ch
- 240;
//Next row plus the raw (clamped) input value.
158 state
= (ctrl
& 0xF0) * 256 + mch
;
//Memorized bits shifted up by 6, plus ch - 128.
161 tmp
= (state
& 0xFFF) * 64 + ch
- 128;
164 state
= (ctrl
& 0xF0) * 256;
//Same combination, but stored back into state next to the next-row bits instead of into tmp.
167 state
= (ctrl
& 0xF0) * 256 + (state
& 0xFFF) * 64 + ch
- 128;
170 tmp
= (state
& 0xFFF) * 64 + ch
- 128;
//Flags values below 0x800, values in 0xD800-0xDFFF and values whose low 16 bits are 0xFFFE or 0xFFFF.
171 if(tmp
< 0x800 || (tmp
& 0xF800) == 0xD800 || (tmp
& 0xFFFE) == 0xFFFE)
173 state
= (ctrl
& 0xF0) * 256;
//Here 15 bits of state are used as memorized data rather than 12.
176 tmp
= (state
& 0x7FFF) * 64 + ch
- 128;
//Flags values below 0x10000, above 0x10FFFD, and values whose low 16 bits are 0xFFFE or 0xFFFF.
177 if(tmp
< 0x10000 || tmp
> 0x10FFFD || (tmp
& 0xFFFE) == 0xFFFE)
179 state
= (ctrl
& 0xF0) * 256;
186 state
= (ctrl
& 0xF0) * 256;
//From here on the low byte of state is examined; 0x6000 selects row 6 (labelled INIT_RE in the table).
190 if((state
& 0x80) == 0) {
192 state
= 0x6000 + mch
;
194 } else if((state
& 0xF8) == 0xF8 || (state
& 0xF8) == 0x80) {
195 //Continuation or invalid.
196 state
= 0x6000 + mch
;
198 } else if(iclass
== 0) {
202 } else if(iclass
!= 2) {
204 state
= 0x6000 + mch
;
206 } else if((state
& 0xE0) == 0xC0) {
207 //Complete 2-byte sequence.
208 tmp
= (state
& 0x1F) * 64 + (ch
& 0x3F);
213 } else if((state
& 0xF0) == 0xE0) {
214 //First 2 bytes of 3-byte sequence.
//0x4000 selects row 4 (labelled S_3_3 in the table).
215 state
= 0x4000 + (state
& 0x0F) * 64 + (ch
& 0x3F);
217 } else if((state
& 0xF8) == 0xF0) {
218 //First 2 bytes of 4-byte sequence.
//0x5000 selects row 5 (labelled S_3_4 in the table).
219 state
= 0x5000 + (state
& 0x07) * 64 + (ch
& 0x3F);
226 size_t utf8_strlen(const std::string
& str
) throw()
228 uint16_t s
= utf8_initial_state
;
230 for(size_t i
= 0; i
< str
.length(); i
++)
231 if(utf8_parse_byte(static_cast<uint8_t>(str
[i
]), s
) >= 0)
233 if(utf8_parse_byte(-1, s
) >= 0)
238 std::u32string
to_u32string(const std::string
& utf8
)
241 x
.resize(utf8_strlen(utf8
));
242 copy_from_utf8(utf8
.begin(), utf8
.end(), x
.begin());
246 std::string
to_u8string(const std::u32string
& utf32
)
248 std::ostringstream s
;
249 for(auto i
: utf32
) {
251 s
<< (unsigned char)i
;
253 s
<< (unsigned char)(0xC0 + (i
>> 6)) << (unsigned char)(0x80 + (i
& 0x3F));
255 s
<< (unsigned char)(0xE0 + (i
>> 12)) << (unsigned char)(0x80 + ((i
>> 6) & 0x3F))
256 << (unsigned char)(0x80 + (i
& 0x3F));
257 else if(i
< 0x10FFFF)
258 s
<< (unsigned char)(0xF0 + (i
>> 18)) << (unsigned char)(0x80 + ((i
>> 12) & 0x3F))
259 << (unsigned char)(0x80 + ((i
>> 6) & 0x3F))
260 << (unsigned char)(0x80 + (i
& 0x3F));