2 Copyright 2013 Karel Matas
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation, either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <http://www.gnu.org/licenses/>.
28 #include <FL/fl_utf8.h>
36 * \note http://scripts.sil.org/cms/scripts/page.php?site_id=nrsi&item_id=IWS-Appendix
37 * \note http://home.telfort.nl/~t876506/utf8tbl.html
47 * Max. length of tagname in parse_markup()
48 * \note should be enough for now (tags without arguments)
// Upper bound on the length of a tag name handled by parse_markup().
// Tags carry no arguments, so 32 was judged sufficient by the original author.
50 const int MAX_TAGNAME_LENGTH
= 32;
//! Exception signalling that a file could not be opened.
class CantOpenFile : public std::exception
{
public:
    //! \param filename text stored verbatim and later returned by what()
    CantOpenFile ( const std::string &filename ) : msg_(filename) {}

    //! \return the stored text; the pointer stays valid while this object lives
    const char *what() const noexcept override { return msg_.c_str(); }

private:
    std::string msg_; //!< text handed to the constructor
};
//! Exception signalling that an input string could not be parsed.
class ParsingError : public std::exception
{
public:
    //! \param filename despite its name this is the complete error message;
    //!        it is stored verbatim and later returned by what()
    ParsingError ( const std::string &filename ) : msg_(filename) {}

    //! \return the stored message; the pointer stays valid while this object lives
    const char *what() const noexcept override { return msg_.c_str(); }

private:
    std::string msg_; //!< message handed to the constructor
};
69 //! Histogram. Counts inserted items. Returns items sorted by their key or counts.
79 //! Reset the histogram by emptying the underlying container map_.
80 inline void clear () { map_
.clear(); }
82 //! Add an item to the histogram.
83 inline void add ( const T
&item
){
84 if ( map_
.find(item
) == map_
.end() )
90 //! Add vector of items to the histogram.
// NOTE(review): the body continues beyond this excerpt; presumably each element of v is passed to add() - confirm.
91 inline void add ( const vector
<T
> &v
) {
96 //! Returns items sorted by the key.
97 inline std::map
<T
,int> map () { return map_
; }
99 //! Returns items sorted by their count (ascending order).
100 inline std::multimap
<int,T
> sorted () {
101 std::multimap
<int,T
> mm
;
102 for ( auto mi
: map_
)
103 mm
.insert( {mi
.second
,mi
.first
} );
/**
 * Finds first digit (0-9) in input string.
 * \param s input string (NUL-terminated)
 * \return position of first digit in string or length of the string if no digit found
 */
inline size_t find_first_digit ( const char *s )
{
    size_t pos = 0;
    // Walk to the terminator once instead of calling strlen() on every iteration.
    while ( s[pos] != '\0' )
    {
        // Inclusive bounds: the former strict comparison (> '0' && < '9')
        // did not recognise '0' and '9' as digits.
        if ( s[pos] >= '0' && s[pos] <= '9' )
            break;
        ++pos;
    }
    return pos;
}
126 * Groups similar strings in the input vector. Similar strings are strings
127 * which differ only in the number they end with.
128 * <b>Example: </b><br>
129 * { "a 1", "d", "b 3", "ddd", "a 4", "b 2" }<br>-><br>
130 * { "a 1", "a 4" }, { "b 2", "b 3" }, { "d" }, { "ddd" }
131 * \param v input vector of strings
132 * \return alphabetically sorted vector of grouped strings
// Declaration only; the definition is not part of this excerpt.
// Returns a vector of groups, each group being a vector of strings.
// NOTE(review): v is taken by non-const reference - whether the implementation
// reorders or otherwise modifies it cannot be seen here; confirm before relying on it.
134 vector
<vector
<string
>> separate_groups ( vector
<string
> &v
);
138 * Checks ZLIB error code. In case of an error throws exception with
139 * description of the error.
140 * \exception std::runtime_error
141 * \param err ZLIB error code
// Declaration only; the definition is not part of this excerpt.
// err is a zlib return code; returns nothing, failure is reported by exception.
143 void check_zlib_error ( int err
);
147 * Decompresses a gzip file; copies the file if it is not gzipped.
148 * Throws an exception if the file cannot be opened. Calls check_zlib_error().
149 * \exception std::runtime_error
150 * \param infilename input file name
151 * \param outfilename output file name
// Declaration only; the definition is not part of this excerpt.
// Both arguments are NUL-terminated file names: the file to read and the file to write.
153 void gzip_decompress_file( const char *infilename
, const char *outfilename
);
157 * Parse string containing integer range and returns minimum and maximum.
158 * Range is separated by -.
162 * 4-8 between 4 and 8
163 * Minimal acceptable value is 0. maximal 99.
164 * \param s string containing integer range
165 * \return { minimal_value, maximal_value }
// Splits a textual range into its two bounds and converts both with std::stoi.
// NOTE(review): s.back() below is undefined behaviour when s is empty - confirm callers never pass "".
167 inline std::pair
<int,int> parse_range ( const string
&s
)
// Position of the first '-' in s (string::npos when there is none).
172 size_t pos
= s
.find("-");
// Trailing '+': everything before it becomes the minimum.
173 if ( s
.back() == '+' )
174 min
= s
.substr( 0, s
.size()-1 );
// Trailing '-': everything before it becomes the maximum.
175 else if ( s
.back() == '-' )
176 max
= s
.substr( 0, s
.size()-1 );
// '-' elsewhere: the text before it is the minimum, the text after it the maximum.
177 else if ( pos
!= string::npos
){
178 min
= s
.substr(0, pos
);
179 max
= s
.substr( pos
+1, s
.size() );
// std::stoi throws std::invalid_argument for text without a leading number (e.g. an empty
// bound such as the minimum of "-5") and std::out_of_range for values that do not fit an int.
186 return {std::stoi(min
),std::stoi(max
)};
/**
 * Tries to guess the filename from an url. Filename is created as substring from url,
 * beginning after position of the last /.
 * The guess fails when the url contains no / at all or when nothing follows the last /.
 * \param url url to derive the filename from
 * \return guessed filename or "downloaded.file" when guess failed
 */
inline std::string guess_fname_from_url ( const std::string &url )
{
    // Test for npos BEFORE adding 1: the former code compared
    // find_last_of()+1 with npos, which can never match (npos+1 wraps to 0),
    // so the fallback below was unreachable.
    const std::string::size_type slash = url.find_last_of('/');
    if ( slash != std::string::npos && slash + 1 < url.size() )
        return url.substr( slash + 1 );

    return "downloaded.file";
}
207 * Convenient function for finding element in the vector.
209 * <pre>return std::find( v.begin(), v.end(), elt ) != v.end();</pre>
// True when at least one element of v compares equal to elt.
// Linear search from the front of the vector via std::find.
212 inline bool is_in ( const vector
<T
> &v
, const T
&elt
){
213 return std::find( v
.begin(), v
.end(), elt
) != v
.end();
/**
 * Check whether string contains a non-negative integer written with the
 * digits 0-9 only (no sign, no whitespace).
 * \param s input string (NUL-terminated)
 * \return true if s is non-empty and contains only 0-9, false otherwise
 */
inline bool isint ( const char *s )
{
    // An empty string holds no number at all; the plain "only digits" scan
    // would accept it vacuously.
    if ( s[0] == '\0' )
        return false;

    // Scan up to the terminator; no strlen() call per iteration.
    for ( size_t i = 0; s[i] != '\0'; ++i )
        if ( s[i] < '0' || s[i] > '9' )
            return false;

    return true;
}
231 * Reads the string s until character end is found (or until \0 is reached).
232 * Throws an exception if the string is invalid.
233 * \exception ParsingError
234 * \param s [in] input string
235 * \param pos [in,out] starting position in the s, pos is modified in the process
236 * (and contains position of the <b>end</b>)
237 * \param end the input string is read until this character is reached
238 * \return substring starting at input pos and ending before character <b>end</b>
// Scans s for the character `end`; pos points to the caller's position variable.
240 inline string
read_until ( const char *s
, size_t *pos
, const char end
)
// NOTE(review): variable-length array - a compiler extension in C++. It has strlen(s)
// elements (no extra slot for a terminating NUL) and is an automatic (local) array.
242 char buff
[strlen(s
)]; // should be enough
// Keep going until the requested character or the end of the string, whichever comes first.
245 while ( s
[i
] != end
&& s
[i
] != '\0' )
// Error path; the message embeds the complete input string.
// (The condition guarding this throw is not visible in this excerpt.)
248 throw ParsingError("read_until(): Invalid string: " + string(s
));
260 string tag
; //!< name of the tag
261 int pos
; //!< position in the raw string (returned by parse_markup())
262 int len
; //!< length of the tagged text
// Orders tags by pos alone; tag and len take no part in the comparison.
265 bool operator< ( const TextTag
&rhs
) const { return (this->pos
< rhs
.pos
); };
270 * Parses string with simple markup (without attributes and overlapping).
271 * Example: This is an <tag1>example</tag1> string with <tag2>markup</tag2>.
273 * \return String without tags and vector of TextTags.
// Declaration only; the definition is not part of this excerpt.
// Takes a NUL-terminated string and returns a pair: a string and a vector of TextTag.
275 std::pair
<string
,vector
<TextTag
>> parse_markup ( const char *s
);
278 //! Checks whether file path exists (i.e. can be opened).
279 inline bool file_exists ( const char *path
)
// The answer is whatever the stream object f reports via is_open().
// (The declaration of f is not visible in this excerpt.)
283 bool ret
= f
.is_open();
// Convenience overload for string paths: forwards s.c_str() to file_exists(const char*).
288 inline bool file_exists ( const string
&s
) { return file_exists(s
.c_str()); }
/**
 * Replaces all occurrences of the string <i>match</i> in <i>s</i> by <i>repl</i>.
 * Modifies input string. Text inserted by a replacement is not searched again,
 * so <i>repl</i> may safely contain <i>match</i>. An empty <i>match</i> leaves
 * <i>s</i> untouched.
 * \note Original author's remark: does not work with non-ASCII characters
 *       (at least with the libstdc++ of that time).
 * \param s string to be modified in place
 * \param match text to look for
 * \param repl text written in place of every occurrence of match
 */
inline void replace_all ( std::string &s, const std::string &match, const std::string &repl )
{
    // An empty pattern is found at every position and would never terminate.
    if ( match.empty() )
        return;

    std::string::size_type pos = 0;
    while ( (pos = s.find(match, pos)) != std::string::npos )
    {
        s.replace( pos, match.size(), repl );
        // Resume behind the inserted text; searching again from the same
        // position looped forever whenever repl itself contained match.
        pos += repl.size();
    }
}
/**
 * Strips whitespace from the start and end of the input string.
 * Whitespace is whatever isspace() reports for the current C locale.
 * \param s string to be stripped (NUL-terminated)
 * \return copy of the stripped input string; empty when s is empty or all whitespace
 */
inline std::string strip ( const char *s )
{
    size_t start = 0;
    size_t end = strlen(s);

    // isspace() requires a value representable as unsigned char; a plain
    // (possibly negative) char from UTF-8 text is undefined behaviour.
    while ( start < end && isspace(static_cast<unsigned char>(s[start])) )
        ++start;

    // Check the bounds BEFORE touching s[end-1]: with the operands the other
    // way round an empty string was read at index -1.
    while ( end > start && isspace(static_cast<unsigned char>(s[end-1])) )
        --end;

    return std::string( s + start, end - start );
}
321 * Joins elements of the input vector into string with separator sep.
323 * vector<string> v = { "a", "b", "c" }
324 * to_string(v, ":") returns "a:b:c"
325 * \param v input vector
326 * \param sep string to be used as separator between elements of v in resulting string
327 * \return joined string
// Builds the joined text in a std::stringstream; sep defaults to ", ".
329 template <class T
=string
>
330 inline string
to_string ( const vector
<T
> &v
, const char *sep
=", " )
// Empty input yields an empty string. The early return also keeps the unsigned
// expression v.size()-1 further down from wrapping around.
332 if ( v
.empty() ) return string("");
333 std::stringstream ss
;
// Writes s, then the separator unless the counter i has reached the last index.
// (The declarations of s and i are not visible in this excerpt.)
336 ss
<< s
<< ( ( i
++ < v
.size()-1 ) ? sep
:"" );
341 //! Joins elements of the input set into string: the set is copied into a
//! vector<T> (in the set's iteration order) and passed to the vector overload
//! of to_string() together with sep.
342 template <class T
=string
>
343 inline string
to_string ( const std::set
<T
> &s
, const char *sep
=", " )
344 { return to_string
<T
>( vector
<T
>(s
.begin(),s
.end()), sep
); }
348 * Splits string s by delimiters. Delimiters are same as in strtok().
// Declaration only; the definition is not part of this excerpt.
// delimiters defaults to a single space; returns the pieces as a vector of strings.
350 vector
<string
> split_string ( const string
&s
, const char *delimiters
=" " );
353 //! Splits string of integers into vector. See split_string()
// Declaration only; the definition is not part of this excerpt.
// delimiters defaults to a single space; returns a vector of int.
354 vector
<int> split_string_int ( const string
&s
, const char *delimiters
=" " );
358 * Splits string to chars. E.g. 'hello' -> 'h','e','l','l','o'
359 * \todo check & process errors
// Declaration only; the definition is not part of this excerpt.
// Takes a NUL-terminated string; each returned element is itself a string.
361 vector
<string
> str_to_chars ( const char *s
);
365 * Splits string s to chars and returns vector of their utf8 codes.
// Walks s with the FLTK UTF-8 helpers fl_utf8len() and fl_utf8decode().
367 inline std::vector
<unsigned int> utf8_to_ints ( const char *s
)
369 std::vector
<unsigned int> v
;
// NOTE(review): strlen(s) is evaluated again on every pass through the loop.
371 while ( i
< strlen(s
) ){
// Length of the sequence as derived from its first byte.
// NOTE(review): FLTK documents a negative result for an invalid lead byte - confirm it is handled.
372 int len
= fl_utf8len(s
[i
]);;
// Decode the bytes in [s+i, s+i+len); dlen is written by fl_utf8decode().
375 unsigned int dec
= fl_utf8decode( s
+i
, s
+i
+len
,&dlen
);
// Diagnostic written to stdout, without a trailing newline.
// NOTE(review): i is printed with %d - verify that i really is an int.
377 printf("UTF8 decoding error: i: %d, len: %d, dlen: %d", i
, len
, dlen
);
388 #endif // __UTILS_HXX