1 /***************************************************************************
2 * Copyright (C) 2008-2016 by Andrzej Rybczak *
3 * electricityispower@gmail.com *
5 * This program is free software; you can redistribute it and/or modify *
6 * it under the terms of the GNU General Public License as published by *
7 * the Free Software Foundation; either version 2 of the License, or *
8 * (at your option) any later version. *
10 * This program is distributed in the hope that it will be useful, *
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of *
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
13 * GNU General Public License for more details. *
15 * You should have received a copy of the GNU General Public License *
16 * along with this program; if not, write to the *
17 * Free Software Foundation, Inc., *
18 * 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. *
19 ***************************************************************************/
22 #include <boost/algorithm/string/replace.hpp>
23 #include "utility/html.h"
// Appends the UTF-8 encoding of the code point cp to out. The caller
// guarantees that cp is a valid scalar value (1..0x10ffff, no surrogates).
static void appendCodePointAsUtf8(std::string &out, unsigned long cp)
{
	if (cp >= 0x10000)
	{
		out += static_cast<char>(0xf0 | ((cp >> 18) & 0x07));
		out += static_cast<char>(0x80 | ((cp >> 12) & 0x3f));
		out += static_cast<char>(0x80 | ((cp >> 6) & 0x3f));
		out += static_cast<char>(0x80 | (cp & 0x3f));
	}
	else if (cp >= 0x800)
	{
		out += static_cast<char>(0xe0 | ((cp >> 12) & 0x0f));
		out += static_cast<char>(0x80 | ((cp >> 6) & 0x3f));
		out += static_cast<char>(0x80 | (cp & 0x3f));
	}
	else if (cp >= 0x80)
	{
		out += static_cast<char>(0xc0 | ((cp >> 6) & 0x1f));
		out += static_cast<char>(0x80 | (cp & 0x3f));
	}
	else
		out += static_cast<char>(cp);
}

// Tries to parse a numeric character reference ("&#123;" or "&#x7b;") that
// starts at data[pos]. On success stores the code point in cp and the index
// of the terminating ';' in end, then returns true. Returns false (leaving
// cp and end untouched) if the text at pos is not a well-formed reference to
// a valid, non-zero Unicode scalar value.
static bool parseNumericReference(const std::string &data, size_t pos,
                                  unsigned long &cp, size_t &end)
{
	const size_t size = data.length();
	// Need at least "&#" plus one more character.
	if (pos + 2 >= size || data[pos] != '&' || data[pos+1] != '#')
		return false;

	size_t j = pos + 2;
	unsigned base = 10;
	if (data[j] == 'x' || data[j] == 'X')
	{
		base = 16;
		++j;
	}

	const size_t first_digit = j;
	unsigned long value = 0;
	for (; j < size; ++j)
	{
		const char c = data[j];
		unsigned digit;
		if (c >= '0' && c <= '9')
			digit = c - '0';
		else if (base == 16 && c >= 'a' && c <= 'f')
			digit = c - 'a' + 10;
		else if (base == 16 && c >= 'A' && c <= 'F')
			digit = c - 'A' + 10;
		else
			break;
		value = value * base + digit;
		// Bail out early: this also keeps value from ever overflowing.
		if (value > 0x10ffff)
			return false;
	}

	// Require at least one digit immediately followed by ';'.
	if (j == first_digit || j >= size || data[j] != ';')
		return false;
	// NUL and UTF-16 surrogates have no meaningful UTF-8 representation.
	if (value == 0 || (value >= 0xd800 && value <= 0xdfff))
		return false;

	cp = value;
	end = j;
	return true;
}

// Returns a copy of data in which every well-formed HTML numeric character
// reference (decimal "&#NNN;" or hexadecimal "&#xHHH;") is replaced with the
// UTF-8 encoding of the referenced code point. Everything else, including
// malformed or out-of-range references, is copied through unchanged.
std::string unescapeHtmlUtf8(const std::string &data)
{
	std::string result;
	result.reserve(data.length());
	for (size_t i = 0; i < data.length(); ++i)
	{
		unsigned long cp = 0;
		size_t end = 0;
		if (data[i] == '&' && parseNumericReference(data, i, cp, end))
		{
			appendCodePointAsUtf8(result, cp);
			// Continue right after the closing ';' (the loop adds one).
			i = end;
		}
		else
			result += data[i];
	}
	return result;
}
// Replaces a small set of named HTML entities in s (in place) with the
// characters they stand for: &amp; &gt; &lt; &nbsp; &quot; &ndash; &mdash;.
// Unknown entities and stray '&' characters are left untouched.
//
// The string is decoded in a single left-to-right pass, so text produced by
// one replacement is never decoded again ("&amp;lt;" becomes "&lt;", not "<").
void unescapeHtmlEntities(std::string &s)
{
	struct Entity
	{
		const char *name;
		const char *text;
	};
	// well, at least some of them.
	static const Entity entities[] = {
		{"&amp;",   "&"},
		{"&gt;",    ">"},
		{"&lt;",    "<"},
		{"&nbsp;",  " "},
		{"&quot;",  "\""},
		{"&ndash;", "\xe2\x80\x93"}, // U+2013 EN DASH, UTF-8 encoded
		{"&mdash;", "\xe2\x80\x94"}, // U+2014 EM DASH, UTF-8 encoded
	};

	std::string result;
	result.reserve(s.size());
	for (size_t i = 0; i < s.size(); )
	{
		bool replaced = false;
		if (s[i] == '&')
		{
			for (const Entity &entity : entities)
			{
				const size_t len = std::char_traits<char>::length(entity.name);
				// compare() clips the substring at the end of s, so a
				// truncated entity at the end simply fails to match.
				if (s.compare(i, len, entity.name) == 0)
				{
					result += entity.text;
					i += len;
					replaced = true;
					break;
				}
			}
		}
		if (!replaced)
			result += s[i++];
	}
	s.swap(result);
}
66 void stripHtmlTags(std::string
&s
)
68 // Erase newlines so they don't duplicate with HTML ones.
69 s
.erase(std::remove_if(s
.begin(), s
.end(), [](char c
) {
70 return c
== '\n' || c
== '\r';
74 for (size_t i
= s
.find("<"); i
!= std::string::npos
; i
= s
.find("<"))
76 size_t j
= s
.find(">", i
);
77 if (j
!= std::string::npos
)
81 = s
.compare(i
, std::min
<size_t>(3, j
-i
), "<p ") == 0
82 || s
.compare(i
, j
-i
, "<p>") == 0
83 || s
.compare(i
, j
-i
, "</p>") == 0
84 || s
.compare(i
, j
-i
, "<br>") == 0
85 || s
.compare(i
, j
-i
, "<br/>") == 0
86 || s
.compare(i
, std::min
<size_t>(4, j
-i
), "<br ") == 0;
88 s
.replace(i
, j
-i
, "\n");
90 s
.replace(i
, j
-i
, "");
95 unescapeHtmlEntities(s
);