song list: get rid of boost::zip_iterator and improve {Const,}SongIterator
[ncmpcpp.git] / src / utility / html.cpp
blobe79ef503deb87d30b5a1b4e754a45a1bde864cdc
1 /***************************************************************************
2 * Copyright (C) 2008-2016 by Andrzej Rybczak *
3 * electricityispower@gmail.com *
4 * *
5 * This program is free software; you can redistribute it and/or modify *
6 * it under the terms of the GNU General Public License as published by *
7 * the Free Software Foundation; either version 2 of the License, or *
8 * (at your option) any later version. *
9 * *
10 * This program is distributed in the hope that it will be useful, *
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of *
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
13 * GNU General Public License for more details. *
14 * *
15 * You should have received a copy of the GNU General Public License *
16 * along with this program; if not, write to the *
17 * Free Software Foundation, Inc., *
18 * 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. *
19 ***************************************************************************/
21 #include <algorithm>
22 #include <boost/algorithm/string/replace.hpp>
23 #include "utility/html.h"
// Appends the UTF-8 encoding of code_point to out. The caller guarantees that
// code_point is a Unicode scalar value (1..0x10FFFF, excluding surrogates).
static void appendUtf8(std::string &out, unsigned long code_point)
{
	if (code_point >= 0x10000)
	{
		out += static_cast<char>(0xf0 | ((code_point >> 18) & 0x07));
		out += static_cast<char>(0x80 | ((code_point >> 12) & 0x3f));
		out += static_cast<char>(0x80 | ((code_point >> 6) & 0x3f));
		out += static_cast<char>(0x80 | (code_point & 0x3f));
	}
	else if (code_point >= 0x800)
	{
		out += static_cast<char>(0xe0 | ((code_point >> 12) & 0x0f));
		out += static_cast<char>(0x80 | ((code_point >> 6) & 0x3f));
		out += static_cast<char>(0x80 | (code_point & 0x3f));
	}
	else if (code_point >= 0x80)
	{
		out += static_cast<char>(0xc0 | ((code_point >> 6) & 0x1f));
		out += static_cast<char>(0x80 | (code_point & 0x3f));
	}
	else
		out += static_cast<char>(code_point);
}

// Parses the part of a numeric character reference that follows "&#", starting
// at data[pos]: either decimal digits or 'x'/'X' followed by hex digits, and
// then a terminating ';'.
//
// On success stores the value in code_point and returns the index of the ';'.
// Returns std::string::npos if the reference is malformed or does not denote a
// Unicode scalar value (zero, a surrogate, or anything above 0x10FFFF).
static std::string::size_type parseNumericReference(const std::string &data,
                                                    std::string::size_type pos,
                                                    unsigned long &code_point)
{
	const std::string::size_type length = data.length();
	unsigned base = 10;
	if (pos < length && (data[pos] == 'x' || data[pos] == 'X'))
	{
		base = 16;
		++pos;
	}

	const std::string::size_type digits_begin = pos;
	unsigned long value = 0;
	for (; pos < length; ++pos)
	{
		const char c = data[pos];
		unsigned digit;
		if (c >= '0' && c <= '9')
			digit = c - '0';
		else if (base == 16 && c >= 'a' && c <= 'f')
			digit = c - 'a' + 10;
		else if (base == 16 && c >= 'A' && c <= 'F')
			digit = c - 'A' + 10;
		else
			break;
		value = value * base + digit;
		// Bail out early; this also keeps value from ever overflowing.
		if (value > 0x10FFFF)
			return std::string::npos;
	}

	if (pos == digits_begin || pos == length || data[pos] != ';')
		return std::string::npos;
	if (value == 0 || (value >= 0xD800 && value <= 0xDFFF))
		return std::string::npos;

	code_point = value;
	return pos;
}

// Returns a copy of data in which every numeric character reference
// ("&#NNN;" or "&#xHHH;") is replaced by the UTF-8 encoding of the code point
// it names. All other text, including malformed or out-of-range references,
// is copied through unchanged.
std::string unescapeHtmlUtf8(const std::string &data)
{
	const std::string::size_type length = data.length();
	std::string result;
	result.reserve(length);
	for (std::string::size_type i = 0; i < length; ++i)
	{
		if (data[i] == '&' && i+1 < length && data[i+1] == '#')
		{
			unsigned long code_point = 0;
			const std::string::size_type end = parseNumericReference(data, i+2, code_point);
			if (end != std::string::npos)
			{
				appendUtf8(result, code_point);
				// Resume right after the terminating ';'.
				i = end;
				continue;
			}
		}
		result += data[i];
	}
	return result;
}
// Replaces a small set of named HTML entities in s with the characters they
// stand for (well, at least some of them). Unknown entities and bare
// ampersands are left untouched.
//
// The string is scanned once from left to right and the output of a
// replacement is never scanned again, so "&amp;lt;" correctly becomes "&lt;"
// rather than being unescaped twice into "<".
void unescapeHtmlEntities(std::string &s)
{
	struct Entity
	{
		const char *name;
		const char *replacement;
	};
	static const Entity entities[] = {
		{"&amp;", "&"},
		{"&gt;", ">"},
		{"&lt;", "<"},
		{"&nbsp;", " "},
		{"&quot;", "\""},
		{"&ndash;", "–"},
		{"&mdash;", "—"},
	};

	// Continuing the search at pos+1 skips the first byte of whatever was just
	// written at pos; no replacement contains '&' beyond that first byte.
	for (std::string::size_type pos = s.find('&');
	     pos != std::string::npos;
	     pos = s.find('&', pos+1))
	{
		for (const Entity &entity : entities)
		{
			const std::string::size_type name_length
				= std::char_traits<char>::length(entity.name);
			if (s.compare(pos, name_length, entity.name) == 0)
			{
				s.replace(pos, name_length, entity.replacement);
				break;
			}
		}
	}
}
66 void stripHtmlTags(std::string &s)
68 // Erase newlines so they don't duplicate with HTML ones.
69 s.erase(std::remove_if(s.begin(), s.end(), [](char c) {
70 return c == '\n' || c == '\r';
71 }), s.end());
73 bool is_newline;
74 for (size_t i = s.find("<"); i != std::string::npos; i = s.find("<"))
76 size_t j = s.find(">", i);
77 if (j != std::string::npos)
79 ++j;
80 is_newline
81 = s.compare(i, std::min<size_t>(3, j-i), "<p ") == 0
82 || s.compare(i, j-i, "<p>") == 0
83 || s.compare(i, j-i, "</p>") == 0
84 || s.compare(i, j-i, "<br>") == 0
85 || s.compare(i, j-i, "<br/>") == 0
86 || s.compare(i, std::min<size_t>(4, j-i), "<br ") == 0;
87 if (is_newline)
88 s.replace(i, j-i, "\n");
89 else
90 s.replace(i, j-i, "");
92 else
93 break;
95 unescapeHtmlEntities(s);