Restore curses after running external command
[ncmpcpp.git] / src / lyrics_fetcher.cpp
blobb32230cf1010e804c235010ca252e221b16ce824
1 /***************************************************************************
2 * Copyright (C) 2008-2017 by Andrzej Rybczak *
3 * electricityispower@gmail.com *
4 * *
5 * This program is free software; you can redistribute it and/or modify *
6 * it under the terms of the GNU General Public License as published by *
7 * the Free Software Foundation; either version 2 of the License, or *
8 * (at your option) any later version. *
9 * *
10 * This program is distributed in the hope that it will be useful, *
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of *
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
13 * GNU General Public License for more details. *
14 * *
15 * You should have received a copy of the GNU General Public License *
16 * along with this program; if not, write to the *
17 * Free Software Foundation, Inc., *
18 * 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. *
19 ***************************************************************************/
21 #include "config.h"
22 #include "curl_handle.h"
24 #include <cstdlib>
25 #include <cstring>
26 #include <boost/algorithm/string/join.hpp>
27 #include <boost/algorithm/string/replace.hpp>
28 #include <boost/algorithm/string/split.hpp>
29 #include <boost/algorithm/string/trim.hpp>
30 #include <boost/regex.hpp>
32 #include "charset.h"
33 #include "lyrics_fetcher.h"
34 #include "utility/html.h"
35 #include "utility/string.h"
37 std::istream &operator>>(std::istream &is, LyricsFetcher_ &fetcher)
39 std::string s;
40 is >> s;
41 if (s == "lyricwiki")
42 fetcher = std::make_unique<LyricwikiFetcher>();
43 else if (s == "azlyrics")
44 fetcher = std::make_unique<AzLyricsFetcher>();
45 else if (s == "genius")
46 fetcher = std::make_unique<GeniusFetcher>();
47 else if (s == "sing365")
48 fetcher = std::make_unique<Sing365Fetcher>();
49 else if (s == "lyricsmania")
50 fetcher = std::make_unique<LyricsmaniaFetcher>();
51 else if (s == "metrolyrics")
52 fetcher = std::make_unique<MetrolyricsFetcher>();
53 else if (s == "justsomelyrics")
54 fetcher = std::make_unique<JustSomeLyricsFetcher>();
55 else if (s == "jahlyrics")
56 fetcher = std::make_unique<JahLyricsFetcher>();
57 else if (s == "plyrics")
58 fetcher = std::make_unique<PLyricsFetcher>();
59 else if (s == "tekstowo")
60 fetcher = std::make_unique<TekstowoFetcher>();
61 else if (s == "internet")
62 fetcher = std::make_unique<InternetLyricsFetcher>();
63 else
64 is.setstate(std::ios::failbit);
65 return is;
68 const char LyricsFetcher::msgNotFound[] = "Not found";
70 LyricsFetcher::Result LyricsFetcher::fetch(const std::string &artist,
71 const std::string &title)
73 Result result;
74 result.first = false;
76 std::string url = urlTemplate();
77 boost::replace_all(url, "%artist%", Curl::escape(artist));
78 boost::replace_all(url, "%title%", Curl::escape(title));
80 std::string data;
81 CURLcode code = Curl::perform(data, url, "", true);
83 if (code != CURLE_OK)
85 result.second = curl_easy_strerror(code);
86 return result;
89 auto lyrics = getContent(regex(), data);
91 if (lyrics.empty() || notLyrics(data))
93 result.second = msgNotFound;
94 return result;
97 data.clear();
98 for (auto it = lyrics.begin(); it != lyrics.end(); ++it)
100 postProcess(*it);
101 if (!it->empty())
103 data += *it;
104 if (it != lyrics.end()-1)
105 data += "\n\n----------\n\n";
109 result.second = data;
110 result.first = true;
111 return result;
114 std::vector<std::string> LyricsFetcher::getContent(const char *regex_,
115 const std::string &data)
117 std::vector<std::string> result;
118 boost::regex rx(regex_);
119 auto first = boost::sregex_iterator(data.begin(), data.end(), rx);
120 auto last = boost::sregex_iterator();
121 for (; first != last; ++first)
123 std::string content;
124 for (size_t i = 1; i < first->size(); ++i)
125 content += first->str(i);
126 result.push_back(std::move(content));
128 return result;
131 void LyricsFetcher::postProcess(std::string &data) const
133 data = unescapeHtmlUtf8(data);
134 stripHtmlTags(data);
135 // Remove indentation from each line and collapse multiple newlines into one.
136 std::vector<std::string> lines;
137 boost::split(lines, data, boost::is_any_of("\n"));
138 for (auto &line : lines)
139 boost::trim(line);
140 std::unique(lines.begin(), lines.end(), [](std::string &a, std::string &b) {
141 return a.empty() && b.empty();
143 data = boost::algorithm::join(lines, "\n");
144 boost::trim(data);
147 /***********************************************************************/
149 LyricsFetcher::Result LyricwikiFetcher::fetch(const std::string &artist,
150 const std::string &title)
152 LyricsFetcher::Result result = LyricsFetcher::fetch(artist, title);
153 if (result.first == true)
155 result.first = false;
157 std::string data;
158 CURLcode code = Curl::perform(data, result.second, "", true);
160 if (code != CURLE_OK)
162 result.second = curl_easy_strerror(code);
163 return result;
166 auto lyrics = getContent("<div class='lyricbox'>(.*?)</div>", data);
168 if (lyrics.empty())
170 result.second = msgNotFound;
171 return result;
173 std::transform(lyrics.begin(), lyrics.end(), lyrics.begin(), unescapeHtmlUtf8);
174 bool license_restriction = std::any_of(lyrics.begin(), lyrics.end(), [](const std::string &s) {
175 return s.find("Unfortunately, we are not licensed to display the full lyrics for this song at the moment.") != std::string::npos;
177 if (license_restriction)
179 result.second = "Licence restriction";
180 return result;
183 data.clear();
184 for (auto it = lyrics.begin(); it != lyrics.end(); ++it)
186 stripHtmlTags(*it);
187 boost::trim(*it);
188 if (!it->empty())
190 data += *it;
191 if (it != lyrics.end()-1)
192 data += "\n\n----------\n\n";
196 result.second = data;
197 result.first = true;
199 return result;
202 bool LyricwikiFetcher::notLyrics(const std::string &data) const
204 return data.find("action=edit") != std::string::npos;
207 /**********************************************************************/
209 LyricsFetcher::Result GoogleLyricsFetcher::fetch(const std::string &artist,
210 const std::string &title)
212 Result result;
213 result.first = false;
215 std::string search_str;
216 if (siteKeyword() != nullptr)
218 search_str += "site:";
219 search_str += Curl::escape(siteKeyword());
221 else
222 search_str = "lyrics";
223 search_str += "+";
224 search_str += Curl::escape(artist);
225 search_str += "+";
226 search_str += Curl::escape(title);
228 std::string google_url = "http://www.google.com/search?hl=en&ie=UTF-8&oe=UTF-8&q=";
229 google_url += search_str;
230 google_url += "&btnI=I%27m+Feeling+Lucky";
232 std::string data;
233 CURLcode code = Curl::perform(data, google_url, google_url);
235 if (code != CURLE_OK)
237 result.second = curl_easy_strerror(code);
238 return result;
241 auto urls = getContent("<A HREF=\"(.*?)\">here</A>", data);
243 if (urls.empty() || !isURLOk(urls[0]))
245 result.second = msgNotFound;
246 return result;
249 data = unescapeHtmlUtf8(urls[0]);
251 URL = data.c_str();
252 return LyricsFetcher::fetch("", "");
255 bool GoogleLyricsFetcher::isURLOk(const std::string &url)
257 return url.find(siteKeyword()) != std::string::npos;
260 /**********************************************************************/
262 bool MetrolyricsFetcher::isURLOk(const std::string &url)
264 // it sometimes return link to sitemap.xml, which is huge so we need to discard it
265 return GoogleLyricsFetcher::isURLOk(url) && url.find("sitemap") == std::string::npos;
268 /**********************************************************************/
270 LyricsFetcher::Result InternetLyricsFetcher::fetch(const std::string &artist,
271 const std::string &title)
273 GoogleLyricsFetcher::fetch(artist, title);
274 LyricsFetcher::Result result;
275 result.first = false;
276 result.second = "The following site may contain lyrics for this song: ";
277 result.second += URL;
278 return result;
281 bool InternetLyricsFetcher::isURLOk(const std::string &url)
283 URL = url;
284 return false;