set SIGWINCH handler before initializing ncurses to avoid races
[ncmpcpp.git] / src / lyrics_fetcher.cpp
blob baeeb11f296e84b05988d9e56996248663cf2cc3
1 /***************************************************************************
2 * Copyright (C) 2008-2014 by Andrzej Rybczak *
3 * electricityispower@gmail.com *
4 * *
5 * This program is free software; you can redistribute it and/or modify *
6 * it under the terms of the GNU General Public License as published by *
7 * the Free Software Foundation; either version 2 of the License, or *
8 * (at your option) any later version. *
9 * *
10 * This program is distributed in the hope that it will be useful, *
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of *
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
13 * GNU General Public License for more details. *
14 * *
15 * You should have received a copy of the GNU General Public License *
16 * along with this program; if not, write to the *
17 * Free Software Foundation, Inc., *
18 * 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. *
19 ***************************************************************************/
21 #include "config.h"
22 #include "curl_handle.h"
24 #ifdef HAVE_CURL_CURL_H
26 #include <cstdlib>
27 #include <cstring>
28 #include <boost/algorithm/string/replace.hpp>
29 #include <boost/algorithm/string/trim.hpp>
30 #include <boost/regex.hpp>
32 #include "charset.h"
33 #include "lyrics_fetcher.h"
34 #include "utility/html.h"
35 #include "utility/string.h"
// Table of lyrics fetcher instances, in the order listed below. Each entry
// is allocated with `new` and stored as a LyricsFetcher pointer.
// NOTE(review): the lines that open and close this initializer are not
// visible in this extract -- confirm whether the list ends with a null
// sentinel before relying on one when iterating.
37 LyricsFetcher *lyricsPlugins[] =
39 new LyricwikiFetcher(),
40 new AzLyricsFetcher(),
41 new Sing365Fetcher(),
42 new LyricsmaniaFetcher(),
43 new MetrolyricsFetcher(),
44 new JustSomeLyricsFetcher(),
45 new InternetLyricsFetcher(),
// Message placed in the `second` field of a fetch result when no lyrics
// could be extracted from the downloaded page.
49 const char LyricsFetcher::msgNotFound[] = "Not found";
51 LyricsFetcher::Result LyricsFetcher::fetch(const std::string &artist, const std::string &title)
53 Result result;
54 result.first = false;
56 std::string url = urlTemplate();
57 boost::replace_all(url, "%artist%", artist);
58 boost::replace_all(url, "%title%", title);
60 std::string data;
61 CURLcode code = Curl::perform(data, url);
63 if (code != CURLE_OK)
65 result.second = curl_easy_strerror(code);
66 return result;
69 auto lyrics = getContent(regex(), data);
71 if (lyrics.empty() || notLyrics(data))
73 result.second = msgNotFound;
74 return result;
77 data.clear();
78 for (auto it = lyrics.begin(); it != lyrics.end(); ++it)
80 postProcess(*it);
81 if (!it->empty())
83 data += *it;
84 if (it != lyrics.end()-1)
85 data += "\n\n----------\n\n";
89 result.second = data;
90 result.first = true;
91 return result;
94 std::vector<std::string> LyricsFetcher::getContent(const char *regex_, const std::string &data)
96 std::vector<std::string> result;
97 boost::regex rx(regex_);
98 auto first = boost::sregex_iterator(data.begin(), data.end(), rx);
99 auto last = boost::sregex_iterator();
100 for (; first != last; ++first)
101 result.push_back(first->str(1));
102 return result;
105 void LyricsFetcher::postProcess(std::string &data)
107 stripHtmlTags(data);
108 boost::trim(data);
111 /***********************************************************************/
113 LyricsFetcher::Result LyricwikiFetcher::fetch(const std::string &artist, const std::string &title)
115 LyricsFetcher::Result result = LyricsFetcher::fetch(artist, title);
116 if (result.first == true)
118 result.first = false;
120 std::string data;
121 CURLcode code = Curl::perform(data, result.second);
123 if (code != CURLE_OK)
125 result.second = curl_easy_strerror(code);
126 return result;
129 auto lyrics = getContent("<div class='lyricbox'><script>.*?</script>(.*?)<!--", data);
131 if (lyrics.empty())
133 result.second = msgNotFound;
134 return result;
136 std::transform(lyrics.begin(), lyrics.end(), lyrics.begin(), unescapeHtmlUtf8);
137 bool license_restriction = std::any_of(lyrics.begin(), lyrics.end(), [](const std::string &s) {
138 return s.find("Unfortunately, we are not licensed to display the full lyrics for this song at the moment.") != std::string::npos;
140 if (license_restriction)
142 result.second = "Licence restriction";
143 return result;
146 data.clear();
147 for (auto it = lyrics.begin(); it != lyrics.end(); ++it)
149 boost::replace_all(*it, "<br />", "\n");
150 stripHtmlTags(*it);
151 boost::trim(*it);
152 if (!it->empty())
154 data += *it;
155 if (it != lyrics.end()-1)
156 data += "\n\n----------\n\n";
160 result.second = data;
161 result.first = true;
163 return result;
166 bool LyricwikiFetcher::notLyrics(const std::string &data)
168 return data.find("action=edit") != std::string::npos;
171 /**********************************************************************/
173 LyricsFetcher::Result GoogleLyricsFetcher::fetch(const std::string &artist, const std::string &title)
175 Result result;
176 result.first = false;
178 std::string search_str = artist;
179 search_str += "+";
180 search_str += title;
181 search_str += "+%2B";
182 search_str += siteKeyword();
184 std::string google_url = "http://www.google.com/search?hl=en&ie=UTF-8&oe=UTF-8&q=";
185 google_url += search_str;
186 google_url += "&btnI=I%27m+Feeling+Lucky";
188 std::string data;
189 CURLcode code = Curl::perform(data, google_url, google_url);
191 if (code != CURLE_OK)
193 result.second = curl_easy_strerror(code);
194 return result;
197 auto urls = getContent("<A HREF=\"(.*?)\">here</A>", data);
199 if (urls.empty() || !isURLOk(urls[0]))
201 result.second = msgNotFound;
202 return result;
205 data = unescapeHtmlUtf8(urls[0]);
207 URL = data.c_str();
208 return LyricsFetcher::fetch("", "");
211 bool GoogleLyricsFetcher::isURLOk(const std::string &url)
213 return url.find(siteKeyword()) != std::string::npos;
216 /**********************************************************************/
218 void Sing365Fetcher::postProcess(std::string &data)
220 // throw away ad
221 data = boost::regex_replace(data, boost::regex("<div.*</div>"), "");
222 LyricsFetcher::postProcess(data);
225 /**********************************************************************/
227 void MetrolyricsFetcher::postProcess(std::string &data)
229 // some of lyrics have both \n chars and <br />, html tags
230 // are always present whereas \n chars are not, so we need to
231 // throw them away to avoid having line breaks doubled.
232 boost::replace_all(data, "&#10;", "");
233 boost::replace_all(data, "<br />", "\n");
234 data = unescapeHtmlUtf8(data);
235 LyricsFetcher::postProcess(data);
238 bool MetrolyricsFetcher::isURLOk(const std::string &url)
240 // it sometimes return link to sitemap.xml, which is huge so we need to discard it
241 return GoogleLyricsFetcher::isURLOk(url) && url.find("sitemap") == std::string::npos;
244 /**********************************************************************/
246 LyricsFetcher::Result InternetLyricsFetcher::fetch(const std::string &artist, const std::string &title)
248 GoogleLyricsFetcher::fetch(artist, title);
249 LyricsFetcher::Result result;
250 result.first = false;
251 result.second = "The following site may contain lyrics for this song: ";
252 result.second += URL;
253 return result;
256 bool InternetLyricsFetcher::isURLOk(const std::string &url)
258 URL = url;
259 return false;
262 #endif // HAVE_CURL_CURL_H