lyrics fetcher: add support for genius.com
[ncmpcpp.git] / src / lyrics_fetcher.cpp
blob4f0702d7f39f126c87ff256ed115aab096c362e9
1 /***************************************************************************
2 * Copyright (C) 2008-2016 by Andrzej Rybczak *
3 * electricityispower@gmail.com *
4 * *
5 * This program is free software; you can redistribute it and/or modify *
6 * it under the terms of the GNU General Public License as published by *
7 * the Free Software Foundation; either version 2 of the License, or *
8 * (at your option) any later version. *
9 * *
10 * This program is distributed in the hope that it will be useful, *
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of *
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
13 * GNU General Public License for more details. *
14 * *
15 * You should have received a copy of the GNU General Public License *
16 * along with this program; if not, write to the *
17 * Free Software Foundation, Inc., *
18 * 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. *
19 ***************************************************************************/
21 #include "config.h"
22 #include "curl_handle.h"
24 #ifdef HAVE_CURL_CURL_H
26 #include <cstdlib>
27 #include <cstring>
28 #include <boost/algorithm/string/join.hpp>
29 #include <boost/algorithm/string/replace.hpp>
30 #include <boost/algorithm/string/split.hpp>
31 #include <boost/algorithm/string/trim.hpp>
32 #include <boost/regex.hpp>
34 #include "charset.h"
35 #include "lyrics_fetcher.h"
36 #include "utility/html.h"
37 #include "utility/string.h"
// Table of the lyrics fetchers defined for this program, holding one
// heap-allocated instance per supported site.
// NOTE(review): the initializer's braces and any trailing terminator entry are
// not visible in this extract -- confirm them before editing the list.
// NOTE(review): presumably the entries are tried in the order listed; verify
// against the code that iterates this array.
39 LyricsFetcher *lyricsPlugins[] =
41 new LyricwikiFetcher(),
42 new GeniusLyricsFetcher(),
43 new AzLyricsFetcher(),
44 new Sing365Fetcher(),
45 new LyricsmaniaFetcher(),
46 new MetrolyricsFetcher(),
47 new JustSomeLyricsFetcher(),
48 new InternetLyricsFetcher(),
// Message stored in Result::second by the fetchers in this file when a page
// was downloaded but no usable lyrics (or target URL) could be extracted.
52 const char LyricsFetcher::msgNotFound[] = "Not found";
54 LyricsFetcher::Result LyricsFetcher::fetch(const std::string &artist, const std::string &title)
56 Result result;
57 result.first = false;
59 std::string url = urlTemplate();
60 boost::replace_all(url, "%artist%", artist);
61 boost::replace_all(url, "%title%", title);
63 std::string data;
64 CURLcode code = Curl::perform(data, url);
66 if (code != CURLE_OK)
68 result.second = curl_easy_strerror(code);
69 return result;
72 auto lyrics = getContent(regex(), data);
74 if (lyrics.empty() || notLyrics(data))
76 result.second = msgNotFound;
77 return result;
80 data.clear();
81 for (auto it = lyrics.begin(); it != lyrics.end(); ++it)
83 postProcess(*it);
84 if (!it->empty())
86 data += *it;
87 if (it != lyrics.end()-1)
88 data += "\n\n----------\n\n";
92 result.second = data;
93 result.first = true;
94 return result;
97 std::vector<std::string> LyricsFetcher::getContent(const char *regex_, const std::string &data)
99 std::vector<std::string> result;
100 boost::regex rx(regex_);
101 auto first = boost::sregex_iterator(data.begin(), data.end(), rx);
102 auto last = boost::sregex_iterator();
103 for (; first != last; ++first)
104 result.push_back(first->str(1));
105 return result;
108 void LyricsFetcher::postProcess(std::string &data) const
110 data = unescapeHtmlUtf8(data);
111 stripHtmlTags(data);
112 // Remove indentation from each line and collapse multiple newlines into one.
113 std::vector<std::string> lines;
114 boost::split(lines, data, boost::is_any_of("\n"));
115 for (auto &line : lines)
116 boost::trim(line);
117 std::unique(lines.begin(), lines.end(), [](std::string &a, std::string &b) {
118 return a.empty() && b.empty();
120 data = boost::algorithm::join(lines, "\n");
121 boost::trim(data);
124 /***********************************************************************/
126 LyricsFetcher::Result LyricwikiFetcher::fetch(const std::string &artist, const std::string &title)
128 LyricsFetcher::Result result = LyricsFetcher::fetch(artist, title);
129 if (result.first == true)
131 result.first = false;
133 std::string data;
134 CURLcode code = Curl::perform(data, result.second, "", true);
136 if (code != CURLE_OK)
138 result.second = curl_easy_strerror(code);
139 return result;
142 auto lyrics = getContent("<div class='lyricbox'>(.*?)</div>", data);
144 if (lyrics.empty())
146 result.second = msgNotFound;
147 return result;
149 std::transform(lyrics.begin(), lyrics.end(), lyrics.begin(), unescapeHtmlUtf8);
150 bool license_restriction = std::any_of(lyrics.begin(), lyrics.end(), [](const std::string &s) {
151 return s.find("Unfortunately, we are not licensed to display the full lyrics for this song at the moment.") != std::string::npos;
153 if (license_restriction)
155 result.second = "Licence restriction";
156 return result;
159 data.clear();
160 for (auto it = lyrics.begin(); it != lyrics.end(); ++it)
162 stripHtmlTags(*it);
163 boost::trim(*it);
164 if (!it->empty())
166 data += *it;
167 if (it != lyrics.end()-1)
168 data += "\n\n----------\n\n";
172 result.second = data;
173 result.first = true;
175 return result;
178 bool LyricwikiFetcher::notLyrics(const std::string &data) const
180 return data.find("action=edit") != std::string::npos;
183 /**********************************************************************/
185 LyricsFetcher::Result GoogleLyricsFetcher::fetch(const std::string &artist, const std::string &title)
187 Result result;
188 result.first = false;
190 std::string search_str = artist;
191 search_str += "+";
192 search_str += title;
193 search_str += "+%2B";
194 search_str += siteKeyword();
196 std::string google_url = "http://www.google.com/search?hl=en&ie=UTF-8&oe=UTF-8&q=";
197 google_url += search_str;
198 google_url += "&btnI=I%27m+Feeling+Lucky";
200 std::string data;
201 CURLcode code = Curl::perform(data, google_url, google_url);
203 if (code != CURLE_OK)
205 result.second = curl_easy_strerror(code);
206 return result;
209 auto urls = getContent("<A HREF=\"(.*?)\">here</A>", data);
211 if (urls.empty() || !isURLOk(urls[0]))
213 result.second = msgNotFound;
214 return result;
217 data = unescapeHtmlUtf8(urls[0]);
219 URL = data.c_str();
220 return LyricsFetcher::fetch("", "");
223 bool GoogleLyricsFetcher::isURLOk(const std::string &url)
225 return url.find(siteKeyword()) != std::string::npos;
228 /**********************************************************************/
230 bool MetrolyricsFetcher::isURLOk(const std::string &url)
232 // it sometimes return link to sitemap.xml, which is huge so we need to discard it
233 return GoogleLyricsFetcher::isURLOk(url) && url.find("sitemap") == std::string::npos;
236 /**********************************************************************/
238 LyricsFetcher::Result InternetLyricsFetcher::fetch(const std::string &artist, const std::string &title)
240 GoogleLyricsFetcher::fetch(artist, title);
241 LyricsFetcher::Result result;
242 result.first = false;
243 result.second = "The following site may contain lyrics for this song: ";
244 result.second += URL;
245 return result;
248 bool InternetLyricsFetcher::isURLOk(const std::string &url)
250 URL = url;
251 return false;
254 #endif // HAVE_CURL_CURL_H