1 /***************************************************************************
2 * Copyright (C) 2008-2014 by Andrzej Rybczak *
3 * electricityispower@gmail.com *
5 * This program is free software; you can redistribute it and/or modify *
6 * it under the terms of the GNU General Public License as published by *
7 * the Free Software Foundation; either version 2 of the License, or *
8 * (at your option) any later version. *
10 * This program is distributed in the hope that it will be useful, *
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of *
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
13 * GNU General Public License for more details. *
15 * You should have received a copy of the GNU General Public License *
16 * along with this program; if not, write to the *
17 * Free Software Foundation, Inc., *
18 * 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. *
19 ***************************************************************************/
22 #include "curl_handle.h"
24 #ifdef HAVE_CURL_CURL_H
28 #include <boost/algorithm/string/replace.hpp>
29 #include <boost/algorithm/string/trim.hpp>
30 #include <boost/regex.hpp>
33 #include "lyrics_fetcher.h"
34 #include "utility/html.h"
35 #include "utility/string.h"
// Table of the available lyrics fetchers: each entry is a `new`-allocated
// instance of one fetcher class, stored through a LyricsFetcher pointer.
// NOTE(review): the enclosing braces are absent from this listing and entries
// may be missing as well (Sing365Fetcher has a member defined in this file but
// is not listed here) - verify against the complete file before relying on
// the contents or the termination of this array.
37 LyricsFetcher
*lyricsPlugins
[] =
39 new LyricwikiFetcher(),
40 new AzLyricsFetcher(),
42 new LyricsmaniaFetcher(),
43 new MetrolyricsFetcher(),
44 new JustSomeLyricsFetcher(),
45 new InternetLyricsFetcher(),
49 const char LyricsFetcher::msgNotFound
[] = "Not found";
// Fetches lyrics for the given artist and title.
//
// The request URL is taken from urlTemplate() with the "%artist%" and
// "%title%" placeholders replaced by the arguments. The page is downloaded
// with Curl::perform() and lyric fragments are extracted from it with
// getContent(regex(), data).
//
// NOTE(review): this listing lacks several lines of the definition (braces,
// the declarations of `result` and `data`, the return statements and part of
// the loop body), so the exact branching around the steps below could not be
// checked here.
51 LyricsFetcher::Result
LyricsFetcher::fetch(const std::string
&artist
, const std::string
&title
)
56 std::string url
= urlTemplate();
57 boost::replace_all(url
, "%artist%", artist
);
58 boost::replace_all(url
, "%title%", title
);
// Download the page behind url into data.
61 CURLcode code
= Curl::perform(data
, url
);
// The curl error text becomes the message in result.second (presumably only
// when code != CURLE_OK, as LyricwikiFetcher::fetch does - the condition
// itself is not visible in this listing).
65 result
.second
= curl_easy_strerror(code
);
// Every match of the fetcher's regex() contributes one fragment.
69 auto lyrics
= getContent(regex(), data
);
// No fragment at all, or notLyrics() rejects the page: report msgNotFound.
71 if (lyrics
.empty() || notLyrics(data
))
73 result
.second
= msgNotFound
;
// Walk the fragments; a separator is appended to data for every fragment
// except the last one.
78 for (auto it
= lyrics
.begin(); it
!= lyrics
.end(); ++it
)
84 if (it
!= lyrics
.end()-1)
85 data
+= "\n\n----------\n\n";
94 std::vector
<std::string
> LyricsFetcher::getContent(const char *regex_
, const std::string
&data
)
96 std::vector
<std::string
> result
;
97 boost::regex
rx(regex_
);
98 auto first
= boost::sregex_iterator(data
.begin(), data
.end(), rx
);
99 auto last
= boost::sregex_iterator();
100 for (; first
!= last
; ++first
)
101 result
.push_back(first
->str(1));
// Post-processing step for a piece of lyric text, which is received by
// mutable reference. Sing365Fetcher::postProcess and
// MetrolyricsFetcher::postProcess both finish by delegating to this function.
// NOTE(review): the body of this definition is not present in this listing,
// so what the base implementation does to `data` cannot be described here.
105 void LyricsFetcher::postProcess(std::string
&data
)
111 /***********************************************************************/
// Two-stage fetch: the base LyricsFetcher::fetch() runs first, and when it
// reports success its result.second is handed to Curl::perform() in the
// argument position where the base implementation passes the URL, i.e. the
// first stage is treated as yielding the address of the page to download.
//
// NOTE(review): this listing lacks several lines of the definition (braces,
// the declaration of `data`, return statements and part of the loop body), so
// the exact branching around the steps below could not be checked here.
113 LyricsFetcher::Result
LyricwikiFetcher::fetch(const std::string
&artist
, const std::string
&title
)
115 LyricsFetcher::Result result
= LyricsFetcher::fetch(artist
, title
);
// Only continue when the first stage succeeded.
116 if (result
.first
== true)
// Reset the success flag before the second download is attempted.
118 result
.first
= false;
// Second download: result.second is used as the address.
121 CURLcode code
= Curl::perform(data
, result
.second
);
// Transfer failure: the curl error text becomes the message.
123 if (code
!= CURLE_OK
)
125 result
.second
= curl_easy_strerror(code
);
// Capture what sits between the lyricbox's <script> element and the next
// "<!--" marker.
129 auto lyrics
= getContent("<div class='lyricbox'><script>.*?</script>(.*?)<!--", data
);
// Presumably executed only when `lyrics` is empty - the condition is not
// visible in this listing.
133 result
.second
= msgNotFound
;
// Unescape every fragment in place.
136 std::transform(lyrics
.begin(), lyrics
.end(), lyrics
.begin(), unescapeHtmlUtf8
);
// True when at least one fragment contains the site's licensing notice.
137 bool license_restriction
= std::any_of(lyrics
.begin(), lyrics
.end(), [](const std::string
&s
) {
138 return s
.find("Unfortunately, we are not licensed to display the full lyrics for this song at the moment.") != std::string::npos
;
// Report the restriction as the message in result.second.
140 if (license_restriction
)
142 result
.second
= "Licence restriction";
// Turn "<br />" into line breaks in each fragment; a separator is appended to
// data for every fragment except the last one.
147 for (auto it
= lyrics
.begin(); it
!= lyrics
.end(); ++it
)
149 boost::replace_all(*it
, "<br />", "\n");
155 if (it
!= lyrics
.end()-1)
156 data
+= "\n\n----------\n\n";
// The assembled text is what the caller receives in result.second.
160 result
.second
= data
;
166 bool LyricwikiFetcher::notLyrics(const std::string
&data
)
168 return data
.find("action=edit") != std::string::npos
;
171 /**********************************************************************/
// Looks up a lyrics page through a Google "I'm Feeling Lucky" query and then
// lets LyricsFetcher::fetch() do the actual download and extraction.
//
// NOTE(review): this listing lacks several lines of the definition (braces,
// the declarations of `result` and `data`, return statements). `title` is not
// referenced in any visible line, and no URL-escaping of `artist` is visible
// either - confirm both against the complete file.
173 LyricsFetcher::Result
GoogleLyricsFetcher::fetch(const std::string
&artist
, const std::string
&title
)
176 result
.first
= false;
// Search terms: the artist, then "+%2B" ("%2B" is a URL-encoded '+') followed
// by the site keyword of the concrete fetcher.
178 std::string search_str
= artist
;
181 search_str
+= "+%2B";
182 search_str
+= siteKeyword();
// Full query URL; the btnI parameter requests the "I'm Feeling Lucky" result.
184 std::string google_url
= "http://www.google.com/search?hl=en&ie=UTF-8&oe=UTF-8&q=";
185 google_url
+= search_str
;
186 google_url
+= "&btnI=I%27m+Feeling+Lucky";
// google_url is passed twice; the meaning of the third argument of
// Curl::perform() is not visible here (presumably the referer - confirm).
189 CURLcode code
= Curl::perform(data
, google_url
, google_url
);
// Transfer failure: the curl error text becomes the message.
191 if (code
!= CURLE_OK
)
193 result
.second
= curl_easy_strerror(code
);
// The response is expected to contain a link of the form
// <A HREF="...">here</A>; its target is captured.
197 auto urls
= getContent("<A HREF=\"(.*?)\">here</A>", data
);
// No link found, or the first link is rejected by isURLOk(): report
// msgNotFound.
199 if (urls
.empty() || !isURLOk(urls
[0]))
201 result
.second
= msgNotFound
;
// The first captured link, HTML-unescaped, replaces the page text in data.
205 data
= unescapeHtmlUtf8(urls
[0]);
// Delegate to the generic fetch with empty artist and title.
208 return LyricsFetcher::fetch("", "");
211 bool GoogleLyricsFetcher::isURLOk(const std::string
&url
)
213 return url
.find(siteKeyword()) != std::string::npos
;
216 /**********************************************************************/
218 void Sing365Fetcher::postProcess(std::string
&data
)
221 data
= boost::regex_replace(data
, boost::regex("<div.*</div>"), "");
222 LyricsFetcher::postProcess(data
);
225 /**********************************************************************/
227 void MetrolyricsFetcher::postProcess(std::string
&data
)
229 // some of lyrics have both \n chars and <br />, html tags
230 // are always present whereas \n chars are not, so we need to
231 // throw them away to avoid having line breaks doubled.
232 boost::replace_all(data
, " ", "");
233 boost::replace_all(data
, "<br />", "\n");
234 data
= unescapeHtmlUtf8(data
);
235 LyricsFetcher::postProcess(data
);
238 bool MetrolyricsFetcher::isURLOk(const std::string
&url
)
240 // it sometimes return link to sitemap.xml, which is huge so we need to discard it
241 return GoogleLyricsFetcher::isURLOk(url
) && url
.find("sitemap") == std::string::npos
;
244 /**********************************************************************/
246 LyricsFetcher::Result
InternetLyricsFetcher::fetch(const std::string
&artist
, const std::string
&title
)
248 GoogleLyricsFetcher::fetch(artist
, title
);
249 LyricsFetcher::Result result
;
250 result
.first
= false;
251 result
.second
= "The following site may contain lyrics for this song: ";
252 result
.second
+= URL
;
// URL check for InternetLyricsFetcher; GoogleLyricsFetcher::fetch() calls
// isURLOk() on the first link it extracts from the search response.
// NOTE(review): the body of this definition is not present in this listing.
// InternetLyricsFetcher::fetch() reads a member named URL, which is presumably
// recorded here - confirm against the complete file.
256 bool InternetLyricsFetcher::isURLOk(const std::string
&url
)
262 #endif // HAVE_CURL_CURL_H