1 /***************************************************************************
2 * Copyright (C) 2008-2017 by Andrzej Rybczak *
3 * electricityispower@gmail.com *
5 * This program is free software; you can redistribute it and/or modify *
6 * it under the terms of the GNU General Public License as published by *
7 * the Free Software Foundation; either version 2 of the License, or *
8 * (at your option) any later version. *
10 * This program is distributed in the hope that it will be useful, *
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of *
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
13 * GNU General Public License for more details. *
15 * You should have received a copy of the GNU General Public License *
16 * along with this program; if not, write to the *
17 * Free Software Foundation, Inc., *
18 * 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. *
19 ***************************************************************************/
22 #include "curl_handle.h"
26 #include <boost/algorithm/string/join.hpp>
27 #include <boost/algorithm/string/replace.hpp>
28 #include <boost/algorithm/string/split.hpp>
29 #include <boost/algorithm/string/trim.hpp>
30 #include <boost/regex.hpp>
33 #include "lyrics_fetcher.h"
34 #include "utility/html.h"
35 #include "utility/string.h"
// Stream extraction operator for a LyricsFetcher_ handle.
// Each visible branch compares `s` with a fetcher keyword and, on a match,
// assigns a newly constructed fetcher of the corresponding class to `fetcher`.
// The last visible statement sets std::ios::failbit on `is`.
// NOTE(review): the lines that declare and fill `s`, the first `if` of the
// chain, the closing `else` and the return statement are not visible in this
// excerpt -- presumably `s` is a word read from `is` and failbit marks an
// unrecognised keyword; confirm against the complete file.
37 std::istream
&operator>>(std::istream
&is
, LyricsFetcher_
&fetcher
)
42 fetcher
= std::make_unique
<LyricwikiFetcher
>();
43 else if (s
== "azlyrics")
44 fetcher
= std::make_unique
<AzLyricsFetcher
>();
45 else if (s
== "genius")
46 fetcher
= std::make_unique
<GeniusFetcher
>();
47 else if (s
== "sing365")
48 fetcher
= std::make_unique
<Sing365Fetcher
>();
49 else if (s
== "lyricsmania")
50 fetcher
= std::make_unique
<LyricsmaniaFetcher
>();
51 else if (s
== "metrolyrics")
52 fetcher
= std::make_unique
<MetrolyricsFetcher
>();
53 else if (s
== "justsomelyrics")
54 fetcher
= std::make_unique
<JustSomeLyricsFetcher
>();
55 else if (s
== "jahlyrics")
56 fetcher
= std::make_unique
<JahLyricsFetcher
>();
57 else if (s
== "plyrics")
58 fetcher
= std::make_unique
<PLyricsFetcher
>();
59 else if (s
== "tekstowo")
60 fetcher
= std::make_unique
<TekstowoFetcher
>();
61 else if (s
== "internet")
62 fetcher
= std::make_unique
<InternetLyricsFetcher
>();
// Flag the stream as failed (presumably the fallback when no keyword matched).
64 is
.setstate(std::ios::failbit
);
// Message the fetchers in this file assign to result.second when no lyrics
// could be extracted from a response.
68 const char LyricsFetcher::msgNotFound
[] = "Not found";
// Generic fetch routine.
// Builds the request URL from urlTemplate() by replacing the "%artist%" and
// "%title%" placeholders with the Curl::escape()d arguments, requests it with
// Curl::perform() and extracts candidate lyrics from the response with
// getContent(regex(), data).
// result.second ends up holding one of: the curl_easy_strerror() text for the
// status code, msgNotFound (no match, or notLyrics(data) is true), or the
// accumulated lyrics text.
// NOTE(review): the declarations of `result` and `data`, the check of `code`,
// the per-match processing inside the loop, the handling of result.first and
// the return statements are not visible in this excerpt.
70 LyricsFetcher::Result
LyricsFetcher::fetch(const std::string
&artist
,
71 const std::string
&title
)
// Substitute both placeholders of the URL template.
76 std::string url
= urlTemplate();
77 boost::replace_all(url
, "%artist%", Curl::escape(artist
));
78 boost::replace_all(url
, "%title%", Curl::escape(title
));
// Perform the request; `data` presumably receives the response body.
81 CURLcode code
= Curl::perform(data
, url
);
// Use libcurl's description of the status code as the message.
85 result
.second
= curl_easy_strerror(code
);
// Pull the candidate lyrics out of the response with this fetcher's regex().
89 auto lyrics
= getContent(regex(), data
);
// No match at all, or the page was rejected by notLyrics(): report msgNotFound.
91 if (lyrics
.empty() || notLyrics(data
))
93 result
.second
= msgNotFound
;
// Walk over every match; the separator below is appended only when `it` is
// not the last element of `lyrics`.
98 for (auto it
= lyrics
.begin(); it
!= lyrics
.end(); ++it
)
104 if (it
!= lyrics
.end()-1)
105 data
+= "\n\n----------\n\n";
// Hand the accumulated text back as the result message.
109 result
.second
= data
;
// Collects the text of capture group 1 for every match of the regular
// expression regex_ in data, in the order the iterator yields the matches.
// NOTE(review): the return statement is not visible in this excerpt;
// presumably `result` is returned.
114 std::vector
<std::string
> LyricsFetcher::getContent(const char *regex_
,
115 const std::string
&data
)
117 std::vector
<std::string
> result
;
118 boost::regex
rx(regex_
);
119 auto first
= boost::sregex_iterator(data
.begin(), data
.end(), rx
);
// A default-constructed sregex_iterator serves as the end-of-sequence marker.
120 auto last
= boost::sregex_iterator();
121 for (; first
!= last
; ++first
)
122 result
.push_back(first
->str(1));
// Cleans up one extracted lyrics string in place: passes it through
// unescapeHtmlUtf8(), splits it on '\n' into `lines`, processes the lines and
// joins them back together with "\n".
// NOTE(review): the body of the range-for over `lines` and any statements
// after the join are not visible in this excerpt; going by the existing
// comment, the loop presumably trims each line.
126 void LyricsFetcher::postProcess(std::string
&data
) const
128 data
= unescapeHtmlUtf8(data
);
130 // Remove indentation from each line and collapse multiple newlines into one.
131 std::vector
<std::string
> lines
;
132 boost::split(lines
, data
, boost::is_any_of("\n"));
133 for (auto &line
: lines
)
// NOTE(review): the iterator returned by std::unique is neither stored nor
// passed on in the visible code. std::unique only moves the kept elements to
// the front and returns the new logical end; the vector keeps its size, so
// the join below still includes the left-over tail elements (whose values are
// unspecified). This most likely needs
// lines.erase(<returned iterator>, lines.end()) before joining -- confirm.
135 std::unique(lines
.begin(), lines
.end(), [](std::string
&a
, std::string
&b
) {
136 return a
.empty() && b
.empty();
138 data
= boost::algorithm::join(lines
, "\n");
142 /***********************************************************************/
// LyricWiki-specific fetch.
// Runs the generic LyricsFetcher::fetch() first. When that succeeds, its
// result.second is passed to a second Curl::perform() call in the URL
// position, and the lyrics are taken from the <div class='lyricbox'> elements
// of that response.
// result.second ends up holding one of: the curl_easy_strerror() text,
// msgNotFound, "Licence restriction" (an extracted block contains the
// "not licensed" notice), or the accumulated lyrics text.
// NOTE(review): the declaration of `data`, the condition guarding the
// msgNotFound assignment, the per-block processing inside the loop, the final
// value of result.first and the return statements are not visible in this
// excerpt.
144 LyricsFetcher::Result
LyricwikiFetcher::fetch(const std::string
&artist
,
145 const std::string
&title
)
147 LyricsFetcher::Result result
= LyricsFetcher::fetch(artist
, title
);
148 if (result
.first
== true)
// Clear the success flag for the second stage (presumably set again once the
// second stage succeeds -- not visible here).
150 result
.first
= false;
// Second request: result.second from the generic fetch is used as the URL.
153 CURLcode code
= Curl::perform(data
, result
.second
, "", true);
155 if (code
!= CURLE_OK
)
157 result
.second
= curl_easy_strerror(code
);
161 auto lyrics
= getContent("<div class='lyricbox'>(.*?)</div>", data
);
165 result
.second
= msgNotFound
;
// Run every extracted block through unescapeHtmlUtf8() before inspecting it.
168 std::transform(lyrics
.begin(), lyrics
.end(), lyrics
.begin(), unescapeHtmlUtf8
);
// True when at least one block contains the site's "not licensed" notice.
169 bool license_restriction
= std::any_of(lyrics
.begin(), lyrics
.end(), [](const std::string
&s
) {
170 return s
.find("Unfortunately, we are not licensed to display the full lyrics for this song at the moment.") != std::string::npos
;
172 if (license_restriction
)
174 result
.second
= "Licence restriction";
// Walk over every block; the separator below is appended only when `it` is
// not the last element of `lyrics`.
179 for (auto it
= lyrics
.begin(); it
!= lyrics
.end(); ++it
)
186 if (it
!= lyrics
.end()-1)
187 data
+= "\n\n----------\n\n";
191 result
.second
= data
;
// Reports whether data contains the substring "action=edit".
// NOTE(review): presumably such a link marks a wiki page that has no lyrics
// yet -- confirm.
197 bool LyricwikiFetcher::notLyrics(const std::string
&data
) const
199 return data
.find("action=edit") != std::string::npos
;
202 /**********************************************************************/
// Looks the song up through a Google "I'm Feeling Lucky" query.
// The query starts with "site:" followed by the escaped siteKeyword(), then
// the escaped artist and title. The first URL captured by the
// <A HREF="...">here</A> pattern in the response is accepted only if
// isURLOk() approves it; otherwise result.second is set to msgNotFound.
// The function finishes by delegating to LyricsFetcher::fetch("", "").
// NOTE(review): the declarations of `result` and `data`, the separators
// appended between the query terms, the way the selected URL is handed to the
// generic fetch (presumably through urlTemplate()) and the early returns are
// not visible in this excerpt.
204 LyricsFetcher::Result
GoogleLyricsFetcher::fetch(const std::string
&artist
,
205 const std::string
&title
)
208 result
.first
= false;
// Assemble the search terms.
210 std::string search_str
= "site:";
211 search_str
+= Curl::escape(siteKeyword());
213 search_str
+= Curl::escape(artist
);
215 search_str
+= Curl::escape(title
);
// Build the query URL; btnI requests the "I'm Feeling Lucky" behaviour.
217 std::string google_url
= "http://www.google.com/search?hl=en&ie=UTF-8&oe=UTF-8&q=";
218 google_url
+= search_str
;
219 google_url
+= "&btnI=I%27m+Feeling+Lucky";
// The query URL is passed twice: as the URL and as the third argument
// (presumably the referer -- confirm against Curl::perform's declaration).
222 CURLcode code
= Curl::perform(data
, google_url
, google_url
);
224 if (code
!= CURLE_OK
)
226 result
.second
= curl_easy_strerror(code
);
// Extract the target of the "here" link from the response.
230 auto urls
= getContent("<A HREF=\"(.*?)\">here</A>", data
);
// urls[0] is evaluated only when the vector is non-empty (short-circuit ||).
232 if (urls
.empty() || !isURLOk(urls
[0]))
234 result
.second
= msgNotFound
;
// Reuse `data` to hold the first URL after unescapeHtmlUtf8().
238 data
= unescapeHtmlUtf8(urls
[0]);
241 return LyricsFetcher::fetch("", "");
// Accepts url only if it contains siteKeyword() as a substring.
244 bool GoogleLyricsFetcher::isURLOk(const std::string
&url
)
246 return url
.find(siteKeyword()) != std::string::npos
;
249 /**********************************************************************/
// Accepts url only if GoogleLyricsFetcher::isURLOk() accepts it and it does
// not contain the substring "sitemap".
251 bool MetrolyricsFetcher::isURLOk(const std::string
&url
)
253 // The search sometimes returns a link to sitemap.xml, which is huge, so we need to discard it.
254 return GoogleLyricsFetcher::isURLOk(url
) && url
.find("sitemap") == std::string::npos
;
257 /**********************************************************************/
// Runs GoogleLyricsFetcher::fetch() without using its returned Result, then
// builds a fresh Result with first set to false and second set to a hint
// message that ends with URL.
// NOTE(review): where URL is assigned is not visible in this excerpt
// (presumably while the Google lookup runs -- confirm), and neither is the
// return statement.
259 LyricsFetcher::Result
InternetLyricsFetcher::fetch(const std::string
&artist
,
260 const std::string
&title
)
// The returned Result is discarded here.
262 GoogleLyricsFetcher::fetch(artist
, title
);
263 LyricsFetcher::Result result
;
264 result
.first
= false;
265 result
.second
= "The following site may contain lyrics for this song: ";
266 result
.second
+= URL
;
270 bool InternetLyricsFetcher::isURLOk(const std::string
&url
)