1 /***************************************************************************
2 * Copyright (C) 2008-2016 by Andrzej Rybczak *
3 * electricityispower@gmail.com *
5 * This program is free software; you can redistribute it and/or modify *
6 * it under the terms of the GNU General Public License as published by *
7 * the Free Software Foundation; either version 2 of the License, or *
8 * (at your option) any later version. *
10 * This program is distributed in the hope that it will be useful, *
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of *
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
13 * GNU General Public License for more details. *
15 * You should have received a copy of the GNU General Public License *
16 * along with this program; if not, write to the *
17 * Free Software Foundation, Inc., *
18 * 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. *
19 ***************************************************************************/
22 #include "curl_handle.h"
24 #ifdef HAVE_CURL_CURL_H
28 #include <boost/algorithm/string/join.hpp>
29 #include <boost/algorithm/string/replace.hpp>
30 #include <boost/algorithm/string/split.hpp>
31 #include <boost/algorithm/string/trim.hpp>
32 #include <boost/regex.hpp>
35 #include "lyrics_fetcher.h"
36 #include "utility/html.h"
37 #include "utility/string.h"
// Table of lyrics fetcher objects. Each entry is allocated with `new` and
// stored as a LyricsFetcher pointer, in the order listed here.
// NOTE(review): the initializer's braces, its final element(s) and at least
// one entry between AzLyricsFetcher and LyricsmaniaFetcher are not visible in
// this view -- confirm how users of the array detect its end.
39 LyricsFetcher
*lyricsPlugins
[] =
41 new LyricwikiFetcher(),
42 new GeniusLyricsFetcher(),
43 new AzLyricsFetcher(),
45 new LyricsmaniaFetcher(),
46 new MetrolyricsFetcher(),
47 new JustSomeLyricsFetcher(),
48 new InternetLyricsFetcher(),
// Text that the fetchers in this file assign to result.second on their
// "nothing usable was extracted" paths.
52 const char LyricsFetcher::msgNotFound
[] = "Not found";
// Generic fetch routine.
//
// Visible steps: take urlTemplate(), substitute the %artist% and %title%
// placeholders, download the page with Curl::perform, extract candidate
// lyrics with getContent(regex(), data), and concatenate the matches into
// `data`, separated by a "----------" line.
// On the failure paths shown, result.second receives either the libcurl error
// text or msgNotFound.
//
// NOTE(review): this view is missing several lines of the function (braces,
// the declarations of `result` and `data`, the condition guarding the curl
// error branch, return statements and most of the loop body) -- confirm the
// details against the complete file.
54 LyricsFetcher::Result
LyricsFetcher::fetch(const std::string
&artist
, const std::string
&title
)
59 std::string url
= urlTemplate();
// Fill in the placeholders of the URL template; the arguments are inserted
// exactly as passed in.
60 boost::replace_all(url
, "%artist%", artist
);
61 boost::replace_all(url
, "%title%", title
);
64 CURLcode code
= Curl::perform(data
, url
);
// Report the transfer failure as text via curl_easy_strerror.
68 result
.second
= curl_easy_strerror(code
);
72 auto lyrics
= getContent(regex(), data
);
// No regex match, or notLyrics() rejected the page: report "Not found".
74 if (lyrics
.empty() || notLyrics(data
))
76 result
.second
= msgNotFound
;
81 for (auto it
= lyrics
.begin(); it
!= lyrics
.end(); ++it
)
// Put a separator after every match except the last one.
87 if (it
!= lyrics
.end()-1)
88 data
+= "\n\n----------\n\n";
97 std::vector
<std::string
> LyricsFetcher::getContent(const char *regex_
, const std::string
&data
)
99 std::vector
<std::string
> result
;
100 boost::regex
rx(regex_
);
101 auto first
= boost::sregex_iterator(data
.begin(), data
.end(), rx
);
102 auto last
= boost::sregex_iterator();
103 for (; first
!= last
; ++first
)
104 result
.push_back(first
->str(1));
// Cleans up one extracted lyrics fragment in place.
//
// Visible steps: replace `data` with unescapeHtmlUtf8(data), split it on '\n'
// into `lines`, iterate over the lines, run std::unique with a predicate that
// treats two adjacent empty lines as duplicates, and join the lines back
// together with '\n'.
//
// NOTE(review): a few statements of this function (including the body of the
// per-line loop) are not visible in this view -- confirm against the full file.
108 void LyricsFetcher::postProcess(std::string
&data
) const
110 data
= unescapeHtmlUtf8(data
);
112 // Remove indentation from each line and collapse multiple newlines into one.
113 std::vector
<std::string
> lines
;
114 boost::split(lines
, data
, boost::is_any_of("\n"));
115 for (auto &line
: lines
)
// NOTE(review): as far as this view shows, the iterator returned by
// std::unique is discarded and `lines` is never shortened. std::unique does
// not change the container's size, so the elements past the returned iterator
// (left in an unspecified state) are still joined below. The usual form is
// `lines.erase(std::unique(...), lines.end());` -- verify and fix in the full
// file.
117 std::unique(lines
.begin(), lines
.end(), [](std::string
&a
, std::string
&b
) {
118 return a
.empty() && b
.empty();
120 data
= boost::algorithm::join(lines
, "\n");
124 /***********************************************************************/
// Two-stage fetch.
//
// Visible steps: run the generic LyricsFetcher::fetch; if it reported success,
// reset result.first to false and use result.second as the URL of a second
// Curl::perform request. From that response, take the contents of
// <div class='lyricbox'> elements, unescape them, and reject the result when a
// fragment contains the site's "not licensed" notice. Otherwise the fragments
// are concatenated into `data` with a "----------" separator and stored in
// result.second.
//
// NOTE(review): braces, return statements, the declaration of `data`, the
// condition guarding the msgNotFound assignment and most of the final loop
// body are not visible in this view -- confirm against the full file.
126 LyricsFetcher::Result
LyricwikiFetcher::fetch(const std::string
&artist
, const std::string
&title
)
128 LyricsFetcher::Result result
= LyricsFetcher::fetch(artist
, title
);
129 if (result
.first
== true)
131 result
.first
= false;
// result.second is passed in the URL position of Curl::perform.
// NOTE(review): the meaning of the trailing `""` and `true` arguments is not
// shown here (presumably referer and follow-redirects) -- check curl_handle.h.
134 CURLcode code
= Curl::perform(data
, result
.second
, "", true);
136 if (code
!= CURLE_OK
)
138 result
.second
= curl_easy_strerror(code
);
142 auto lyrics
= getContent("<div class='lyricbox'>(.*?)</div>", data
);
146 result
.second
= msgNotFound
;
// Unescape every extracted fragment in place before inspecting it.
149 std::transform(lyrics
.begin(), lyrics
.end(), lyrics
.begin(), unescapeHtmlUtf8
);
// True when any fragment contains the site's licensing notice.
150 bool license_restriction
= std::any_of(lyrics
.begin(), lyrics
.end(), [](const std::string
&s
) {
151 return s
.find("Unfortunately, we are not licensed to display the full lyrics for this song at the moment.") != std::string::npos
;
153 if (license_restriction
)
155 result
.second
= "Licence restriction";
160 for (auto it
= lyrics
.begin(); it
!= lyrics
.end(); ++it
)
// Put a separator after every fragment except the last one.
167 if (it
!= lyrics
.end()-1)
168 data
+= "\n\n----------\n\n";
172 result
.second
= data
;
178 bool LyricwikiFetcher::notLyrics(const std::string
&data
) const
180 return data
.find("action=edit") != std::string::npos
;
183 /**********************************************************************/
// Locates a lyrics page through a Google "I'm Feeling Lucky" query.
//
// Visible steps: start the query string from `artist`, append "+%2B" followed
// by siteKeyword(), build the google.com search URL, download it, and pull the
// redirect target out of the `<A HREF="...">here</A>` link in the response.
// The first extracted URL must pass isURLOk(); otherwise result.second is set
// to msgNotFound. The accepted URL is unescaped into `data`, and the function
// finally delegates to LyricsFetcher::fetch("", "").
//
// NOTE(review): several lines are not visible in this view (braces, the
// declarations of `result` and `data`, whatever is appended to search_str
// between `artist` and "+%2B", return statements, and how the found URL is
// handed to the base-class fetch) -- confirm against the full file.
185 LyricsFetcher::Result
GoogleLyricsFetcher::fetch(const std::string
&artist
, const std::string
&title
)
188 result
.first
= false;
190 std::string search_str
= artist
;
// "%2B" is the percent-encoded '+' character.
193 search_str
+= "+%2B";
194 search_str
+= siteKeyword();
196 std::string google_url
= "http://www.google.com/search?hl=en&ie=UTF-8&oe=UTF-8&q=";
197 google_url
+= search_str
;
198 google_url
+= "&btnI=I%27m+Feeling+Lucky";
// The search URL is passed both as the URL and as the third argument.
// NOTE(review): the third parameter is presumably the referer -- check
// curl_handle.h.
201 CURLcode code
= Curl::perform(data
, google_url
, google_url
);
203 if (code
!= CURLE_OK
)
205 result
.second
= curl_easy_strerror(code
);
209 auto urls
= getContent("<A HREF=\"(.*?)\">here</A>", data
);
// Only the first extracted link is considered.
211 if (urls
.empty() || !isURLOk(urls
[0]))
213 result
.second
= msgNotFound
;
217 data
= unescapeHtmlUtf8(urls
[0]);
220 return LyricsFetcher::fetch("", "");
223 bool GoogleLyricsFetcher::isURLOk(const std::string
&url
)
225 return url
.find(siteKeyword()) != std::string::npos
;
228 /**********************************************************************/
230 bool MetrolyricsFetcher::isURLOk(const std::string
&url
)
232 // it sometimes return link to sitemap.xml, which is huge so we need to discard it
233 return GoogleLyricsFetcher::isURLOk(url
) && url
.find("sitemap") == std::string::npos
;
236 /**********************************************************************/
238 LyricsFetcher::Result
InternetLyricsFetcher::fetch(const std::string
&artist
, const std::string
&title
)
240 GoogleLyricsFetcher::fetch(artist
, title
);
241 LyricsFetcher::Result result
;
242 result
.first
= false;
243 result
.second
= "The following site may contain lyrics for this song: ";
244 result
.second
+= URL
;
// URL check for InternetLyricsFetcher.
// NOTE(review): the body of this function is not visible in this view.
// InternetLyricsFetcher::fetch reads a member named `URL`, so this function
// presumably records `url` there -- confirm against the full file.
248 bool InternetLyricsFetcher::isURLOk(const std::string
&url
)
254 #endif // HAVE_CURL_CURL_H