1 from PyQt4
import QtCore
, QtGui
2 from htmlentitydefs
import name2codepoint
as n2cp
4 import urllib2
, cookielib
11 socket
.setdefaulttimeout(8)
13 appIcon
= 'gfx/icon.png'
18 """Make some time for necessary events."""
19 QtCore
.QEventLoop().processEvents(QtCore
.QEventLoop
.AllEvents
)
22 """Converts seconds to min:sec."""
25 if sec
<10:sec
='0'+str(sec
)
26 return str(min)+':'+str(sec
)
28 def numeric_compare(x
, y
):
35 """Retrieve list of unique elements."""
37 return t(c
for c
in seq
if not (c
in seen
or seen
.append(c
)))
42 return unicodedata
.normalize('NFKD', ustr
).encode('ascii', 'ignore')
44 def fetch(SE
, sites
, song
=None, xtra_tags
={}, stripHTML
=True):
45 """Returns None when nothing found, or [site,source-url]."""
46 # compose the search-engine URL
48 SE_url
=toAscii(f(format
.params(song
, xtra_tags
)))
49 SE_url
=SE_url
.replace(' ', '+')
51 # fetch the page from the search-engine with the results
52 request
=urllib2
.Request(SE_url
)
53 request
.add_header('User-Agent', APPNAME
)
54 opener
=urllib2
.build_opener()
55 data
=opener
.open(request
).read()
57 # look for urls on the search page!
58 regex
=re
.compile('<a href="(.*?)".*?>.*?<\/a>')
59 urls
=regex
.findall(data
)
60 logging
.debug("all urls")
62 logging
.debug(" %s"%(url))
64 # look for urls which are defined in $sites.
65 # The first matching URL is taken
67 logging
.debug("Checking %i URLs on %s"%(len(sites
), SE_url
))
68 # loop over all sites which may have what we're interested in
72 # check if on the results-page there is a link to $site
75 logging
.debug(" Found site %s in results: %s"%(site
, url
))
77 finalRegex
=sites
[site
]
83 cj
= cookielib
.CookieJar()
84 opener
= urllib2
.build_opener(urllib2
.HTTPCookieProcessor(cj
))
85 loging
.debug(" Reading URL %s"%(finalURL))
88 r
= opener
.open(finalURL
)
90 # perform the regular expression
91 regex
=re
.compile(finalRegex
, re
.IGNORECASE|re
.MULTILINE|re
.DOTALL
)
92 match
=regex
.search(data
)
94 logging
.debug(" Failed to open site %s"%(finalURL))
98 # if the regex matches, then we arrive here
99 # we assume the content we want is in the first group
100 logging
.debug(" Regex succeeded!")
102 charset
=re
.compile('charset=["\']?([\w-]+)').search(data
).group(1)
103 logging
.debug(" charset=%s"%(charset))
106 logging
.debug(" charset not found. Assuming %s"%(charset))
108 data
=re
.sub(chr(13), '', data
) # replace ^M aka \r
109 data
=unicode(data
, charset
)
112 data
=re
.sub('<br.*?>', '\n', data
) # replace <br />'s with newline
113 data
=re
.sub('\n\n', '\n', data
)
114 data
=re
.sub('<[^>]*?>', '', data
) # strip all other HTML
115 data
=decodeHTMLEntities(data
) # convert HTML entities
117 logging
.debug("Succeeded fetching.")
118 return [data
,finalURL
]
120 logging
.debug(" Regex for %s%s failed"%(site
, (" (%s)"%(finalURL) if finalURL
else "")))
122 logging
.debug(" Site %s not found on results-page"%(site))
125 logging
.debug("Failed fetching.")
128 def substEntity(match
):
130 if match
.group(1) == "#":
131 return unichr(int(ent
))
def decodeHTMLEntities(string):
    """Replace HTML entity references in ``string`` via ``substEntity``.

    Every occurrence of ``&`` + optional ``#`` + body + ``;`` is passed to
    ``substEntity`` as a match object, where group 1 is ``"#"`` or the empty
    string and group 2 is the body (1-5 digits, or 1-8 word characters).

    Returns ``string`` with each match replaced by what ``substEntity``
    returned for it; text that does not match the pattern is left untouched.
    """
    # Raw literal: the backslash escapes are meant for the regex engine and
    # must not depend on how Python treats unknown string escapes.
    entity_re = re.compile(r"&(#?)(\d{1,5}|\w{1,8});")
    # sub() returns the substituted text directly; the replacement count
    # that subn() also reports was never used.
    return entity_re.sub(substEntity, string)
146 class Button(QtGui
.QPushButton
):
148 """A simple Button class which calls $onClick when clicked."""
149 def __init__(self
, caption
, onClick
=None, iconPath
=None, iconOnly
=False, parent
=None):
150 QtGui
.QPushButton
.__init
__(self
, parent
)
153 self
.connect(self
, QtCore
.SIGNAL('clicked(bool)'), onClick
)
155 self
.changeIcon(iconPath
)
157 if not(iconPath
and iconOnly
):
158 QtGui
.QPushButton
.setText(self
, caption
)
160 self
.setToolTip(caption
)
def setText(self, caption):
    """Update the caption shown by this button.

    The tooltip is always set to ``caption``. The visible button text is
    only changed when the button currently has no icon.
    """
    self.setToolTip(caption)
    current_icon = self.icon()
    # Qt reports "no icon" as a null QIcon rather than None; the None test is
    # kept so that an icon() returning None is still treated as "no icon".
    if current_icon is None or current_icon.isNull():
        # Call the base class explicitly, as __init__ does: self.setText()
        # would re-enter this override and recurse without end.
        QtGui.QPushButton.setText(self, caption)
167 def changeIcon(self
, iconPath
):
169 icon
.addFile(iconPath
, QtCore
.QSize(self
.iconSize
, self
.iconSize
))