Fix broken YouTube search function (website changed)
[gpodder.git] / src / gpodder / resolver.py
blob e0a2825399fae10815e18f22126c9bdf5fd641ec
1 # -*- coding: utf-8 -*-
3 # gPodder - A media aggregator and podcast client
4 # Copyright (c) 2005-2008 Thomas Perl and the gPodder Team
6 # gPodder is free software; you can redistribute it and/or modify
7 # it under the terms of the GNU General Public License as published by
8 # the Free Software Foundation; either version 3 of the License, or
9 # (at your option) any later version.
11 # gPodder is distributed in the hope that it will be useful,
12 # but WITHOUT ANY WARRANTY; without even the implied warranty of
13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 # GNU General Public License for more details.
16 # You should have received a copy of the GNU General Public License
17 # along with this program. If not, see <http://www.gnu.org/licenses/>.
19 # resolver.py -- YouTube and related magic
20 # Justin Forest <justin.forest@gmail.com> 2008-10-13
22 # TODO:
24 # * Channel covers.
25 # * Support for Vimeo, maybe blip.tv and others.
27 import re
28 import urllib
29 import urllib2
30 import gtk
31 import gobject
32 from xml.sax import saxutils
33 from gpodder.liblogger import log
34 from gpodder.util import proxy_request
def get_real_download_url(url, proxy=None):
    """Resolve a YouTube .swf embed URL to a direct video download URL.

    Scrapes the watch page for the "t" token and builds a
    get_video URL requesting format 18 (MP4). Any URL that is not a
    YouTube /v/<id>.swf embed link is returned unchanged.
    """
    embed_match = re.compile('http://(?:[a-z]+\.)?youtube\.com/v/(.*)\.swf', re.IGNORECASE).match(url)
    if embed_match is None:
        return url

    video_id = embed_match.group(1)
    page = proxy_request('http://www.youtube.com/watch?v=' + video_id, proxy, method='GET').read()

    # The watch page embeds a session token as "t": "..." in its JS config
    token_match = re.compile('.*"t"\:\s+"([^"]+)".*').search(page)
    if token_match:
        resolved = 'http://www.youtube.com/get_video?video_id=' + video_id + '&t=' + token_match.group(1) + '&fmt=18'
        log('YouTube link resolved: %s => %s', url, resolved)
        return resolved

    return url
def get_real_channel_url(url):
    """Convert a YouTube user or profile page URL into the user's
    videos RSS feed URL.

    Returns the original URL unchanged when it is not a recognized
    YouTube user/profile link.
    """
    r = re.compile('http://(?:[a-z]+\.)?youtube\.com/user/([a-z0-9]+)', re.IGNORECASE)
    m = r.match(url)

    if m is not None:
        next = 'http://www.youtube.com/rss/user/'+ m.group(1) +'/videos.rss'
        log('YouTube link resolved: %s => %s', url, next)
        return next

    # BUGFIX: the "?" must be escaped — unescaped, the regex read
    # "profile?" as "optional e" and never matched the literal query
    # separator in ".../profile?user=NAME" URLs.
    r = re.compile('http://(?:[a-z]+\.)?youtube\.com/profile\?user=([a-z0-9]+)', re.IGNORECASE)
    m = r.match(url)

    if m is not None:
        next = 'http://www.youtube.com/rss/user/'+ m.group(1) +'/videos.rss'
        log('YouTube link resolved: %s => %s', url, next)
        return next

    return url
def get_real_cover(url):
    """Scrape a YouTube user's profile page for the channel userpic URL.

    Only acts on feed URLs of the form
    http://www.youtube.com/rss/user/<name>/videos.rss; returns None
    when the URL does not match or no image URL could be extracted.
    """
    log('Cover: %s', url)

    feed_re = re.compile('http://www\.youtube\.com/rss/user/([a-z0-9]+)/videos\.rss', re.IGNORECASE)
    match = feed_re.match(url)

    if match is None:
        return None

    # Narrow the page down to the src attribute of the profile image tag
    html = urllib2.urlopen('http://www.youtube.com/user/'+ match.group(1)).read()
    html = html[html.find('id="user-profile-image"'):]
    html = html[html.find('src="') + 5:]

    userpic = html[:html.find('"')]
    if not userpic.strip():
        return None

    log('YouTube userpic for %s is: %s', url, userpic)
    return userpic
def get_real_episode_length(episode):
    """Return the server-reported Content-Length of the episode's
    resolved download URL, or 0 when the episode is not a resolvable
    YouTube link or the header is unavailable.
    """
    resolved = get_real_download_url(episode.url)

    # Only probe URLs we actually rewrote; plain feeds keep their size
    if resolved == episode.url:
        return 0

    try:
        headers = urllib2.urlopen(resolved).info()
        if 'content-length' in headers:
            return headers['content-length']
    except urllib2.HTTPError:
        pass

    return 0
def find_youtube_channels(string):
    """Search YouTube for users matching the given query string.

    Screen-scrapes the YouTube user-search results page and returns a
    gtk.ListStore with one row per unique user:
    (selected: bool, username: str, videos.rss feed URL: str).
    """
    # FIXME: Make proper use of the YouTube API instead
    # of screen-scraping the YouTube website
    url = 'http://www.youtube.com/results?search_query='+ urllib.quote(string, '') +'&search_type=search_users&aq=f'

    # Collapse whitespace between tags so the user-link regex matches
    r = re.compile('>\s+<')
    data = r.sub('><', urllib.urlopen(url).read())

    r1 = re.compile('<a href="/user/([^"]+)"[^>]*>([^<]+)</a>')
    m1 = r1.findall(data)

    model = gtk.ListStore(gobject.TYPE_BOOLEAN, gobject.TYPE_STRING, gobject.TYPE_STRING)

    # De-duplicate users (a set gives O(1) membership tests) while
    # preserving first-seen order; the unused '\s+' regex was removed.
    found_users = set()
    for (name, title) in m1:
        if name not in found_users:
            found_users.add(name)
            link = 'http://www.youtube.com/rss/user/'+ name +'/videos.rss'
            model.append([False, name, link])

    return model