Fixed the YouTube support.
[gpodder.git] / src/gpodder/resolver.py
# -*- coding: utf-8 -*-
#
# gPodder - A media aggregator and podcast client
# Copyright (c) 2005-2009 Thomas Perl and the gPodder Team
#
# gPodder is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 3 of the License, or
# (at your option) any later version.
#
# gPodder is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.
#

# resolver.py -- YouTube and related magic
# Justin Forest <justin.forest@gmail.com> 2008-10-13
#
# TODO:
#
#  * Channel covers.
#  * Support for Vimeo, maybe blip.tv and others.
import re
import urllib
import urllib2
import gtk
import gobject

import gpodder
from xml.sax import saxutils
from gpodder.liblogger import log
from gpodder.util import proxy_request
def get_real_download_url(url, proxy=None):
    # IDs from http://forum.videohelp.com/topic336882-1800.html#1912972
    if gpodder.interface == gpodder.MAEMO:
        # Use 3GP with AAC on Maemo
        fmt_id = 17
    else:
        # Use MP4 with AAC by default
        fmt_id = 18

    vid = get_youtube_id(url)
    if vid is not None:
        page = None
        url = 'http://www.youtube.com/watch?v=' + vid

        # Follow HTTP redirects by hand until the watch page is fetched
        while page is None:
            req = proxy_request(url, proxy, method='GET')
            if 'location' in req.msg:
                url = req.msg['location']
            else:
                page = req.read()

        # The "t" token scraped from the watch page is required to
        # build the get_video URL for the direct download
        r2 = re.compile(r'.*"t":\s+"([^"]+)".*').search(page)
        if r2:
            next = 'http://www.youtube.com/get_video?video_id=' + vid + '&t=' + r2.group(1) + '&fmt=%d' % fmt_id
            log('YouTube link resolved: %s => %s', url, next)
            return next

    return url
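
# Example (hypothetical video ID; the 't' token in the result is whatever
# the regex above finds on the watch page):
#
#   get_real_download_url('http://www.youtube.com/watch?v=abc123')
#   # => 'http://www.youtube.com/get_video?video_id=abc123&t=...&fmt=18'
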
def get_youtube_id(url):
    r = re.compile(r'http://(?:[a-z]+\.)?youtube\.com/v/(.*)\.swf', re.IGNORECASE).match(url)
    if r is not None:
        return r.group(1)

    r = re.compile(r'http://(?:[a-z]+\.)?youtube\.com/watch\?v=([^&]*)', re.IGNORECASE).match(url)
    if r is not None:
        return r.group(1)

    return None
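
# Examples (hypothetical IDs) for the two URL shapes matched above:
#
#   get_youtube_id('http://www.youtube.com/v/abc123.swf')    # -> 'abc123'
#   get_youtube_id('http://www.youtube.com/watch?v=abc123')  # -> 'abc123'
#   get_youtube_id('http://example.com/video.mp4')           # -> None
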
def get_real_channel_url(url):
    r = re.compile(r'http://(?:[a-z]+\.)?youtube\.com/user/([a-z0-9]+)', re.IGNORECASE)
    m = r.match(url)

    if m is not None:
        next = 'http://www.youtube.com/rss/user/' + m.group(1) + '/videos.rss'
        log('YouTube link resolved: %s => %s', url, next)
        return next

    r = re.compile(r'http://(?:[a-z]+\.)?youtube\.com/profile\?user=([a-z0-9]+)', re.IGNORECASE)
    m = r.match(url)

    if m is not None:
        next = 'http://www.youtube.com/rss/user/' + m.group(1) + '/videos.rss'
        log('YouTube link resolved: %s => %s', url, next)
        return next

    return url
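
# Example (hypothetical user name): both user-page styles map to the same
# per-user RSS feed, and unrecognized URLs pass through unchanged:
#
#   get_real_channel_url('http://www.youtube.com/user/somename')
#   # => 'http://www.youtube.com/rss/user/somename/videos.rss'
#   get_real_channel_url('http://example.com/feed.xml')
#   # => 'http://example.com/feed.xml'
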
def get_real_cover(url):
    log('Cover: %s', url)

    r = re.compile(r'http://www\.youtube\.com/rss/user/([a-z0-9]+)/videos\.rss', re.IGNORECASE)
    m = r.match(url)

    if m is not None:
        data = urllib2.urlopen('http://www.youtube.com/user/' + m.group(1)).read()
        data = data[data.find('id="user-profile-image"'):]
        data = data[data.find('src="') + 5:]

        next = data[:data.find('"')]

        if next.strip() == '':
            return None

        log('YouTube userpic for %s is: %s', url, next)
        return next

    return None
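
# Note: this scrapes the HTML user page for the src="..." attribute of the
# id="user-profile-image" element, so the result is whatever image URL the
# page currently embeds, or None if the markup changes or the user has no
# picture. Example (hypothetical user name):
#
#   get_real_cover('http://www.youtube.com/rss/user/somename/videos.rss')
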
def get_real_episode_length(episode):
    url = get_real_download_url(episode.url)

    if url != episode.url:
        try:
            info = urllib2.urlopen(url).info()
            if 'content-length' in info:
                return info['content-length']
        except urllib2.HTTPError:
            pass

    return 0
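
# Note: on success this returns the raw Content-Length header (a string);
# 0 is the fallback. Callers doing arithmetic should coerce, e.g.
# (assuming 'episode' is an object with a .url attribute, as elsewhere
# in gPodder):
#
#   length = int(get_real_episode_length(episode) or 0)
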
def find_youtube_channels(string):
    # FIXME: Make proper use of the YouTube API instead
    # of screen-scraping the YouTube website
    url = 'http://www.youtube.com/results?search_query=' + urllib.quote(string, '') + '&search_type=search_users&aq=f'

    r = re.compile(r'>\s+<')
    data = r.sub('><', urllib.urlopen(url).read())

    r1 = re.compile('<a href="/user/([^"]+)"[^>]*>([^<]+)</a>')
    m1 = r1.findall(data)

    model = gtk.ListStore(gobject.TYPE_BOOLEAN, gobject.TYPE_STRING, gobject.TYPE_STRING)

    found_users = []
    for (name, title) in m1:
        if name not in found_users:
            found_users.append(name)
            link = 'http://www.youtube.com/rss/user/' + name + '/videos.rss'
            model.append([False, name, link])

    return model
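
# Example (arbitrary search term): each row of the returned ListStore is
# (selected, username, feed URL), ready for display in a gtk.TreeView of a
# channel-search dialog:
#
#   model = find_youtube_channels('linux')
#   for row in model:
#       print row[1], row[2]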