Remove libtagupdate + related functionality
[gpodder.git] / src / gpodder / libpodcasts.py
blob0e7570fa35a0cf188eb44c807b523c404810b655
1 # -*- coding: utf-8 -*-
3 # gPodder - A media aggregator and podcast client
4 # Copyright (c) 2005-2009 Thomas Perl and the gPodder Team
6 # gPodder is free software; you can redistribute it and/or modify
7 # it under the terms of the GNU General Public License as published by
8 # the Free Software Foundation; either version 3 of the License, or
9 # (at your option) any later version.
11 # gPodder is distributed in the hope that it will be useful,
12 # but WITHOUT ANY WARRANTY; without even the implied warranty of
13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 # GNU General Public License for more details.
16 # You should have received a copy of the GNU General Public License
17 # along with this program. If not, see <http://www.gnu.org/licenses/>.
22 # libpodcasts.py -- data classes for gpodder
23 # thomas perl <thp@perli.net> 20051029
25 # Contains code based on:
26 # liblocdbwriter.py (2006-01-09)
27 # liblocdbreader.py (2006-01-10)
30 import gtk
31 import gobject
32 import pango
34 import gpodder
35 from gpodder import util
36 from gpodder import opml
37 from gpodder import feedcore
38 from gpodder import services
39 from gpodder import draw
40 from gpodder import dumbshelve
41 from gpodder import resolver
42 from gpodder import corestats
44 from gpodder.liblogger import log
45 from gpodder.libgpodder import gl
46 from gpodder.dbsqlite import db
48 import os.path
49 import os
50 import glob
51 import shutil
52 import sys
53 import urllib
54 import urlparse
55 import time
56 import datetime
57 import rfc822
58 import hashlib
59 import xml.dom.minidom
60 import feedparser
62 from xml.sax import saxutils
_ = gpodder.gettext

# Status icon names used when rendering the episode list.
# On Maemo we use icon names from the device theme; on the desktop
# we use freedesktop icon names and GTK stock items.
if gpodder.interface == gpodder.MAEMO:
    ICON_AUDIO_FILE = 'gnome-mime-audio-mp3'
    ICON_VIDEO_FILE = 'gnome-mime-video-mp4'
    ICON_GENERIC_FILE = 'text-x-generic'
    ICON_DOWNLOADING = 'qgn_toolb_messagin_moveto'
    ICON_DELETED = 'qgn_toolb_gene_deletebutton'
    ICON_NEW = 'qgn_list_gene_favor'
else:
    ICON_AUDIO_FILE = 'audio-x-generic'
    ICON_VIDEO_FILE = 'video-x-generic'
    ICON_GENERIC_FILE = 'text-x-generic'
    ICON_DOWNLOADING = gtk.STOCK_GO_DOWN
    ICON_DELETED = gtk.STOCK_DELETE
    ICON_NEW = gtk.STOCK_ABOUT
class gPodderFetcher(feedcore.Fetcher):
    """
    Feed fetcher used by PodcastChannel objects.

    Extends the feedcore Fetcher with the gPodder User-Agent and
    offers fetch_channel(), which knows how to pass a channel's
    cached HTTP validators (ETag/Last-Modified) and its HTTP Basic
    credentials along with the request.
    """

    def __init__(self):
        feedcore.Fetcher.__init__(self, gpodder.user_agent)

    def fetch_channel(self, channel):
        """Download the channel's feed, honouring its auth + cache data."""
        if channel.username or channel.password:
            # Embed the credentials in the URL's netloc instead of using a
            # HTTPBasicAuthHandler -- the handler would require knowing the
            # realm up-front, and this approach works just as well.
            credentials = ':'.join((urllib.quote(channel.username),
                                    urllib.quote(channel.password)))
            parts = list(urlparse.urlsplit(channel.url))
            parts[1] = '@'.join((credentials, parts[1]))
            target_url = urlparse.urlunsplit(parts)
        else:
            target_url = channel.url

        cached_modified = feedparser._parse_date(channel.last_modified)
        self.fetch(target_url, channel.etag, cached_modified)

    def _resolve_url(self, url):
        # Let the resolver rewrite "virtual" feed URLs (e.g. YouTube pages)
        # into real feed URLs before fetching.
        return resolver.get_real_channel_url(url)
class PodcastModelObject(object):
    """
    Common base class for podcast model objects.

    Provides helpers to build and refresh objects from plain
    dictionaries (as returned by the database layer).
    """

    @classmethod
    def create_from_dict(cls, d, *args):
        """
        Instantiate cls(*args) and copy the values of "d" onto it.
        """
        instance = cls(*args)
        instance.update_from_dict(d)
        return instance

    def update_from_dict(self, d):
        """
        Copy every key of "d" that names an already-existing attribute
        of this object; unknown keys are silently ignored.
        """
        for key, value in d.items():
            if hasattr(self, key):
                setattr(self, key, value)
144 class PodcastChannel(PodcastModelObject):
145 """holds data for a complete channel"""
146 MAX_FOLDERNAME_LENGTH = 150
147 icon_cache = {}
149 feed_fetcher = gPodderFetcher()
151 @classmethod
152 def load(cls, url, create=True, authentication_tokens=None):
153 if isinstance(url, unicode):
154 url = url.encode('utf-8')
156 tmp = db.load_channels(factory=cls.create_from_dict, url=url)
157 if len(tmp):
158 return tmp[0]
159 elif create:
160 tmp = PodcastChannel(url)
161 if authentication_tokens is not None:
162 tmp.username = authentication_tokens[0]
163 tmp.password = authentication_tokens[1]
165 tmp.update()
166 tmp.save()
167 db.force_last_new(tmp)
168 return tmp
170 def episode_factory(self, d):
172 This function takes a dictionary containing key-value pairs for
173 episodes and returns a new PodcastEpisode object that is connected
174 to this PodcastChannel object.
176 Returns: A new PodcastEpisode object
178 return PodcastEpisode.create_from_dict(d, self)
180 def _consume_updated_feed(self, feed):
181 # update the cover if it's not there
182 self.update_cover()
184 self.parse_error = feed.get('bozo_exception', None)
186 self.title = feed.feed.get('title', self.url)
187 self.link = feed.feed.get('link', self.link)
188 self.description = feed.feed.get('subtitle', self.description)
189 # Start YouTube-specific title FIX
190 YOUTUBE_PREFIX = 'Uploads by '
191 if self.title.startswith(YOUTUBE_PREFIX):
192 self.title = self.title[len(YOUTUBE_PREFIX):] + ' on YouTube'
193 # End YouTube-specific title FIX
195 try:
196 self.pubDate = rfc822.mktime_tz(feed.feed.get('updated_parsed', None+(0,)))
197 except:
198 self.pubDate = time.time()
200 if hasattr(feed.feed, 'image'):
201 if hasattr(feed.feed.image, 'href') and feed.feed.image.href:
202 old = self.image
203 self.image = feed.feed.image.href
204 if old != self.image:
205 self.update_cover(force=True)
207 self.save()
209 # Load all episodes to update them properly.
210 existing = self.get_all_episodes()
212 # We can limit the maximum number of entries that gPodder will parse
213 # via the "max_episodes_per_feed" configuration option.
214 if len(feed.entries) > gl.config.max_episodes_per_feed:
215 log('Limiting number of episodes for %s to %d', self.title, gl.config.max_episodes_per_feed)
216 for entry in feed.entries[:min(gl.config.max_episodes_per_feed, len(feed.entries))]:
217 episode = None
219 try:
220 episode = PodcastEpisode.from_feedparser_entry(entry, self)
221 except Exception, e:
222 log('Cannot instantiate episode "%s": %s. Skipping.', entry.get('id', '(no id available)'), e, sender=self, traceback=True)
224 if episode:
225 self.count_new += 1
227 for ex in existing:
228 if ex.guid == episode.guid or episode.is_duplicate(ex):
229 for k in ('title', 'url', 'description', 'link', 'pubDate', 'guid'):
230 setattr(ex, k, getattr(episode, k))
231 self.count_new -= 1
232 episode = ex
234 episode.save()
236 # This *might* cause episodes to be skipped if there were more than
237 # max_episodes_per_feed items added to the feed between updates.
238 # The benefit is that it prevents old episodes from apearing as new
239 # in certain situations (see bug #340).
240 db.purge(gl.config.max_episodes_per_feed, self.id)
242 def _update_etag_modified(self, feed):
243 self.updated_timestamp = time.time()
244 self.calculate_publish_behaviour()
245 self.etag = feed.headers.get('etag', self.etag)
246 self.last_modified = feed.headers.get('last-modified', self.last_modified)
248 def update(self):
249 if self.updated_timestamp > time.time() - 60*60*24:
250 # If we have updated in the last 24 hours, do some optimizations
251 if self.release_expected > time.time():
252 hours = (self.release_expected-time.time())/(60*60)
253 log('Expecting a release in %.2f hours - skipping %s', hours, self.title, sender=self)
254 return
256 # If we have updated in the last 10 minutes, skip the update
257 if self.updated_timestamp > time.time() - 60*10:
258 log('Last update still too recent - skipping %s', self.title, sender=self)
259 return
261 try:
262 self.feed_fetcher.fetch_channel(self)
263 except feedcore.UpdatedFeed, updated:
264 feed = updated.data
265 self._consume_updated_feed(feed)
266 self._update_etag_modified(feed)
267 self.save()
268 except feedcore.NewLocation, updated:
269 feed = updated.data
270 self.url = feed.href
271 self._consume_updated_feed(feed)
272 self._update_etag_modified(feed)
273 self.save()
274 except feedcore.NotModified, updated:
275 feed = updated.data
276 self._update_etag_modified(feed)
277 self.save()
278 except Exception, e:
279 # "Not really" errors
280 #feedcore.AuthenticationRequired
281 # Temporary errors
282 #feedcore.Offline
283 #feedcore.BadRequest
284 #feedcore.InternalServerError
285 #feedcore.WifiLogin
286 # Permanent errors
287 #feedcore.Unsubscribe
288 #feedcore.NotFound
289 #feedcore.InvalidFeed
290 #feedcore.UnknownStatusCode
291 raise
293 db.commit()
295 def update_cover(self, force=False):
296 if self.cover_file is None or not os.path.exists(self.cover_file) or force:
297 if self.image is not None:
298 services.cover_downloader.request_cover(self)
300 def delete(self):
301 db.delete_channel(self)
303 def save(self):
304 db.save_channel(self)
306 def stat(self, state=None, is_played=None, is_locked=None):
307 return db.get_channel_stat(self.url, state=state, is_played=is_played, is_locked=is_locked)
309 def __init__( self, url = "", title = "", link = "", description = ""):
310 self.id = None
311 self.url = url
312 self.title = title
313 self.link = link
314 self.description = description
315 self.image = None
316 self.pubDate = 0
317 self.parse_error = None
318 self.newest_pubdate_cached = None
319 self.iter = None
320 self.foldername = None
321 self.auto_foldername = 1 # automatically generated foldername
323 # should this channel be synced to devices? (ex: iPod)
324 self.sync_to_devices = True
325 # to which playlist should be synced
326 self.device_playlist_name = 'gPodder'
327 # if set, this overrides the channel-provided title
328 self.override_title = ''
329 self.username = ''
330 self.password = ''
332 self.last_modified = None
333 self.etag = None
335 self.save_dir_size = 0
336 self.__save_dir_size_set = False
338 self.count_downloaded = 0
339 self.count_new = 0
340 self.count_unplayed = 0
342 self.channel_is_locked = False
344 self.release_expected = time.time()
345 self.release_deviation = 0
346 self.updated_timestamp = 0
348 def calculate_publish_behaviour(self):
349 episodes = db.load_episodes(self, factory=self.episode_factory, limit=30)
350 if len(episodes) < 3:
351 return
353 deltas = []
354 latest = max(e.pubDate for e in episodes)
355 for index in range(len(episodes)-1):
356 if episodes[index].pubDate != 0 and episodes[index+1].pubDate != 0:
357 deltas.append(episodes[index].pubDate - episodes[index+1].pubDate)
359 if len(deltas) > 1:
360 stats = corestats.Stats(deltas)
361 self.release_expected = min([latest+stats.stdev(), latest+(stats.min()+stats.avg())*.5])
362 self.release_deviation = stats.stdev()
363 else:
364 self.release_expected = latest
365 self.release_deviation = 0
367 def request_save_dir_size(self):
368 if not self.__save_dir_size_set:
369 self.update_save_dir_size()
370 self.__save_dir_size_set = True
372 def update_save_dir_size(self):
373 self.save_dir_size = util.calculate_size(self.save_dir)
375 def get_title( self):
376 if self.override_title:
377 return self.override_title
378 elif not self.__title.strip():
379 return self.url
380 else:
381 return self.__title
383 def set_title( self, value):
384 self.__title = value.strip()
386 title = property(fget=get_title,
387 fset=set_title)
389 def set_custom_title( self, custom_title):
390 custom_title = custom_title.strip()
392 # make sure self.foldername is initialized
393 self.get_save_dir()
395 # rename folder if custom_title looks sane
396 new_folder_name = self.find_unique_folder_name(custom_title)
397 if len(new_folder_name) > 0 and new_folder_name != self.foldername:
398 log('Changing foldername based on custom title: %s', custom_title, sender=self)
399 new_folder = os.path.join(gl.downloaddir, new_folder_name)
400 old_folder = os.path.join(gl.downloaddir, self.foldername)
401 if os.path.exists(old_folder):
402 if not os.path.exists(new_folder):
403 # Old folder exists, new folder does not -> simply rename
404 log('Renaming %s => %s', old_folder, new_folder, sender=self)
405 os.rename(old_folder, new_folder)
406 else:
407 # Both folders exist -> move files and delete old folder
408 log('Moving files from %s to %s', old_folder, new_folder, sender=self)
409 for file in glob.glob(os.path.join(old_folder, '*')):
410 shutil.move(file, new_folder)
411 log('Removing %s', old_folder, sender=self)
412 shutil.rmtree(old_folder, ignore_errors=True)
413 self.foldername = new_folder_name
414 self.save()
416 if custom_title != self.__title:
417 self.override_title = custom_title
418 else:
419 self.override_title = ''
421 def get_downloaded_episodes(self):
422 return db.load_episodes(self, factory=self.episode_factory, state=db.STATE_DOWNLOADED)
424 def get_new_episodes(self, downloading=lambda e: False):
426 Get a list of new episodes. You can optionally specify
427 "downloading" as a callback that takes an episode as
428 a parameter and returns True if the episode is currently
429 being downloaded or False if not.
431 By default, "downloading" is implemented so that it
432 reports all episodes as not downloading.
434 def check_is_new(episode):
436 For a given episode, returns True if it is to
437 be considered new or False if it is "not new".
439 return episode.state == db.STATE_NORMAL and \
440 not episode.is_played and \
441 not downloading(episode)
443 return [episode for episode in db.load_episodes(self, \
444 factory=self.episode_factory) if check_is_new(episode)]
446 def update_m3u_playlist(self):
447 if gl.config.create_m3u_playlists:
448 downloaded_episodes = self.get_downloaded_episodes()
449 fn = util.sanitize_filename(self.title)
450 if len(fn) == 0:
451 fn = os.path.basename(self.save_dir)
452 m3u_filename = os.path.join(gl.downloaddir, fn+'.m3u')
453 log('Writing playlist to %s', m3u_filename, sender=self)
454 f = open(m3u_filename, 'w')
455 f.write('#EXTM3U\n')
457 # Check to see if we need to reverse the playlist order
458 if gl.config.reverse_m3u_playlist_order:
459 episodes_m3u = reversed(downloaded_episodes)
460 else:
461 episodes_m3u = downloaded_episodes
463 for episode in episodes_m3u:
464 if episode.was_downloaded(and_exists=True):
465 filename = episode.local_filename(create=False)
466 assert filename is not None
468 if os.path.dirname(filename).startswith(os.path.dirname(m3u_filename)):
469 filename = filename[len(os.path.dirname(m3u_filename)+os.sep):]
470 f.write('#EXTINF:0,'+self.title+' - '+episode.title+' ('+episode.cute_pubdate()+')\n')
471 f.write(filename+'\n')
472 f.close()
474 def addDownloadedItem(self, item):
475 log('addDownloadedItem(%s)', item.url)
477 if not item.was_downloaded():
478 item.mark_downloaded(save=True)
479 self.update_m3u_playlist()
481 def get_all_episodes(self):
482 return db.load_episodes(self, factory=self.episode_factory)
484 def iter_set_downloading_columns(self, model, iter, episode=None, downloading=None):
485 global ICON_AUDIO_FILE, ICON_VIDEO_FILE, ICON_GENERIC_FILE
486 global ICON_DOWNLOADING, ICON_DELETED, ICON_NEW
488 if episode is None:
489 url = model.get_value( iter, 0)
490 episode = db.load_episode(url, factory=self.episode_factory)
491 else:
492 url = episode.url
494 if gl.config.episode_list_descriptions or gpodder.interface == gpodder.MAEMO:
495 icon_size = 32
496 else:
497 icon_size = 16
499 if downloading is not None and downloading(episode):
500 status_icon = util.get_tree_icon(ICON_DOWNLOADING, icon_cache=self.icon_cache, icon_size=icon_size)
501 else:
502 if episode.state == db.STATE_NORMAL:
503 if episode.is_played:
504 status_icon = None
505 else:
506 status_icon = util.get_tree_icon(ICON_NEW, icon_cache=self.icon_cache, icon_size=icon_size)
507 elif episode.was_downloaded():
508 missing = not episode.file_exists()
510 if missing:
511 log('Episode missing: %s (before drawing an icon)', episode.url, sender=self)
513 file_type = util.file_type_by_extension( model.get_value( iter, 9))
514 if file_type == 'audio':
515 status_icon = util.get_tree_icon(ICON_AUDIO_FILE, not episode.is_played, episode.is_locked, not episode.file_exists(), self.icon_cache, icon_size)
516 elif file_type == 'video':
517 status_icon = util.get_tree_icon(ICON_VIDEO_FILE, not episode.is_played, episode.is_locked, not episode.file_exists(), self.icon_cache, icon_size)
518 else:
519 status_icon = util.get_tree_icon(ICON_GENERIC_FILE, not episode.is_played, episode.is_locked, not episode.file_exists(), self.icon_cache, icon_size)
520 elif episode.state == db.STATE_DELETED or episode.state == db.STATE_DOWNLOADED:
521 status_icon = util.get_tree_icon(ICON_DELETED, not episode.is_played, icon_cache=self.icon_cache, icon_size=icon_size)
522 else:
523 log('Warning: Cannot determine status icon.', sender=self)
524 status_icon = None
526 model.set( iter, 4, status_icon)
528 def get_tree_model(self, downloading=None):
530 Return a gtk.ListStore containing episodes for this channel
532 DATA_TYPES = (str, str, str, bool, gtk.gdk.Pixbuf, str, str, str, str, str)
534 # TODO: Remove unused columns, make these symbolic names class
535 # members and use them everywhere, so we can change/reorder them
536 C_URL, C_TITLE, C_FILESIZE_TEXT, C_UNUSED0, C_STATUS_ICON, \
537 C_PUBLISHED_TEXT, C_DESCRIPTION, C_DESCRIPTION_STRIPPED, \
538 C_UNUSED1, C_EXTENSION = range(len(DATA_TYPES))
540 new_model = gtk.ListStore(*DATA_TYPES)
542 log('Returning TreeModel for %s', self.url, sender = self)
543 urls = []
544 for item in self.get_all_episodes():
545 description = item.title_and_description
547 if item.length > 0:
548 filelength = gl.format_filesize(item.length, 1)
549 else:
550 filelength = None
552 new_iter = new_model.append((item.url, item.title, filelength,
553 True, None, item.cute_pubdate(), description, util.remove_html_tags(item.description),
554 'XXXXXXXXXXXXXUNUSEDXXXXXXXXXXXXXXXXXXX', item.extension()))
555 self.iter_set_downloading_columns( new_model, new_iter, episode=item, downloading=downloading)
556 urls.append(item.url)
558 self.update_save_dir_size()
559 return (new_model, urls)
561 def find_episode( self, url):
562 return db.load_episode(url, factory=self.episode_factory)
564 @classmethod
565 def find_unique_folder_name(cls, foldername):
566 current_try = util.sanitize_filename(foldername, cls.MAX_FOLDERNAME_LENGTH)
567 next_try_id = 2
569 while db.channel_foldername_exists(current_try):
570 current_try = '%s (%d)' % (foldername, next_try_id)
571 next_try_id += 1
573 return current_try
575 def get_save_dir(self):
576 urldigest = hashlib.md5(self.url).hexdigest()
577 sanitizedurl = util.sanitize_filename(self.url, self.MAX_FOLDERNAME_LENGTH)
578 if self.foldername is None or (self.auto_foldername and (self.foldername == urldigest or self.foldername.startswith(sanitizedurl))):
579 # we must change the folder name, because it has not been set manually
580 fn_template = util.sanitize_filename(self.title, self.MAX_FOLDERNAME_LENGTH)
582 # if this is an empty string, try the basename
583 if len(fn_template) == 0:
584 log('That is one ugly feed you have here! (Report this to bugs.gpodder.org: %s)', self.url, sender=self)
585 fn_template = util.sanitize_filename(os.path.basename(self.url), self.MAX_FOLDERNAME_LENGTH)
587 # If the basename is also empty, use the first 6 md5 hexdigest chars of the URL
588 if len(fn_template) == 0:
589 log('That is one REALLY ugly feed you have here! (Report this to bugs.gpodder.org: %s)', self.url, sender=self)
590 fn_template = urldigest # no need for sanitize_filename here
592 # Find a unique folder name for this podcast
593 wanted_foldername = self.find_unique_folder_name(fn_template)
595 # if the foldername has not been set, check if the (old) md5 filename exists
596 if self.foldername is None and os.path.exists(os.path.join(gl.downloaddir, urldigest)):
597 log('Found pre-0.15.0 download folder for %s: %s', self.title, urldigest, sender=self)
598 self.foldername = urldigest
600 # we have a valid, new folder name in "current_try" -> use that!
601 if self.foldername is not None and wanted_foldername != self.foldername:
602 # there might be an old download folder crawling around - move it!
603 new_folder_name = os.path.join(gl.downloaddir, wanted_foldername)
604 old_folder_name = os.path.join(gl.downloaddir, self.foldername)
605 if os.path.exists(old_folder_name):
606 if not os.path.exists(new_folder_name):
607 # Old folder exists, new folder does not -> simply rename
608 log('Renaming %s => %s', old_folder_name, new_folder_name, sender=self)
609 os.rename(old_folder_name, new_folder_name)
610 else:
611 # Both folders exist -> move files and delete old folder
612 log('Moving files from %s to %s', old_folder_name, new_folder_name, sender=self)
613 for file in glob.glob(os.path.join(old_folder_name, '*')):
614 shutil.move(file, new_folder_name)
615 log('Removing %s', old_folder_name, sender=self)
616 shutil.rmtree(old_folder_name, ignore_errors=True)
617 log('Updating foldername of %s to "%s".', self.url, wanted_foldername, sender=self)
618 self.foldername = wanted_foldername
619 self.save()
621 save_dir = os.path.join(gl.downloaddir, self.foldername)
623 # Create save_dir if it does not yet exist
624 if not util.make_directory( save_dir):
625 log( 'Could not create save_dir: %s', save_dir, sender = self)
627 return save_dir
629 save_dir = property(fget=get_save_dir)
631 def remove_downloaded( self):
632 shutil.rmtree( self.save_dir, True)
634 def get_index_file(self):
635 # gets index xml filename for downloaded channels list
636 return os.path.join( self.save_dir, 'index.xml')
638 index_file = property(fget=get_index_file)
640 def get_cover_file( self):
641 # gets cover filename for cover download cache
642 return os.path.join( self.save_dir, 'cover')
644 cover_file = property(fget=get_cover_file)
646 def delete_episode_by_url(self, url):
647 episode = db.load_episode(url, factory=self.episode_factory)
649 if episode is not None:
650 filename = episode.local_filename(create=False)
651 if filename is not None:
652 util.delete_file(filename)
653 else:
654 log('Cannot delete episode: %s (I have no filename!)', episode.title, sender=self)
655 episode.set_state(db.STATE_DELETED)
657 self.update_m3u_playlist()
660 class PodcastEpisode(PodcastModelObject):
661 """holds data for one object in a channel"""
662 MAX_FILENAME_LENGTH = 200
664 def reload_from_db(self):
666 Re-reads all episode details for this object from the
667 database and updates this object accordingly. Can be
668 used to refresh existing objects when the database has
669 been updated (e.g. the filename has been set after a
670 download where it was not set before the download)
672 d = db.load_episode(self.url)
673 if d is not None:
674 self.update_from_dict(d)
676 return self
678 @staticmethod
679 def from_feedparser_entry( entry, channel):
680 episode = PodcastEpisode( channel)
682 episode.title = entry.get( 'title', util.get_first_line( util.remove_html_tags( entry.get( 'summary', ''))))
683 episode.link = entry.get( 'link', '')
684 episode.description = ''
686 # Get the episode description (prefer summary, then subtitle)
687 for key in ('summary', 'subtitle', 'link'):
688 if key in entry:
689 episode.description = entry[key]
690 if episode.description:
691 break
693 episode.guid = entry.get( 'id', '')
694 if entry.get( 'updated_parsed', None):
695 episode.pubDate = rfc822.mktime_tz(entry.updated_parsed+(0,))
697 if episode.title == '':
698 log( 'Warning: Episode has no title, adding anyways.. (Feed Is Buggy!)', sender = episode)
700 enclosure = None
701 if hasattr(entry, 'enclosures') and len(entry.enclosures) > 0:
702 enclosure = entry.enclosures[0]
703 if len(entry.enclosures) > 1:
704 for e in entry.enclosures:
705 if hasattr( e, 'href') and hasattr( e, 'length') and hasattr( e, 'type') and (e.type.startswith('audio/') or e.type.startswith('video/')):
706 if util.normalize_feed_url(e.href) is not None:
707 log( 'Selected enclosure: %s', e.href, sender = episode)
708 enclosure = e
709 break
710 episode.url = util.normalize_feed_url( enclosure.get( 'href', ''))
711 elif hasattr(entry, 'link'):
712 (filename, extension) = util.filename_from_url(entry.link)
713 if extension == '' and hasattr( entry, 'type'):
714 extension = util.extension_from_mimetype(e.type)
715 file_type = util.file_type_by_extension(extension)
716 if file_type is not None:
717 log('Adding episode with link to file type "%s".', file_type, sender=episode)
718 episode.url = entry.link
720 # YouTube specific
721 if not episode.url and hasattr(entry, 'links') and len(entry.links) and hasattr(entry.links[0], 'href'):
722 episode.url = entry.links[0].href
724 if not episode.url:
725 log('Episode has no URL')
726 log('Episode: %s', episode)
727 log('Entry: %s', entry)
728 # This item in the feed has no downloadable enclosure
729 return None
731 metainfo = None
732 if not episode.pubDate:
733 metainfo = util.get_episode_info_from_url(episode.url)
734 if 'pubdate' in metainfo:
735 try:
736 episode.pubDate = int(float(metainfo['pubdate']))
737 except:
738 log('Cannot convert pubDate "%s" in from_feedparser_entry.', str(metainfo['pubdate']), traceback=True)
740 if hasattr(enclosure, 'length'):
741 try:
742 episode.length = int(enclosure.length)
743 if episode.length == 0:
744 raise ValueError('Zero-length is not acceptable')
745 except ValueError, ve:
746 log('Invalid episode length: %s (%s)', enclosure.length, ve.message)
747 episode.length = -1
748 # If the configuration option is set, retrieve the length via a HTTP HEAD request
749 if gl.config.get_length_from_http_header_if_empty:
750 if enclosure.length == '' or episode.length == 0:
751 if metainfo is None:
752 metainfo = util.get_episode_info_from_url(episode.url)
753 if 'length' in metainfo:
754 try:
755 episode.length = int(float(metainfo['length']))
756 except:
757 log('Cannot convert lenght "%s" in from_feedparser_entry.', str(metainfo['length']), traceback=True)
759 if hasattr( enclosure, 'type'):
760 episode.mimetype = enclosure.type
762 if episode.title == '':
763 ( filename, extension ) = os.path.splitext( os.path.basename( episode.url))
764 episode.title = filename
766 return episode
    def __init__( self, channel):
        # Used by Storage for faster saving
        self.id = None
        self.url = ''                   # enclosure/download URL
        self.title = ''                 # episode title from the feed
        self.length = 0                 # file size in bytes (0 = unknown)
        self.mimetype = 'application/octet-stream'  # enclosure mime type
        self.guid = ''                  # feed-provided unique identifier
        self.description = ''           # episode description (may be HTML)
        self.link = ''                  # episode website link
        self.channel = channel          # owning PodcastChannel object
        self.pubDate = 0                # publication timestamp (epoch seconds)
        self.filename = None            # local filename, set on download
        self.auto_filename = 1 # automatically generated filename

        self.state = db.STATE_NORMAL    # NORMAL/DOWNLOADED/DELETED
        self.is_played = False
        # New episodes inherit the channel's lock ("keep") setting
        self.is_locked = channel.channel_is_locked
788 def save(self):
789 if self.state != db.STATE_DOWNLOADED and self.file_exists():
790 self.state = db.STATE_DOWNLOADED
791 db.save_episode(self)
793 def set_state(self, state):
794 self.state = state
795 db.mark_episode(self.url, state=self.state, is_played=self.is_played, is_locked=self.is_locked)
797 def mark(self, state=None, is_played=None, is_locked=None):
798 if state is not None:
799 self.state = state
800 if is_played is not None:
801 self.is_played = is_played
802 if is_locked is not None:
803 self.is_locked = is_locked
804 db.mark_episode(self.url, state=state, is_played=is_played, is_locked=is_locked)
806 def mark_downloaded(self, save=False):
807 self.state = db.STATE_DOWNLOADED
808 self.is_played = False
809 if save:
810 self.save()
811 db.commit()
813 @property
814 def title_and_description(self):
816 Returns Pango markup for displaying in a TreeView, and
817 disables the description when the config variable
818 "episode_list_descriptions" is not set.
820 if gl.config.episode_list_descriptions and gpodder.interface != gpodder.MAEMO:
821 return '%s\n<small>%s</small>' % (saxutils.escape(self.title), saxutils.escape(self.one_line_description()))
822 else:
823 return saxutils.escape(self.title)
825 def age_in_days(self):
826 return util.file_age_in_days(self.local_filename(create=False))
828 def is_old(self):
829 return self.age_in_days() > gl.config.episode_old_age
831 def get_age_string(self):
832 return util.file_age_to_string(self.age_in_days())
834 age_prop = property(fget=get_age_string)
836 def one_line_description( self):
837 lines = util.remove_html_tags(self.description).strip().splitlines()
838 if not lines or lines[0] == '':
839 return _('No description available')
840 else:
841 return ' '.join(lines)
    def delete_from_disk(self):
        # Delegate deletion to the channel, which knows the file location.
        # Best-effort: failures are logged, never raised to the caller.
        try:
            self.channel.delete_episode_by_url(self.url)
        except:
            log('Cannot delete episode from disk: %s', self.title, traceback=True, sender=self)
    @classmethod
    def find_unique_file_name(cls, url, filename, extension):
        """
        Find a local filename (basename + extension) not yet used by any
        episode in the database.

        On the first collision, the URL is resolved (it might be a
        redirect) and the basename of the final URL is tried; after
        that, " (2)", " (3)", ... are appended until a free name is
        found.
        """
        current_try = util.sanitize_filename(filename, cls.MAX_FILENAME_LENGTH)+extension
        next_try_id = 2
        # NOTE(review): lookup_url is never used below
        lookup_url = None

        while db.episode_filename_exists(current_try):
            if next_try_id == 2:
                # If we arrive here, current_try has a collision, so
                # try to resolve the URL for a better basename
                log('Filename collision: %s - trying to resolve...', current_try)
                url = util.get_real_url(url)
                (episode_filename, extension_UNUSED) = util.filename_from_url(url)
                current_try = util.sanitize_filename(episode_filename, cls.MAX_FILENAME_LENGTH)+extension
                if not db.episode_filename_exists(current_try) and current_try:
                    log('Filename %s is available - collision resolved.', current_try)
                    return current_try
                else:
                    log('Continuing search with %s as basename...', current_try)

            # NOTE(review): numbered candidates use the raw (unsanitized)
            # "filename" argument -- presumably safe in practice; confirm.
            current_try = '%s (%d)%s' % (filename, next_try_id, extension)
            next_try_id += 1

        return current_try
    def local_filename(self, create, force_update=False, check_only=False):
        """Get (and possibly generate) the local saving filename

        Pass create=True if you want this function to generate a
        new filename if none exists. You only want to do this when
        planning to create/download the file after calling this function.

        Normally, you should pass create=False. This will only
        create a filename when the file already exists from a previous
        version of gPodder (where we used md5 filenames). If the file
        does not exist (and the filename also does not exist), this
        function will return None.

        If you pass force_update=True to this function, it will try to
        find a new (better) filename and move the current file if this
        is the case. This is useful if (during the download) you get
        more information about the file, e.g. the mimetype and you want
        to include this information in the file name generation process.

        If check_only=True is passed to this function, it will never try
        to rename the file, even if would be a good idea. Use this if you
        only want to check if a file exists.

        The generated filename is stored in the database for future access.
        """
        ext = self.extension()

        # For compatibility with already-downloaded episodes, we
        # have to know md5 filenames if they are downloaded already
        # (self.url is a byte string here - this is Python 2 code)
        urldigest = hashlib.md5(self.url).hexdigest()

        if not create and self.filename is None:
            # Legacy recovery path: episodes downloaded by gPodder < 0.15.0
            # were stored under the md5 of their URL; adopt such a file into
            # the database if it is found on disk.
            urldigest_filename = os.path.join(self.channel.save_dir, urldigest+ext)
            if os.path.exists(urldigest_filename):
                # The file exists, so set it up in our database
                log('Recovering pre-0.15.0 file: %s', urldigest_filename, sender=self)
                self.filename = urldigest+ext
                self.auto_filename = 1
                self.save()
                return urldigest_filename
            return None

        # We only want to check if the file exists, so don't try to
        # rename the file, even if it would be reasonable. See also:
        # http://bugs.gpodder.org/attachment.cgi?id=236
        if check_only:
            if self.filename is None:
                return None
            else:
                return os.path.join(self.channel.save_dir, self.filename)

        if self.filename is None or force_update or (self.auto_filename and self.filename == urldigest+ext):
            # Try to find a new filename for the current file
            (episode_filename, extension_UNUSED) = util.filename_from_url(self.url)
            fn_template = util.sanitize_filename(episode_filename, self.MAX_FILENAME_LENGTH)

            if 'redirect' in fn_template:
                # This looks like a redirection URL - force URL resolving!
                log('Looks like a redirection to me: %s', self.url, sender=self)
                url = util.get_real_url(self.url)
                log('Redirection resolved to: %s', url, sender=self)
                (episode_filename, extension_UNUSED) = util.filename_from_url(url)
                fn_template = util.sanitize_filename(episode_filename, self.MAX_FILENAME_LENGTH)

            # Use the video title for YouTube downloads
            for yt_url in ('http://youtube.com/', 'http://www.youtube.com/'):
                if self.url.startswith(yt_url):
                    fn_template = self.title

            # If the basename is empty, use the md5 hexdigest of the URL
            if len(fn_template) == 0 or fn_template.startswith('redirect.'):
                log('Report to bugs.gpodder.org: Podcast at %s with episode URL: %s', self.channel.url, self.url, sender=self)
                fn_template = urldigest

            # Find a unique filename for this episode
            wanted_filename = self.find_unique_file_name(self.url, fn_template, ext)

            # We populate the filename field the first time - does the old file still exist?
            if self.filename is None and os.path.exists(os.path.join(self.channel.save_dir, urldigest+ext)):
                log('Found pre-0.15.0 downloaded file: %s', urldigest, sender=self)
                self.filename = urldigest+ext

            # The old file exists, but we have decided to want a different filename
            if self.filename is not None and wanted_filename != self.filename:
                # there might be an old download folder crawling around - move it!
                new_file_name = os.path.join(self.channel.save_dir, wanted_filename)
                old_file_name = os.path.join(self.channel.save_dir, self.filename)
                if os.path.exists(old_file_name) and not os.path.exists(new_file_name):
                    log('Renaming %s => %s', old_file_name, new_file_name, sender=self)
                    os.rename(old_file_name, new_file_name)
                elif force_update and not os.path.exists(old_file_name):
                    # When we call force_update, the file might not yet exist when we
                    # call it from the downloading code before saving the file
                    log('Choosing new filename: %s', new_file_name, sender=self)
                else:
                    log('Warning: %s exists or %s does not.', new_file_name, old_file_name, sender=self)
                log('Updating filename of %s to "%s".', self.url, wanted_filename, sender=self)
            # Persist whatever filename we settled on for future lookups
            self.filename = wanted_filename
            self.save()

        return os.path.join(self.channel.save_dir, self.filename)
976 def extension( self):
977 ( filename, ext ) = util.filename_from_url(self.url)
978 # if we can't detect the extension from the url fallback on the mimetype
979 if ext == '' or util.file_type_by_extension(ext) is None:
980 ext = util.extension_from_mimetype(self.mimetype)
981 #log('Getting extension from mimetype for: %s (mimetype: %s)' % (self.title, ext), sender=self)
982 return ext
984 def mark_new(self):
985 self.state = db.STATE_NORMAL
986 self.is_played = False
987 db.mark_episode(self.url, state=self.state, is_played=self.is_played)
989 def mark_old(self):
990 self.is_played = True
991 db.mark_episode(self.url, is_played=True)
993 def file_exists(self):
994 filename = self.local_filename(create=False, check_only=True)
995 if filename is None:
996 return False
997 else:
998 return os.path.exists(filename)
1000 def was_downloaded(self, and_exists=False):
1001 if self.state != db.STATE_DOWNLOADED:
1002 return False
1003 if and_exists and not self.file_exists():
1004 return False
1005 return True
1007 def sync_filename( self):
1008 if gl.config.custom_sync_name_enabled:
1009 if '{channel' in gl.config.custom_sync_name:
1010 log('Fixing OLD syntax {channel.*} => {podcast.*} in custom_sync_name.', sender=self)
1011 gl.config.custom_sync_name = gl.config.custom_sync_name.replace('{channel.', '{podcast.')
1012 return util.object_string_formatter(gl.config.custom_sync_name, episode=self, podcast=self.channel)
1013 else:
1014 return self.title
1016 def file_type( self):
1017 return util.file_type_by_extension( self.extension() )
1019 @property
1020 def basename( self):
1021 return os.path.splitext( os.path.basename( self.url))[0]
1023 @property
1024 def published( self):
1026 Returns published date as YYYYMMDD (or 00000000 if not available)
1028 try:
1029 return datetime.datetime.fromtimestamp(self.pubDate).strftime('%Y%m%d')
1030 except:
1031 log( 'Cannot format pubDate for "%s".', self.title, sender = self)
1032 return '00000000'
1034 @property
1035 def pubtime(self):
1037 Returns published time as HHMM (or 0000 if not available)
1039 try:
1040 return datetime.datetime.fromtimestamp(self.pubDate).strftime('%H%M')
1041 except:
1042 log('Cannot format pubDate (time) for "%s".', self.title, sender=self)
1043 return '0000'
1045 def cute_pubdate(self):
1046 result = util.format_date(self.pubDate)
1047 if result is None:
1048 return '(%s)' % _('unknown')
1049 else:
1050 return result
1052 pubdate_prop = property(fget=cute_pubdate)
1054 def calculate_filesize( self):
1055 filename = self.local_filename(create=False)
1056 if filename is None:
1057 log('calculate_filesized called, but filename is None!', sender=self)
1058 try:
1059 self.length = os.path.getsize(filename)
1060 except:
1061 log( 'Could not get filesize for %s.', self.url)
    def get_filesize_string( self):
        # Delegate to the global helper for human-readable "x.y MB" output
        return gl.format_filesize( self.length)

    # Property alias used by the episode list model for display
    filesize_prop = property(fget=get_filesize_string)
    def get_channel_title( self):
        # Title of the podcast (channel) this episode belongs to
        return self.channel.title

    # Property alias used by the episode list model for display
    channel_prop = property(fget=get_channel_title)
1073 def get_played_string( self):
1074 if not self.is_played:
1075 return _('Unplayed')
1077 return ''
1079 played_prop = property(fget=get_played_string)
1081 def is_duplicate( self, episode ):
1082 if self.title == episode.title and self.pubDate == episode.pubDate:
1083 log('Possible duplicate detected: %s', self.title)
1084 return True
1085 return False
def update_channel_model_by_iter( model, iter, channel,
        cover_cache=None, max_width=0, max_height=0, initialize_all=False):
    """Refresh one row of the podcast list model in-place.

    model/iter identify the row and channel supplies the data.  With
    initialize_all=True, also (re)set the URL and cover columns that
    normally never change after the row has been created.
    """
    # Episode counters drive the bold title (new episodes) and the
    # unplayed/downloaded "pill" graphic.
    count_downloaded = channel.stat(state=db.STATE_DOWNLOADED)
    count_new = channel.stat(state=db.STATE_NORMAL, is_played=False)
    count_unplayed = channel.stat(state=db.STATE_DOWNLOADED, is_played=False)

    channel.iter = iter
    if initialize_all:
        model.set(iter, 0, channel.url)

    model.set(iter, 1, channel.title)
    # Escape for Pango markup; bold the title when new episodes exist
    title_markup = saxutils.escape(channel.title)
    description_markup = saxutils.escape(util.get_first_line(channel.description) or _('No description available'))
    d = []
    if count_new:
        d.append('<span weight="bold">')
    d.append(title_markup)
    if count_new:
        d.append('</span>')

    description = ''.join(d+['\n', '<small>', description_markup, '</small>'])
    model.set(iter, 2, description)

    # Column 6 carries the feed parse error (tooltip), or None if OK
    if channel.parse_error:
        model.set(iter, 6, str(channel.parse_error))
    else:
        model.set(iter, 6, None)

    # Column 3 holds the pill pixbuf; column 7 its visibility flag
    if count_unplayed > 0 or count_downloaded > 0:
        model.set(iter, 3, draw.draw_pill_pixbuf(str(count_unplayed), str(count_downloaded)))
        model.set(iter, 7, True)
    else:
        model.set(iter, 7, False)

    if initialize_all:
        # Load the cover if we have it, but don't download
        # it if it's not available (to avoid blocking here)
        pixbuf = services.cover_downloader.get_cover(channel, avoid_downloading=True)
        new_pixbuf = None
        if pixbuf is not None:
            new_pixbuf = util.resize_pixbuf_keep_ratio(pixbuf, max_width, max_height, channel.url, cover_cache)
        model.set(iter, 5, new_pixbuf or pixbuf)
def channels_to_model(channels, cover_cache=None, max_width=0, max_height=0):
    """Build a fresh gtk.ListStore for the podcast list.

    Returns a (model, urls) tuple where urls lists the channel URLs in
    the same order as the model rows were appended.
    """
    model = gtk.ListStore(str, str, str, gtk.gdk.Pixbuf, int,
                          gtk.gdk.Pixbuf, str, bool, str)

    urls = []
    for podcast in channels:
        row = model.append()
        update_channel_model_by_iter(model, row, podcast,
                                     cover_cache, max_width, max_height, True)
        urls.append(podcast.url)

    return (model, urls)
def load_channels():
    # All subscriptions live in the SQLite database; a PodcastChannel
    # object is created from each row via the factory callback.
    return db.load_channels(factory=PodcastChannel.create_from_dict)
def update_channels(callback_proc=None, callback_error=None, is_cancelled_cb=None):
    """Re-fetch the feeds of all subscribed podcasts.

    callback_proc(position, total) reports progress before each feed
    update; is_cancelled_cb() is polled between feeds so a running
    update can be aborted early.  Returns the channel list either way.
    """
    log('Updating channels....')

    channels = load_channels()
    total = len(channels)

    for position, channel in enumerate(channels):
        # Stop as soon as the caller signals cancellation
        if is_cancelled_cb is not None and is_cancelled_cb():
            break
        if callback_proc is not None:
            callback_proc(position, total)
        channel.update()

    return channels
def save_channels(channels):
    """Write the current subscription list to gPodder's OPML file.

    Returns the exporter's success flag.
    """
    return opml.Exporter(gl.channel_opml_file).write(channels)
def can_restore_from_opml():
    """Return the OPML file path if it holds subscriptions to restore.

    Returns None when the file is missing, unparsable or empty.  This
    is deliberately best-effort: any I/O or parse problem just means
    there is nothing to restore from.
    """
    try:
        if len(opml.Importer(gl.channel_opml_file).items):
            return gl.channel_opml_file
    except Exception:
        # Was a bare 'except:', which would also swallow
        # KeyboardInterrupt/SystemExit - narrowed to Exception.
        return None
    # Importer succeeded but the file contained no items
    return None
class LocalDBReader( object):
    """
    DEPRECATED - Only used for migration to SQLite

    Parses the old per-channel "LocalDB" XML files so their episode
    metadata can be imported into the new database backend.
    """
    def __init__( self, url):
        # URL of the channel whose LocalDB file will be read
        self.url = url
1183 def get_text( self, nodelist):
1184 return ''.join( [ node.data for node in nodelist if node.nodeType == node.TEXT_NODE ])
1186 def get_text_by_first_node( self, element, name):
1187 return self.get_text( element.getElementsByTagName( name)[0].childNodes)
    def get_episode_from_element( self, channel, element):
        """Build a PodcastEpisode from one <item> element of a LocalDB file.

        Handles legacy pubDate values (non-numeric date strings) by
        parsing them with feedparser and falling back to 0 on failure.
        """
        episode = PodcastEpisode(channel)
        episode.title = self.get_text_by_first_node( element, 'title')
        episode.description = self.get_text_by_first_node( element, 'description')
        episode.url = self.get_text_by_first_node( element, 'url')
        episode.link = self.get_text_by_first_node( element, 'link')
        episode.guid = self.get_text_by_first_node( element, 'guid')

        if not episode.guid:
            # Very old files have no guid; fall back to the enclosure URL
            # (or link) so the episode stays uniquely identifiable
            for k in ('url', 'link'):
                if getattr(episode, k) is not None:
                    episode.guid = getattr(episode, k)
                    log('Notice: episode has no guid, using %s', episode.guid)
                    break
        try:
            # Modern files store pubDate as a Unix timestamp
            episode.pubDate = float(self.get_text_by_first_node(element, 'pubDate'))
        except:
            log('Looks like you have an old pubDate in your LocalDB -> converting it')
            episode.pubDate = self.get_text_by_first_node(element, 'pubDate')
            log('FYI: pubDate value is: "%s"', episode.pubDate, sender=self)
            # feedparser's RFC822/ISO date parser returns a time tuple or None
            pubdate = feedparser._parse_date(episode.pubDate)
            if pubdate is None:
                log('Error converting the old pubDate - sorry!', sender=self)
                episode.pubDate = 0
            else:
                log('PubDate converted successfully - yay!', sender=self)
                episode.pubDate = time.mktime(pubdate)
        try:
            episode.mimetype = self.get_text_by_first_node( element, 'mimetype')
        except:
            # Not every legacy file stored a mimetype per episode
            log('No mimetype info for %s', episode.url, sender=self)
        episode.calculate_filesize()
        return episode
1223 def load_and_clean( self, filename):
1225 Clean-up a LocalDB XML file that could potentially contain
1226 "unbound prefix" XML elements (generated by the old print-based
1227 LocalDB code). The code removes those lines to make the new
1228 DOM parser happy.
1230 This should be removed in a future version.
1232 lines = []
1233 for line in open(filename).read().split('\n'):
1234 if not line.startswith('<gpodder:info'):
1235 lines.append( line)
1237 return '\n'.join( lines)
    def read( self, filename):
        """Parse a LocalDB file and return its list of PodcastEpisode objects.

        Channel metadata is parsed as well, but only used as the parent
        object for the episodes that are returned.
        """
        doc = xml.dom.minidom.parseString( self.load_and_clean( filename))
        rss = doc.getElementsByTagName('rss')[0]

        channel_element = rss.getElementsByTagName('channel')[0]

        channel = PodcastChannel(url=self.url)
        channel.title = self.get_text_by_first_node( channel_element, 'title')
        channel.description = self.get_text_by_first_node( channel_element, 'description')
        channel.link = self.get_text_by_first_node( channel_element, 'link')

        episodes = []
        for episode_element in rss.getElementsByTagName('item'):
            episode = self.get_episode_from_element( channel, episode_element)
            episodes.append(episode)

        return episodes