# -*- coding: utf-8 -*-
#
# gPodder - A media aggregator and podcast client
# Copyright (c) 2005-2011 Thomas Perl and the gPodder Team
#
# gPodder is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 3 of the License, or
# (at your option) any later version.
#
# gPodder is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#

#
# gpodder.model - Core model classes for gPodder (2009-08-13)
# Based on libpodcasts.py (thp, 2005-10-29)
#

import gpodder
from gpodder import util
from gpodder import feedcore
from gpodder import youtube
from gpodder import gstreamer

from gpodder.liblogger import log

import os
import re
import glob
import shutil
import time
import datetime
import rfc822
import hashlib
import feedparser
import xml.sax.saxutils

_ = gpodder.gettext

class CustomFeed(feedcore.ExceptionWithData): pass

class gPodderFetcher(feedcore.Fetcher):
    """
    This class extends the feedcore Fetcher with the gPodder User-Agent and the
    Proxy handler based on the current settings in gPodder and provides a
    convenience method (fetch_channel) for use by PodcastChannel objects.
    """
    custom_handlers = []

    def __init__(self):
        feedcore.Fetcher.__init__(self, gpodder.user_agent)

    def fetch_channel(self, channel):
        etag = channel.etag
        modified = feedparser._parse_date(channel.last_modified)
        # If we have a username or password, rebuild the url with them included
        # Note: using a HTTPBasicAuthHandler would be pain because we need to
        # know the realm. It can be done, but I think this method works, too
        url = channel.authenticate_url(channel.url)
        for handler in self.custom_handlers:
            custom_feed = handler.handle_url(url)
            if custom_feed is not None:
                raise CustomFeed(custom_feed)
        self.fetch(url, etag, modified)

    def _resolve_url(self, url):
        return youtube.get_real_channel_url(url)

    @classmethod
    def register(cls, handler):
        cls.custom_handlers.append(handler)

#    def _get_handlers(self):
#        # Add a ProxyHandler for fetching data via a proxy server
#        proxies = {'http': 'http://proxy.example.org:8080'}
#        return [urllib2.ProxyHandler(proxies)]

# The "register" method is exposed here for external usage
register_custom_handler = gPodderFetcher.register
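
# Illustrative only, not part of the original module: a sketch of how an
# external module might use register_custom_handler. "MyFeedHandler" and
# "MyCustomFeed" are hypothetical names; the contract visible in
# fetch_channel() above is that handle_url() returns a custom feed object
# (later consumed by PodcastChannel._consume_custom_feed) or None to fall
# through to the normal feed fetcher.
#
#   class MyFeedHandler(object):
#       def handle_url(self, url):
#           if url.startswith('http://example.org/'):
#               return MyCustomFeed(url)
#           return None
#
#   register_custom_handler(MyFeedHandler())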

class PodcastModelObject(object):
    """
    A generic base class for our podcast model providing common helper
    and utility functions.
    """

    @classmethod
    def create_from_dict(cls, d, *args):
        """
        Create a new object, passing "args" to the constructor
        and then updating the object with the values from "d".
        """
        o = cls(*args)
        o.update_from_dict(d)
        return o

    def update_from_dict(self, d):
        """
        Updates the attributes of this object with values from the
        dictionary "d" by using the keys found in "d".
        """
        for k in d:
            if hasattr(self, k):
                setattr(self, k, d[k])
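
    # Illustrative sketch (not in the original code): turning a database row,
    # given as a dict, into a model object. Keys without a matching attribute
    # are silently ignored because of the hasattr() check above. The "db" and
    # "download_dir" arguments and the column names are assumed example values.
    #
    #   row = {'url': 'http://example.org/feed.xml', 'title': 'Example feed',
    #          'no_such_column': 42}   # dropped by update_from_dict()
    #   channel = PodcastChannel.create_from_dict(row, db, download_dir)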

class PodcastChannel(PodcastModelObject):
    """holds data for a complete channel"""
    MAX_FOLDERNAME_LENGTH = 150
    SECONDS_PER_WEEK = 7*24*60*60

    feed_fetcher = gPodderFetcher()

    @classmethod
    def build_factory(cls, download_dir):
        def factory(dict, db):
            return cls.create_from_dict(dict, db, download_dir)
        return factory

    @classmethod
    def load_from_db(cls, db, download_dir):
        return db.load_channels(factory=cls.build_factory(download_dir))

    @classmethod
    def load(cls, db, url, create=True, authentication_tokens=None,\
            max_episodes=0, download_dir=None, allow_empty_feeds=False, \
            mimetype_prefs=''):
        if isinstance(url, unicode):
            url = url.encode('utf-8')

        tmp = db.load_channels(factory=cls.build_factory(download_dir), url=url)
        if len(tmp):
            return tmp[0]
        elif create:
            tmp = PodcastChannel(db, download_dir)
            tmp.url = url
            if authentication_tokens is not None:
                tmp.username = authentication_tokens[0]
                tmp.password = authentication_tokens[1]

            tmp.update(max_episodes, mimetype_prefs)
            tmp.save()
            db.force_last_new(tmp)
            # Subscribing to empty feeds should yield an error (except if
            # the user specifically allows empty feeds in the config UI)
            if sum(tmp.get_statistics()) == 0 and not allow_empty_feeds:
                tmp.delete()
                raise Exception(_('No downloadable episodes in feed'))
            return tmp
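
    # Illustrative sketch of subscribing via load(); "db" (an open gPodder
    # database) and "config.download_dir" are assumed to exist in the caller,
    # and the URL is a placeholder. If the URL is already subscribed, the
    # stored channel is returned without fetching the feed; otherwise the
    # feed is fetched, saved, and deleted again (with an exception raised)
    # if it turns out to contain no downloadable episodes.
    #
    #   channel = PodcastChannel.load(db, 'http://example.org/feed.xml',
    #           create=True, max_episodes=20,
    #           download_dir=config.download_dir)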

    def episode_factory(self, d, db__parameter_is_unused=None):
        """
        This function takes a dictionary containing key-value pairs for
        episodes and returns a new PodcastEpisode object that is connected
        to this PodcastChannel object.

        Returns: A new PodcastEpisode object
        """
        return PodcastEpisode.create_from_dict(d, self)

    def _consume_custom_feed(self, custom_feed, max_episodes=0):
        self.title = custom_feed.get_title()
        self.link = custom_feed.get_link()
        self.description = custom_feed.get_description()
        self.image = custom_feed.get_image()
        self.pubDate = time.time()
        self.save()

        guids = [episode.guid for episode in self.get_all_episodes()]

        # Insert newly-found episodes into the database
        custom_feed.get_new_episodes(self, guids)

        self.save()

        self.db.purge(max_episodes, self.id)

    def _consume_updated_feed(self, feed, max_episodes=0, mimetype_prefs=''):
        self.parse_error = feed.get('bozo_exception', None)

        # Replace multi-space and newlines with single space (Maemo bug 11173)
        self.title = re.sub('\s+', ' ', feed.feed.get('title', self.url))

        self.link = feed.feed.get('link', self.link)
        self.description = feed.feed.get('subtitle', self.description)
        # Start YouTube-specific title FIX
        YOUTUBE_PREFIX = 'Uploads by '
        if self.title.startswith(YOUTUBE_PREFIX):
            self.title = self.title[len(YOUTUBE_PREFIX):] + ' on YouTube'
        # End YouTube-specific title FIX

        try:
            self.pubDate = rfc822.mktime_tz(feed.feed.get('updated_parsed', None)+(0,))
        except:
            self.pubDate = time.time()

        if hasattr(feed.feed, 'image'):
            for attribute in ('href', 'url'):
                new_value = getattr(feed.feed.image, attribute, None)
                if new_value is not None:
                    log('Found cover art in %s: %s', attribute, new_value)
                    self.image = new_value

        if hasattr(feed.feed, 'icon'):
            self.image = feed.feed.icon

        self.save()

        # Load all episodes to update them properly.
        existing = self.get_all_episodes()

        # We can limit the maximum number of entries that gPodder will parse
        if max_episodes > 0 and len(feed.entries) > max_episodes:
            # We have to sort the entries in descending chronological order,
            # because if the feed lists items in ascending order and has >
            # max_episodes old episodes, new episodes will not be shown.
            # See also: gPodder Bug 1186
            try:
                entries = sorted(feed.entries, \
                        key=lambda x: x.get('updated_parsed', (0,)*9), \
                        reverse=True)[:max_episodes]
            except Exception, e:
                log('Could not sort episodes: %s', e, sender=self, traceback=True)
                entries = feed.entries[:max_episodes]
        else:
            entries = feed.entries

        # Title + PubDate hashes for existing episodes
        existing_dupes = dict((e.duplicate_id(), e) for e in existing)

        # GUID-based existing episode list
        existing_guids = dict((e.guid, e) for e in existing)

        # Get most recent pubDate of all episodes
        last_pubdate = self.db.get_last_pubdate(self) or 0

        # Search all entries for new episodes
        for entry in entries:
            try:
                episode = PodcastEpisode.from_feedparser_entry(entry, self, mimetype_prefs)
                if episode is not None and not episode.title:
                    episode.title, ext = os.path.splitext(os.path.basename(episode.url))
            except Exception, e:
                log('Cannot instantiate episode: %s. Skipping.', e, sender=self, traceback=True)
                continue

            if episode is None:
                continue

            # Detect (and update) existing episode based on GUIDs
            existing_episode = existing_guids.get(episode.guid, None)
            if existing_episode:
                existing_episode.update_from(episode)
                existing_episode.save()
                continue

            # Detect (and update) existing episode based on duplicate ID
            existing_episode = existing_dupes.get(episode.duplicate_id(), None)
            if existing_episode:
                if existing_episode.is_duplicate(episode):
                    existing_episode.update_from(episode)
                    existing_episode.save()
                    continue

            # Workaround for bug 340: If the episode has been
            # published earlier than one week before the most
            # recent existing episode, do not mark it as new.
            if episode.pubDate < last_pubdate - self.SECONDS_PER_WEEK:
                log('Episode with old date: %s', episode.title, sender=self)
                episode.is_played = True

            episode.save()

        # Remove "unreachable" episodes - episodes that have not been
        # downloaded and that the feed does not list as downloadable anymore
        if self.id is not None:
            seen_guids = set(e.guid for e in feed.entries if hasattr(e, 'guid'))
            episodes_to_purge = (e for e in existing if \
                    e.state != gpodder.STATE_DOWNLOADED and \
                    e.guid not in seen_guids and e.guid is not None)
            for episode in episodes_to_purge:
                log('Episode removed from feed: %s (%s)', episode.title, \
                        episode.guid, sender=self)
                self.db.delete_episode_by_guid(episode.guid, self.id)

        # This *might* cause episodes to be skipped if there were more than
        # max_episodes_per_feed items added to the feed between updates.
        # The benefit is that it prevents old episodes from appearing as new
        # in certain situations (see bug #340).
        self.db.purge(max_episodes, self.id)

    def update_channel_lock(self):
        self.db.update_channel_lock(self)

    def _update_etag_modified(self, feed):
        self.updated_timestamp = time.time()
        self.etag = feed.headers.get('etag', self.etag)
        self.last_modified = feed.headers.get('last-modified', self.last_modified)

    def update(self, max_episodes=0, mimetype_prefs=''):
        try:
            self.feed_fetcher.fetch_channel(self)
        except CustomFeed, updated:
            custom_feed = updated.data
            self._consume_custom_feed(custom_feed, max_episodes)
            self.save()
        except feedcore.UpdatedFeed, updated:
            feed = updated.data
            self._consume_updated_feed(feed, max_episodes, mimetype_prefs)
            self._update_etag_modified(feed)
            self.save()
        except feedcore.NewLocation, updated:
            feed = updated.data
            self.url = feed.href
            self._consume_updated_feed(feed, max_episodes, mimetype_prefs)
            self._update_etag_modified(feed)
            self.save()
        except feedcore.NotModified, updated:
            feed = updated.data
            self._update_etag_modified(feed)
            self.save()
        except Exception, e:
            # "Not really" errors
            #feedcore.AuthenticationRequired
            # Temporary errors
            #feedcore.Offline
            #feedcore.BadRequest
            #feedcore.InternalServerError
            #feedcore.WifiLogin
            # Permanent errors
            #feedcore.Unsubscribe
            #feedcore.NotFound
            #feedcore.InvalidFeed
            #feedcore.UnknownStatusCode
            raise

        if gpodder.user_hooks is not None:
            gpodder.user_hooks.on_podcast_updated(self)

        self.db.commit()

    def delete(self):
        self.db.delete_channel(self)

    def save(self):
        if gpodder.user_hooks is not None:
            gpodder.user_hooks.on_podcast_save(self)
        if self.foldername is None:
            # get_save_dir() finds a unique value for foldername
            self.get_save_dir()
        self.db.save_channel(self)

    def get_statistics(self):
        if self.id is None:
            return (0, 0, 0, 0, 0)
        else:
            return self.db.get_channel_count(int(self.id))

    def authenticate_url(self, url):
        return util.url_add_authentication(url, self.username, self.password)

    def __init__(self, db, download_dir):
        self.db = db
        self.download_dir = download_dir
        self.id = None
        self.url = None
        self.title = ''
        self.link = ''
        self.description = ''
        self.image = None
        self.pubDate = 0
        self.parse_error = None
        self.foldername = None
        self.auto_foldername = 1 # automatically generated foldername

        # should this channel be synced to devices? (ex: iPod)
        self.sync_to_devices = True
        # to which playlist should be synced
        self.device_playlist_name = 'gPodder'
        # if set, this overrides the channel-provided title
        self.override_title = ''
        self.username = ''
        self.password = ''

        self.last_modified = None
        self.etag = None

        self.save_dir_size = 0
        self.__save_dir_size_set = False

        self.channel_is_locked = False

        self.release_expected = time.time() # <= DEPRECATED
        self.release_deviation = 0 # <= DEPRECATED
        self.updated_timestamp = 0

        self.feed_update_enabled = True

    def request_save_dir_size(self):
        if not self.__save_dir_size_set:
            self.update_save_dir_size()
        self.__save_dir_size_set = True

    def update_save_dir_size(self):
        self.save_dir_size = util.calculate_size(self.save_dir)

    def get_title(self):
        if self.override_title:
            return self.override_title
        elif not self.__title.strip():
            return self.url
        else:
            return self.__title

    def set_title(self, value):
        self.__title = value.strip()

    title = property(fget=get_title,
                     fset=set_title)

    def set_custom_title(self, custom_title):
        custom_title = custom_title.strip()

        # if the custom title is the same as we have
        if custom_title == self.override_title:
            return

        # if custom title is the same as channel title and we didn't have a custom title
        if custom_title == self.__title and self.override_title == '':
            return

        # make sure self.foldername is initialized
        self.get_save_dir()

        # rename folder if custom_title looks sane
        new_folder_name = self.find_unique_folder_name(custom_title)
        if len(new_folder_name) > 0 and new_folder_name != self.foldername:
            log('Changing foldername based on custom title: %s', custom_title, sender=self)
            new_folder = os.path.join(self.download_dir, new_folder_name)
            old_folder = os.path.join(self.download_dir, self.foldername)
            if os.path.exists(old_folder):
                if not os.path.exists(new_folder):
                    # Old folder exists, new folder does not -> simply rename
                    log('Renaming %s => %s', old_folder, new_folder, sender=self)
                    os.rename(old_folder, new_folder)
                else:
                    # Both folders exist -> move files and delete old folder
                    log('Moving files from %s to %s', old_folder, new_folder, sender=self)
                    for file in glob.glob(os.path.join(old_folder, '*')):
                        shutil.move(file, new_folder)
                    log('Removing %s', old_folder, sender=self)
                    shutil.rmtree(old_folder, ignore_errors=True)
            self.foldername = new_folder_name
            self.save()

        if custom_title != self.__title:
            self.override_title = custom_title
        else:
            self.override_title = ''

    def get_downloaded_episodes(self):
        return self.db.load_episodes(self, factory=self.episode_factory, state=gpodder.STATE_DOWNLOADED)

    def get_new_episodes(self, downloading=lambda e: False):
        """
        Get a list of new episodes. You can optionally specify
        "downloading" as a callback that takes an episode as
        a parameter and returns True if the episode is currently
        being downloaded or False if not.

        By default, "downloading" is implemented so that it
        reports all episodes as not downloading.
        """
        return [episode for episode in self.db.load_episodes(self, \
                factory=self.episode_factory, state=gpodder.STATE_NORMAL) if \
                episode.check_is_new(downloading=downloading)]
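
    # Illustrative sketch: wiring a "downloading" callback into
    # get_new_episodes(). "active_downloads" is an assumed set of URLs
    # maintained by the download queue elsewhere; it is not part of this
    # module. Episodes that are played, deleted, downloaded or currently
    # downloading are filtered out by PodcastEpisode.check_is_new().
    #
    #   active_downloads = set()   # URLs currently being fetched
    #   new_episodes = channel.get_new_episodes(
    #           downloading=lambda episode: episode.url in active_downloads)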

    def get_playlist_filename(self):
        # If the save_dir doesn't end with a slash (which it really should
        # not, if the implementation is correct), we can just append .m3u :)
        assert self.save_dir[-1] != '/'
        return self.save_dir+'.m3u'

    def update_m3u_playlist(self):
        m3u_filename = self.get_playlist_filename()

        downloaded_episodes = self.get_downloaded_episodes()
        if not downloaded_episodes:
            log('No episodes - removing %s', m3u_filename, sender=self)
            util.delete_file(m3u_filename)
            return

        log('Writing playlist to %s', m3u_filename, sender=self)
        util.write_m3u_playlist(m3u_filename, \
                PodcastEpisode.sort_by_pubdate(downloaded_episodes))

    def get_episode_by_url(self, url):
        return self.db.load_single_episode(self, \
                factory=self.episode_factory, url=url)

    def get_episode_by_filename(self, filename):
        return self.db.load_single_episode(self, \
                factory=self.episode_factory, filename=filename)

    def get_all_episodes(self):
        return self.db.load_episodes(self, factory=self.episode_factory)

    def find_unique_folder_name(self, foldername):
        # Remove trailing dots to avoid errors on Windows (bug 600)
        foldername = foldername.strip().rstrip('.')

        current_try = util.sanitize_filename(foldername, \
                self.MAX_FOLDERNAME_LENGTH)
        next_try_id = 2

        while True:
            if self.db.channel_foldername_exists(current_try):
                current_try = '%s (%d)' % (foldername, next_try_id)
                next_try_id += 1
            else:
                return current_try
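
    # Illustrative example of the collision handling above: assuming the
    # database already knows folders "Linux Outlaws" and "Linux Outlaws (2)",
    # the call below returns "Linux Outlaws (3)". The podcast title is just
    # an example value.
    #
    #   foldername = channel.find_unique_folder_name('Linux Outlaws')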

    def get_save_dir(self):
        urldigest = hashlib.md5(self.url).hexdigest()
        sanitizedurl = util.sanitize_filename(self.url, self.MAX_FOLDERNAME_LENGTH)
        if self.foldername is None or (self.auto_foldername and (self.foldername == urldigest or self.foldername.startswith(sanitizedurl))):
            # we must change the folder name, because it has not been set manually
            fn_template = util.sanitize_filename(self.title, self.MAX_FOLDERNAME_LENGTH)

            # if this is an empty string, try the basename
            if len(fn_template) == 0:
                log('That is one ugly feed you have here! (Report this to bugs.gpodder.org: %s)', self.url, sender=self)
                fn_template = util.sanitize_filename(os.path.basename(self.url), self.MAX_FOLDERNAME_LENGTH)

            # If the basename is also empty, use the first 6 md5 hexdigest chars of the URL
            if len(fn_template) == 0:
                log('That is one REALLY ugly feed you have here! (Report this to bugs.gpodder.org: %s)', self.url, sender=self)
                fn_template = urldigest # no need for sanitize_filename here

            # Find a unique folder name for this podcast
            wanted_foldername = self.find_unique_folder_name(fn_template)

            # if the foldername has not been set, check if the (old) md5 filename exists
            if self.foldername is None and os.path.exists(os.path.join(self.download_dir, urldigest)):
                log('Found pre-0.15.0 download folder for %s: %s', self.title, urldigest, sender=self)
                self.foldername = urldigest

            # we have a valid, new folder name in "current_try" -> use that!
            if self.foldername is not None and wanted_foldername != self.foldername:
                # there might be an old download folder crawling around - move it!
                new_folder_name = os.path.join(self.download_dir, wanted_foldername)
                old_folder_name = os.path.join(self.download_dir, self.foldername)
                if os.path.exists(old_folder_name):
                    if not os.path.exists(new_folder_name):
                        # Old folder exists, new folder does not -> simply rename
                        log('Renaming %s => %s', old_folder_name, new_folder_name, sender=self)
                        os.rename(old_folder_name, new_folder_name)
                    else:
                        # Both folders exist -> move files and delete old folder
                        log('Moving files from %s to %s', old_folder_name, new_folder_name, sender=self)
                        for file in glob.glob(os.path.join(old_folder_name, '*')):
                            shutil.move(file, new_folder_name)
                        log('Removing %s', old_folder_name, sender=self)
                        shutil.rmtree(old_folder_name, ignore_errors=True)
                log('Updating foldername of %s to "%s".', self.url, wanted_foldername, sender=self)
            self.foldername = wanted_foldername
            self.save()

        save_dir = os.path.join(self.download_dir, self.foldername)

        # Create save_dir if it does not yet exist
        if not util.make_directory(save_dir):
            log('Could not create save_dir: %s', save_dir, sender=self)

        return save_dir

    save_dir = property(fget=get_save_dir)

    def remove_downloaded(self):
        # Remove the playlist file if it exists
        m3u_filename = self.get_playlist_filename()
        if os.path.exists(m3u_filename):
            util.delete_file(m3u_filename)

        # Remove the download directory
        shutil.rmtree(self.save_dir, True)

    @property
    def cover_file(self):
        new_name = os.path.join(self.save_dir, 'folder.jpg')
        if not os.path.exists(new_name):
            old_names = ('cover', '.cover')
            for old_name in old_names:
                filename = os.path.join(self.save_dir, old_name)
                if os.path.exists(filename):
                    shutil.move(filename, new_name)
                    return new_name

        return new_name

    def delete_episode(self, episode):
        filename = episode.local_filename(create=False, check_only=True)
        if filename is not None:
            util.delete_file(filename)

        episode.set_state(gpodder.STATE_DELETED)

class PodcastEpisode(PodcastModelObject):
    """holds data for one object in a channel"""
    MAX_FILENAME_LENGTH = 200

    def _get_played(self):
        return self.is_played

    def _set_played(self, played):
        self.is_played = played

    # Alias "is_played" to "played" for DB column mapping
    played = property(fget=_get_played, fset=_set_played)

    def _get_locked(self):
        return self.is_locked

    def _set_locked(self, locked):
        self.is_locked = locked

    # Alias "is_locked" to "locked" for DB column mapping
    locked = property(fget=_get_locked, fset=_set_locked)

    def _get_channel_id(self):
        return self.channel.id

    def _set_channel_id(self, channel_id):
        assert self.channel.id == channel_id

    # Accessor for the "channel_id" DB column
    channel_id = property(fget=_get_channel_id, fset=_set_channel_id)

    @staticmethod
    def sort_by_pubdate(episodes, reverse=False):
        """Sort a list of PodcastEpisode objects chronologically

        Returns an iterable, sorted sequence of the episodes
        """
        key_pubdate = lambda e: e.pubDate
        return sorted(episodes, key=key_pubdate, reverse=reverse)
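
    # Illustrative usage: oldest-first vs. newest-first ordering. The episode
    # list is assumed to come from a channel, e.g. get_downloaded_episodes().
    #
    #   oldest_first = PodcastEpisode.sort_by_pubdate(episodes)
    #   newest_first = PodcastEpisode.sort_by_pubdate(episodes, reverse=True)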

    def reload_from_db(self):
        """
        Re-reads all episode details for this object from the
        database and updates this object accordingly. Can be
        used to refresh existing objects when the database has
        been updated (e.g. the filename has been set after a
        download where it was not set before the download)
        """
        d = self.db.load_episode(self.id)
        self.update_from_dict(d or {})
        return self

    def has_website_link(self):
        return bool(self.link) and (self.link != self.url or \
                youtube.is_video_link(self.link))

    @staticmethod
    def from_feedparser_entry(entry, channel, mimetype_prefs=''):
        episode = PodcastEpisode(channel)

        # Replace multi-space and newlines with single space (Maemo bug 11173)
        episode.title = re.sub('\s+', ' ', entry.get('title', ''))
        episode.link = entry.get('link', '')
        if 'content' in entry and len(entry['content']) and \
                entry['content'][0].get('type', '') == 'text/html':
            episode.description = entry['content'][0].value
        else:
            episode.description = entry.get('summary', '')

        try:
            # Parse iTunes-specific podcast duration metadata
            total_time = util.parse_time(entry.get('itunes_duration', ''))
            episode.total_time = total_time
        except:
            pass

        # Fallback to subtitle if summary is not available
        if not episode.description:
            episode.description = entry.get('subtitle', '')

        episode.guid = entry.get('id', '')
        if entry.get('updated_parsed', None):
            episode.pubDate = rfc822.mktime_tz(entry.updated_parsed+(0,))

        enclosures = entry.get('enclosures', ())
        audio_available = any(e.get('type', '').startswith('audio/') \
                for e in enclosures)
        video_available = any(e.get('type', '').startswith('video/') \
                for e in enclosures)

        # Create the list of preferred mime types
        mimetype_prefs = mimetype_prefs.split(',')

        def calculate_preference_value(enclosure):
            """Calculate preference value of an enclosure

            This is based on mime types and allows users to prefer
            certain mime types over others (e.g. MP3 over AAC, ...)
            """
            mimetype = enclosure.get('type', None)
            try:
                # If the mime type is found, return its (zero-based) index
                return mimetype_prefs.index(mimetype)
            except ValueError:
                # If it is not found, assume it comes after all listed items
                return len(mimetype_prefs)
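
        # Illustrative example: with mimetype_prefs='audio/mpeg,audio/ogg'
        # (an assumed configuration value), the sort key evaluates to
        #
        #   calculate_preference_value({'type': 'audio/mpeg'})  # -> 0
        #   calculate_preference_value({'type': 'audio/ogg'})   # -> 1
        #   calculate_preference_value({'type': 'video/mp4'})   # -> 2 (unlisted)
        #
        # so sorted(enclosures, key=calculate_preference_value) below puts
        # MP3 enclosures first and unlisted types last.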

        # Enclosures
        for e in sorted(enclosures, key=calculate_preference_value):
            episode.mimetype = e.get('type', 'application/octet-stream')
            if episode.mimetype == '':
                # See Maemo bug 10036
                log('Fixing empty mimetype in ugly feed', sender=episode)
                episode.mimetype = 'application/octet-stream'

            if '/' not in episode.mimetype:
                continue

            # Skip images in feeds if audio or video is available (bug 979)
            if episode.mimetype.startswith('image/') and \
                    (audio_available or video_available):
                continue

            episode.url = util.normalize_feed_url(e.get('href', ''))
            if not episode.url:
                continue

            try:
                episode.length = int(e.length) or -1
            except:
                episode.length = -1

            return episode

        # Media RSS content
        for m in entry.get('media_content', ()):
            episode.mimetype = m.get('type', 'application/octet-stream')
            if '/' not in episode.mimetype:
                continue

            episode.url = util.normalize_feed_url(m.get('url', ''))
            if not episode.url:
                continue

            try:
                episode.length = int(m.fileSize) or -1
            except:
                episode.length = -1

            return episode

        # Brute-force detection of any links
        for l in entry.get('links', ()):
            episode.url = util.normalize_feed_url(l.get('href', ''))
            if not episode.url:
                continue

            if youtube.is_video_link(episode.url):
                return episode

            # Check if we can resolve this link to an audio/video file
            filename, extension = util.filename_from_url(episode.url)
            file_type = util.file_type_by_extension(extension)
            if file_type is None and hasattr(l, 'type'):
                extension = util.extension_from_mimetype(l.type)
                file_type = util.file_type_by_extension(extension)

            # The link points to an audio or video file - use it!
            if file_type is not None:
                return episode

        # Scan MP3 links in description text
        mp3s = re.compile(r'http://[^"]*\.mp3')
        for content in entry.get('content', ()):
            html = content.value
            for match in mp3s.finditer(html):
                episode.url = match.group(0)
                return episode

        return None

    def __init__(self, channel):
        self.db = channel.db
        # Used by Storage for faster saving
        self.id = None
        self.url = ''
        self.title = ''
        self.length = 0
        self.mimetype = 'application/octet-stream'
        self.guid = ''
        self.description = ''
        self.link = ''
        self.channel = channel
        self.pubDate = 0
        self.filename = None
        self.auto_filename = 1 # automatically generated filename

        self.state = gpodder.STATE_NORMAL
        self.is_played = False

        # Initialize the "is_locked" property
        self._is_locked = False
        self.is_locked = channel.channel_is_locked

        # Time attributes
        self.total_time = 0
        self.current_position = 0
        self.current_position_updated = 0

    def get_is_locked(self):
        return self._is_locked

    def set_is_locked(self, is_locked):
        self._is_locked = bool(is_locked)

    is_locked = property(fget=get_is_locked, fset=set_is_locked)

    def save(self):
        if self.state != gpodder.STATE_DOWNLOADED and self.file_exists():
            self.state = gpodder.STATE_DOWNLOADED
        if gpodder.user_hooks is not None:
            gpodder.user_hooks.on_episode_save(self)
        self.db.save_episode(self)

    def on_downloaded(self, filename):
        self.state = gpodder.STATE_DOWNLOADED
        self.is_played = False
        self.length = os.path.getsize(filename)

        if not self.total_time:
            try:
                length = gstreamer.get_track_length(filename)
                if length is not None:
                    length = int(length/1000)
                    log('Detected media length: %d seconds', length, \
                            sender=self)
                    self.total_time = length
                    self.db.save_episode(self)
                    self.db.commit()
                    return
            except Exception, e:
                log('Error while detecting media length: %s', str(e), \
                        sender=self)

        self.db.save_downloaded_episode(self)
        self.db.commit()

    def set_state(self, state):
        self.state = state
        self.db.update_episode_state(self)

    def mark(self, state=None, is_played=None, is_locked=None):
        if state is not None:
            self.state = state
        if is_played is not None:
            self.is_played = is_played
        if is_locked is not None:
            self.is_locked = is_locked
        self.db.update_episode_state(self)

    @property
    def title_markup(self):
        return '%s\n<small>%s</small>' % (xml.sax.saxutils.escape(self.title),
                xml.sax.saxutils.escape(self.channel.title))

    @property
    def maemo_markup(self):
        if self.length > 0:
            length_str = '%s; ' % self.filesize_prop
        else:
            length_str = ''
        return ('<b>%s</b>\n<small>%s'+_('released %s')+ \
                '; '+_('from %s')+'</small>') % (\
                xml.sax.saxutils.escape(re.sub('\s+', ' ', self.title)), \
                xml.sax.saxutils.escape(length_str), \
                xml.sax.saxutils.escape(self.pubdate_prop), \
                xml.sax.saxutils.escape(re.sub('\s+', ' ', self.channel.title)))

    @property
    def maemo_remove_markup(self):
        if self.total_time and self.current_position:
            played_string = self.get_play_info_string()
        elif self.is_played:
            played_string = _('played')
        else:
            played_string = _('unplayed')
        downloaded_string = self.get_age_string()
        if not downloaded_string:
            downloaded_string = _('today')
        return ('<b>%s</b>\n<small>%s; %s; '+_('downloaded %s')+ \
                '; '+_('from %s')+'</small>') % (\
                xml.sax.saxutils.escape(self.title), \
                xml.sax.saxutils.escape(self.filesize_prop), \
                xml.sax.saxutils.escape(played_string), \
                xml.sax.saxutils.escape(downloaded_string), \
                xml.sax.saxutils.escape(self.channel.title))

    def age_in_days(self):
        return util.file_age_in_days(self.local_filename(create=False, \
                check_only=True))

    age_int_prop = property(fget=age_in_days)

    def get_age_string(self):
        return util.file_age_to_string(self.age_in_days())

    age_prop = property(fget=get_age_string)

    def one_line_description(self):
        MAX_LINE_LENGTH = 120
        desc = util.remove_html_tags(self.description or '')
        desc = re.sub('\n', ' ', desc).strip()
        if not desc:
            return _('No description available')
        else:
            if len(desc) > MAX_LINE_LENGTH:
                return desc[:MAX_LINE_LENGTH] + '...'
            else:
                return desc

    def delete_from_disk(self):
        try:
            self.channel.delete_episode(self)
        except:
            log('Cannot delete episode from disk: %s', self.title, traceback=True, sender=self)

    def find_unique_file_name(self, url, filename, extension):
        current_try = util.sanitize_filename(filename, self.MAX_FILENAME_LENGTH)+extension
        next_try_id = 2
        lookup_url = None

        if self.filename == current_try and current_try is not None:
            # We already have this filename - good!
            return current_try

        while self.db.episode_filename_exists(current_try):
            current_try = '%s (%d)%s' % (filename, next_try_id, extension)
            next_try_id += 1

        return current_try

    def local_filename(self, create, force_update=False, check_only=False,
            template=None):
        """Get (and possibly generate) the local saving filename

        Pass create=True if you want this function to generate a
        new filename if none exists. You only want to do this when
        planning to create/download the file after calling this function.

        Normally, you should pass create=False. This will only
        create a filename when the file already exists from a previous
        version of gPodder (where we used md5 filenames). If the file
        does not exist (and the filename also does not exist), this
        function will return None.

        If you pass force_update=True to this function, it will try to
        find a new (better) filename and move the current file if this
        is the case. This is useful if (during the download) you get
        more information about the file, e.g. the mimetype and you want
        to include this information in the file name generation process.

        If check_only=True is passed to this function, it will never try
        to rename the file, even if it would be a good idea. Use this if you
        only want to check if a file exists.

        If "template" is specified, it should be a filename that is to
        be used as a template for generating the "real" filename.

        The generated filename is stored in the database for future access.
        """
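
        # Illustrative call patterns (a sketch; "episode" stands for any
        # PodcastEpisode instance, the surrounding download code is assumed):
        #
        #   # Only check whether the file is on disk; never rename anything:
        #   filename = episode.local_filename(create=False, check_only=True)
        #
        #   # About to download: generate (and persist) a target filename:
        #   target = episode.local_filename(create=True)
        #
        #   # During the download, after learning the real mimetype, allow
        #   # the filename to be re-generated and the file moved if needed:
        #   target = episode.local_filename(create=True, force_update=True)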

        ext = self.extension(may_call_local_filename=False).encode('utf-8', 'ignore')

        # For compatibility with already-downloaded episodes, we
        # have to know md5 filenames if they are downloaded already
        urldigest = hashlib.md5(self.url).hexdigest()

        if not create and self.filename is None:
            urldigest_filename = os.path.join(self.channel.save_dir, urldigest+ext)
            if os.path.exists(urldigest_filename):
                # The file exists, so set it up in our database
                log('Recovering pre-0.15.0 file: %s', urldigest_filename, sender=self)
                self.filename = urldigest+ext
                self.auto_filename = 1
                self.save()
                return urldigest_filename
            return None

        # We only want to check if the file exists, so don't try to
        # rename the file, even if it would be reasonable. See also:
        # http://bugs.gpodder.org/attachment.cgi?id=236
        if check_only:
            if self.filename is None:
                return None
            else:
                return os.path.join(self.channel.save_dir, self.filename)

        if self.filename is None or force_update or (self.auto_filename and self.filename == urldigest+ext):
            # Try to find a new filename for the current file
            if template is not None:
                # If template is specified, trust the template's extension
                episode_filename, ext = os.path.splitext(template)
            else:
                episode_filename, extension_UNUSED = util.filename_from_url(self.url)
            fn_template = util.sanitize_filename(episode_filename, self.MAX_FILENAME_LENGTH)

            if 'redirect' in fn_template and template is None:
                # This looks like a redirection URL - force URL resolving!
                log('Looks like a redirection to me: %s', self.url, sender=self)
                url = util.get_real_url(self.channel.authenticate_url(self.url))
                log('Redirection resolved to: %s', url, sender=self)
                (episode_filename, extension_UNUSED) = util.filename_from_url(url)
                fn_template = util.sanitize_filename(episode_filename, self.MAX_FILENAME_LENGTH)

            # Use the video title for YouTube downloads
            for yt_url in ('http://youtube.com/', 'http://www.youtube.com/'):
                if self.url.startswith(yt_url):
                    fn_template = util.sanitize_filename(os.path.basename(self.title), self.MAX_FILENAME_LENGTH)

            # Nicer download filenames for Soundcloud streams
            if fn_template == 'stream':
                sanitized = util.sanitize_filename(self.title, self.MAX_FILENAME_LENGTH)
                if sanitized:
                    fn_template = sanitized

            # If the basename is empty, use the md5 hexdigest of the URL
            if len(fn_template) == 0 or fn_template.startswith('redirect.'):
                log('Report to bugs.gpodder.org: Podcast at %s with episode URL: %s', self.channel.url, self.url, sender=self)
                fn_template = urldigest

            # Find a unique filename for this episode
            wanted_filename = self.find_unique_file_name(self.url, fn_template, ext)

            # We populate the filename field the first time - does the old file still exist?
            if self.filename is None and os.path.exists(os.path.join(self.channel.save_dir, urldigest+ext)):
                log('Found pre-0.15.0 downloaded file: %s', urldigest, sender=self)
                self.filename = urldigest+ext

            # The old file exists, but we have decided to want a different filename
            if self.filename is not None and wanted_filename != self.filename:
                # there might be an old download folder crawling around - move it!
                new_file_name = os.path.join(self.channel.save_dir, wanted_filename)
                old_file_name = os.path.join(self.channel.save_dir, self.filename)
                if os.path.exists(old_file_name) and not os.path.exists(new_file_name):
                    log('Renaming %s => %s', old_file_name, new_file_name, sender=self)
                    os.rename(old_file_name, new_file_name)
                elif force_update and not os.path.exists(old_file_name):
                    # When we call force_update, the file might not yet exist when we
                    # call it from the downloading code before saving the file
                    log('Choosing new filename: %s', new_file_name, sender=self)
                else:
                    log('Warning: %s exists or %s does not.', new_file_name, old_file_name, sender=self)
                log('Updating filename of %s to "%s".', self.url, wanted_filename, sender=self)
            elif self.filename is None:
                log('Setting filename to "%s".', wanted_filename, sender=self)
            else:
                log('Should update filename. Stays the same (%s). Good!', \
                        wanted_filename, sender=self)
            self.filename = wanted_filename
            self.save()
            self.db.commit()

        return os.path.join(self.channel.save_dir, self.filename)

    def set_mimetype(self, mimetype, commit=False):
        """Sets the mimetype for this episode"""
        self.mimetype = mimetype
        if commit:
            self.db.commit()

    def extension(self, may_call_local_filename=True):
        filename, ext = util.filename_from_url(self.url)
        if may_call_local_filename:
            filename = self.local_filename(create=False)
            if filename is not None:
                filename, ext = os.path.splitext(filename)
        # if we can't detect the extension from the url fallback on the mimetype
        if ext == '' or util.file_type_by_extension(ext) is None:
            ext = util.extension_from_mimetype(self.mimetype)
        return ext

    def check_is_new(self, downloading=lambda e: False):
        """
        Returns True if this episode is to be considered new.
        "Downloading" should be a callback that gets an episode
        as its parameter and returns True if the episode is
        being downloaded at the moment.
        """
        return self.state == gpodder.STATE_NORMAL and \
                not self.is_played and \
                not downloading(self)

    def mark_new(self):
        self.state = gpodder.STATE_NORMAL
        self.is_played = False
        self.db.update_episode_state(self)

    def mark_old(self):
        self.is_played = True
        self.db.update_episode_state(self)

    def file_exists(self):
        filename = self.local_filename(create=False, check_only=True)
        if filename is None:
            return False
        else:
            return os.path.exists(filename)

    def was_downloaded(self, and_exists=False):
        if self.state != gpodder.STATE_DOWNLOADED:
            return False
        if and_exists and not self.file_exists():
            return False
        return True

    def sync_filename(self, use_custom=False, custom_format=None):
        if use_custom:
            return util.object_string_formatter(custom_format,
                    episode=self, podcast=self.channel)
        else:
            return self.title

    def file_type(self):
        # Assume all YouTube links are video files
        if youtube.is_video_link(self.url):
            return 'video'

        return util.file_type_by_extension(self.extension())

    @property
    def basename(self):
        return os.path.splitext(os.path.basename(self.url))[0]

    @property
    def published(self):
        """
        Returns published date as YYYYMMDD (or 00000000 if not available)
        """
        try:
            return datetime.datetime.fromtimestamp(self.pubDate).strftime('%Y%m%d')
        except:
            log('Cannot format pubDate for "%s".', self.title, sender=self)
            return '00000000'

    @property
    def pubtime(self):
        """
        Returns published time as HHMM (or 0000 if not available)
        """
        try:
            return datetime.datetime.fromtimestamp(self.pubDate).strftime('%H%M')
        except:
            log('Cannot format pubDate (time) for "%s".', self.title, sender=self)
            return '0000'

    def playlist_title(self):
        """Return a title for this episode in a playlist

        The title will be composed of the podcast name, the
        episode name and the publication date. The return
        value is the canonical representation of this episode
        in playlists (for example, M3U playlists).
        """
        return '%s - %s (%s)' % (self.channel.title, \
                self.title, \
                self.cute_pubdate())

    def cute_pubdate(self):
        result = util.format_date(self.pubDate)
        if result is None:
            return '(%s)' % _('unknown')
        else:
            return result

    pubdate_prop = property(fget=cute_pubdate)

    def calculate_filesize(self):
        filename = self.local_filename(create=False)
        if filename is None:
            log('calculate_filesize called, but filename is None!', sender=self)
        try:
            self.length = os.path.getsize(filename)
        except:
            log('Could not get filesize for %s.', self.url)

    def is_finished(self):
        """Return True if this episode is considered "finished playing"

        An episode is considered "finished" when there is a
        current position mark on the track, and when the
        current position is greater than 99 percent of the
        total time or inside the last 10 seconds of a track.
        """
        return self.current_position > 0 and \
                (self.current_position + 10 >= self.total_time or \
                self.current_position >= self.total_time*.99)
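
    # Illustrative numbers: with an assumed total_time of 3600 seconds, the
    # expression above returns True once current_position reaches 3564
    # (99 percent of 3600), which here is the smaller of the two thresholds
    # (3564 vs. 3600 - 10 = 3590). With current_position == 0 it is always
    # False, regardless of total_time.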

    def get_play_info_string(self):
        if self.is_finished():
            return '%s (%s)' % (_('Finished'), self.get_duration_string(),)
        if self.current_position > 0:
            return '%s / %s' % (self.get_position_string(), \
                    self.get_duration_string())
        else:
            return self.get_duration_string()

    def get_position_string(self):
        return util.format_time(self.current_position)

    def get_duration_string(self):
        return util.format_time(self.total_time)

    def get_filesize_string(self):
        return util.format_filesize(self.length)

    filesize_prop = property(fget=get_filesize_string)

    def get_played_string(self):
        if not self.is_played:
            return _('Unplayed')

        return ''

    played_prop = property(fget=get_played_string)

    def is_duplicate(self, episode):
        if self.title == episode.title and self.pubDate == episode.pubDate:
            log('Possible duplicate detected: %s', self.title)
            return True
        return False

    def duplicate_id(self):
        return hash((self.title, self.pubDate))
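
    # Illustrative sketch of how the feed updater (_consume_updated_feed()
    # above) uses these helpers: episodes hashing to the same
    # (title, pubDate) pair are treated as the same item even if the feed
    # changed their GUID. "existing" and "new_episode" are assumed to come
    # from that update loop.
    #
    #   existing_dupes = dict((e.duplicate_id(), e) for e in existing)
    #   candidate = existing_dupes.get(new_episode.duplicate_id(), None)
    #   if candidate is not None and candidate.is_duplicate(new_episode):
    #       candidate.update_from(new_episode)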

    def update_from(self, episode):
        for k in ('title', 'url', 'description', 'link', 'pubDate', 'guid'):
            setattr(self, k, getattr(episode, k))