Use <content:encoded> from feeds (bug 1144)
[gpodder.git] / src / gpodder / model.py
blob9f9264843f7bb85f2ad9d3534582dd1b44cbc5f1
1 # -*- coding: utf-8 -*-
3 # gPodder - A media aggregator and podcast client
4 # Copyright (c) 2005-2010 Thomas Perl and the gPodder Team
6 # gPodder is free software; you can redistribute it and/or modify
7 # it under the terms of the GNU General Public License as published by
8 # the Free Software Foundation; either version 3 of the License, or
9 # (at your option) any later version.
11 # gPodder is distributed in the hope that it will be useful,
12 # but WITHOUT ANY WARRANTY; without even the implied warranty of
13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 # GNU General Public License for more details.
16 # You should have received a copy of the GNU General Public License
17 # along with this program. If not, see <http://www.gnu.org/licenses/>.
22 # gpodder.model - Core model classes for gPodder (2009-08-13)
23 # Based on libpodcasts.py (thp, 2005-10-29)
26 import gpodder
27 from gpodder import util
28 from gpodder import feedcore
29 from gpodder import youtube
30 from gpodder import corestats
31 from gpodder import gstreamer
33 from gpodder.liblogger import log
35 import os
36 import re
37 import glob
38 import shutil
39 import time
40 import datetime
41 import rfc822
42 import hashlib
43 import feedparser
44 import xml.sax.saxutils
46 _ = gpodder.gettext
# Raised by gPodderFetcher when a registered custom handler claims a URL;
# carries the handler's feed object as exception data.
class CustomFeed(feedcore.ExceptionWithData): pass
class gPodderFetcher(feedcore.Fetcher):
    """
    This class extends the feedcore Fetcher with the gPodder User-Agent and the
    Proxy handler based on the current settings in gPodder and provides a
    convenience method (fetch_channel) for use by PodcastChannel objects.
    """
    # Handlers registered via register(); each may claim a URL and provide
    # a custom (non-feedparser) feed implementation for it.
    custom_handlers = []

    def __init__(self):
        feedcore.Fetcher.__init__(self, gpodder.user_agent)

    def fetch_channel(self, channel):
        """Fetch the feed for "channel".

        Results are reported via feedcore's exception mechanism; raises
        CustomFeed if one of the registered handlers claims the URL.
        """
        etag = channel.etag
        modified = feedparser._parse_date(channel.last_modified)
        # If we have a username or password, rebuild the url with them included
        # Note: using a HTTPBasicAuthHandler would be pain because we need to
        # know the realm. It can be done, but I think this method works, too
        url = channel.authenticate_url(channel.url)
        for handler in self.custom_handlers:
            custom_feed = handler.handle_url(url)
            if custom_feed is not None:
                raise CustomFeed(custom_feed)
        self.fetch(url, etag, modified)

    def _resolve_url(self, url):
        # Map YouTube channel/user pages to their real feed URL
        return youtube.get_real_channel_url(url)

    @classmethod
    def register(cls, handler):
        # Add a custom URL handler (shared by all fetcher instances)
        cls.custom_handlers.append(handler)

#    def _get_handlers(self):
#        # Add a ProxyHandler for fetching data via a proxy server
#        proxies = {'http': 'http://proxy.example.org:8080'}
#        return[urllib2.ProxyHandler(proxies))]

# The "register" method is exposed here for external usage
register_custom_handler = gPodderFetcher.register
class PodcastModelObject(object):
    """
    A generic base class for our podcast model providing common helper
    and utility functions.
    """

    @classmethod
    def create_from_dict(cls, d, *args):
        """
        Create a new object, passing "args" to the constructor
        and then updating the object with the values from "d".
        """
        o = cls(*args)
        o.update_from_dict(d)
        return o

    def update_from_dict(self, d):
        """
        Updates the attributes of this object with values from the
        dictionary "d" by using the keys found in "d".

        Keys in "d" that do not correspond to an existing attribute
        of this object are silently ignored.
        """
        for k in d:
            if hasattr(self, k):
                setattr(self, k, d[k])
class PodcastChannel(PodcastModelObject):
    """holds data for a complete channel"""
    # Maximum length of the on-disk folder name for a channel
    MAX_FOLDERNAME_LENGTH = 150
    SECONDS_PER_WEEK = 7*24*60*60

    # Shared feed fetcher instance used by all channels
    feed_fetcher = gPodderFetcher()

    @classmethod
    def build_factory(cls, download_dir):
        # Build a factory callable suitable for db.load_channels()
        def factory(dict, db):
            return cls.create_from_dict(dict, db, download_dir)
        return factory

    @classmethod
    def load_from_db(cls, db, download_dir):
        # Load all subscribed channels from the database
        return db.load_channels(factory=cls.build_factory(download_dir))

    @classmethod
    def load(cls, db, url, create=True, authentication_tokens=None,\
            max_episodes=0, download_dir=None, allow_empty_feeds=False, \
            mimetype_prefs=''):
        """Load the channel for "url" from the database.

        If it is not in the database and create is True, subscribe to
        it: create the channel, update it once and save it. Subscribing
        to a feed without downloadable episodes raises an exception
        unless allow_empty_feeds is True.
        """
        if isinstance(url, unicode):
            url = url.encode('utf-8')

        tmp = db.load_channels(factory=cls.build_factory(download_dir), url=url)
        if len(tmp):
            return tmp[0]
        elif create:
            tmp = PodcastChannel(db, download_dir)
            tmp.url = url
            if authentication_tokens is not None:
                tmp.username = authentication_tokens[0]
                tmp.password = authentication_tokens[1]

            tmp.update(max_episodes, mimetype_prefs)
            tmp.save()
            db.force_last_new(tmp)
            # Subscribing to empty feeds should yield an error (except if
            # the user specifically allows empty feeds in the config UI)
            if sum(tmp.get_statistics()) == 0 and not allow_empty_feeds:
                tmp.delete()
                raise Exception(_('No downloadable episodes in feed'))
            return tmp

    def episode_factory(self, d, db__parameter_is_unused=None):
        """
        This function takes a dictionary containing key-value pairs for
        episodes and returns a new PodcastEpisode object that is connected
        to this PodcastChannel object.

        Returns: A new PodcastEpisode object
        """
        return PodcastEpisode.create_from_dict(d, self)
    def _consume_custom_feed(self, custom_feed, max_episodes=0):
        """Merge metadata and episodes from a custom feed handler."""
        self.title = custom_feed.get_title()
        self.link = custom_feed.get_link()
        self.description = custom_feed.get_description()
        self.image = custom_feed.get_image()
        # Custom feeds carry no publish date of their own; use "now"
        self.pubDate = time.time()
        self.save()

        guids = [episode.guid for episode in self.get_all_episodes()]

        # Insert newly-found episodes into the database
        custom_feed.get_new_episodes(self, guids)

        self.save()

        self.db.purge(max_episodes, self.id)
187 def _consume_updated_feed(self, feed, max_episodes=0, mimetype_prefs=''):
188 self.parse_error = feed.get('bozo_exception', None)
190 # Replace multi-space and newlines with single space (Maemo bug 11173)
191 self.title = re.sub('\s+', ' ', feed.feed.get('title', self.url))
193 self.link = feed.feed.get('link', self.link)
194 self.description = feed.feed.get('subtitle', self.description)
195 # Start YouTube-specific title FIX
196 YOUTUBE_PREFIX = 'Uploads by '
197 if self.title.startswith(YOUTUBE_PREFIX):
198 self.title = self.title[len(YOUTUBE_PREFIX):] + ' on YouTube'
199 # End YouTube-specific title FIX
201 try:
202 self.pubDate = rfc822.mktime_tz(feed.feed.get('updated_parsed', None+(0,)))
203 except:
204 self.pubDate = time.time()
206 if hasattr(feed.feed, 'image'):
207 for attribute in ('href', 'url'):
208 new_value = getattr(feed.feed.image, attribute, None)
209 if new_value is not None:
210 log('Found cover art in %s: %s', attribute, new_value)
211 self.image = new_value
213 if hasattr(feed.feed, 'icon'):
214 self.image = feed.feed.icon
216 self.save()
218 # Load all episodes to update them properly.
219 existing = self.get_all_episodes()
221 # We can limit the maximum number of entries that gPodder will parse
222 if max_episodes > 0 and len(feed.entries) > max_episodes:
223 entries = feed.entries[:max_episodes]
224 else:
225 entries = feed.entries
227 # Title + PubDate hashes for existing episodes
228 existing_dupes = dict((e.duplicate_id(), e) for e in existing)
230 # GUID-based existing episode list
231 existing_guids = dict((e.guid, e) for e in existing)
233 # Get most recent pubDate of all episodes
234 last_pubdate = self.db.get_last_pubdate(self) or 0
236 # Search all entries for new episodes
237 for entry in entries:
238 try:
239 episode = PodcastEpisode.from_feedparser_entry(entry, self, mimetype_prefs)
240 if episode is not None and not episode.title:
241 episode.title, ext = os.path.splitext(os.path.basename(episode.url))
242 except Exception, e:
243 log('Cannot instantiate episode: %s. Skipping.', e, sender=self, traceback=True)
244 continue
246 if episode is None:
247 continue
249 # Detect (and update) existing episode based on GUIDs
250 existing_episode = existing_guids.get(episode.guid, None)
251 if existing_episode:
252 existing_episode.update_from(episode)
253 existing_episode.save()
254 continue
256 # Detect (and update) existing episode based on duplicate ID
257 existing_episode = existing_dupes.get(episode.duplicate_id(), None)
258 if existing_episode:
259 if existing_episode.is_duplicate(episode):
260 existing_episode.update_from(episode)
261 existing_episode.save()
262 continue
264 # Workaround for bug 340: If the episode has been
265 # published earlier than one week before the most
266 # recent existing episode, do not mark it as new.
267 if episode.pubDate < last_pubdate - self.SECONDS_PER_WEEK:
268 log('Episode with old date: %s', episode.title, sender=self)
269 episode.is_played = True
271 episode.save()
273 # Remove "unreachable" episodes - episodes that have not been
274 # downloaded and that the feed does not list as downloadable anymore
275 if self.id is not None:
276 seen_guids = set(e.guid for e in feed.entries if hasattr(e, 'guid'))
277 episodes_to_purge = (e for e in existing if \
278 e.state != gpodder.STATE_DOWNLOADED and \
279 e.guid not in seen_guids and e.guid is not None)
280 for episode in episodes_to_purge:
281 log('Episode removed from feed: %s (%s)', episode.title, \
282 episode.guid, sender=self)
283 self.db.delete_episode_by_guid(episode.guid, self.id)
285 # This *might* cause episodes to be skipped if there were more than
286 # max_episodes_per_feed items added to the feed between updates.
287 # The benefit is that it prevents old episodes from apearing as new
288 # in certain situations (see bug #340).
289 self.db.purge(max_episodes, self.id)
    def update_channel_lock(self):
        # Persist this channel's "locked" flag to the database
        self.db.update_channel_lock(self)

    def _update_etag_modified(self, feed):
        # Remember HTTP cache validators and refresh the release estimate
        self.updated_timestamp = time.time()
        self.calculate_publish_behaviour()
        self.etag = feed.headers.get('etag', self.etag)
        self.last_modified = feed.headers.get('last-modified', self.last_modified)

    def query_automatic_update(self):
        """Query if this channel should be updated automatically

        Returns True if the update should happen in automatic
        mode or False if this channel should be skipped (timeout
        not yet reached or release not expected right now).
        """
        updated = self.updated_timestamp
        expected = self.release_expected

        now = time.time()
        one_day_ago = now - 60*60*24
        lastcheck = now - 60*10

        # Update if the last check was more than a day ago, or if a new
        # release is expected and we haven't checked in the last 10 minutes
        return updated < one_day_ago or \
                (expected < now and updated < lastcheck)
    def update(self, max_episodes=0, mimetype_prefs=''):
        """Fetch this channel's feed and merge the result.

        The feedcore Fetcher reports outcomes by raising exceptions;
        each "expected" outcome is handled in its own except branch.
        Unexpected errors are re-raised to the caller.
        """
        try:
            self.feed_fetcher.fetch_channel(self)
        except CustomFeed, updated:
            # A registered custom handler provided the feed data
            custom_feed = updated.data
            self._consume_custom_feed(custom_feed, max_episodes)
            self.save()
        except feedcore.UpdatedFeed, updated:
            feed = updated.data
            self._consume_updated_feed(feed, max_episodes, mimetype_prefs)
            self._update_etag_modified(feed)
            self.save()
        except feedcore.NewLocation, updated:
            # The feed moved permanently - remember the new URL
            feed = updated.data
            self.url = feed.href
            self._consume_updated_feed(feed, max_episodes, mimetype_prefs)
            self._update_etag_modified(feed)
            self.save()
        except feedcore.NotModified, updated:
            # Feed unchanged - only refresh the HTTP cache validators
            feed = updated.data
            self._update_etag_modified(feed)
            self.save()
        except Exception, e:
            # "Not really" errors
            #feedcore.AuthenticationRequired
            # Temporary errors
            #feedcore.Offline
            #feedcore.BadRequest
            #feedcore.InternalServerError
            #feedcore.WifiLogin
            # Permanent errors
            #feedcore.Unsubscribe
            #feedcore.NotFound
            #feedcore.InvalidFeed
            #feedcore.UnknownStatusCode
            raise

        if gpodder.user_hooks is not None:
            gpodder.user_hooks.on_podcast_updated(self)

        self.db.commit()
    def delete(self):
        # Remove this channel (and its episodes) from the database
        self.db.delete_channel(self)

    def save(self):
        """Persist this channel to the database (notifying user hooks)."""
        if gpodder.user_hooks is not None:
            gpodder.user_hooks.on_podcast_save(self)
        if self.foldername is None:
            # get_save_dir() finds a unique value for foldername
            self.get_save_dir()
        self.db.save_channel(self)

    def get_statistics(self):
        # Episode counts for this channel as a 5-tuple
        # NOTE(review): exact tuple layout is defined by db.get_channel_count
        # - confirm against the database module
        if self.id is None:
            return (0, 0, 0, 0, 0)
        else:
            return self.db.get_channel_count(int(self.id))

    def authenticate_url(self, url):
        # Embed this channel's HTTP auth credentials (if any) into the URL
        return util.url_add_authentication(url, self.username, self.password)
    def __init__(self, db, download_dir):
        """Create an empty, unsaved channel bound to "db"."""
        self.db = db
        self.download_dir = download_dir
        self.id = None
        self.url = None
        self.title = ''
        self.link = ''
        self.description = ''
        self.image = None
        self.pubDate = 0
        self.parse_error = None
        self.foldername = None
        self.auto_foldername = 1 # automatically generated foldername

        # should this channel be synced to devices? (ex: iPod)
        self.sync_to_devices = True
        # to which playlist should be synced
        self.device_playlist_name = 'gPodder'
        # if set, this overrides the channel-provided title
        self.override_title = ''
        self.username = ''
        self.password = ''

        # HTTP cache validators from the previous fetch
        self.last_modified = None
        self.etag = None

        self.save_dir_size = 0
        self.__save_dir_size_set = False

        self.channel_is_locked = False

        # Release-prediction state (see calculate_publish_behaviour)
        self.release_expected = time.time()
        self.release_deviation = 0
        self.updated_timestamp = 0
        self.feed_update_enabled = True
    def calculate_publish_behaviour(self):
        """Estimate when the next episode release is expected.

        Looks at the pubDates of up to 30 recent episodes and derives
        release_expected and release_deviation from the gaps between
        consecutive releases. Needs at least 3 episodes to estimate.
        """
        episodes = self.db.load_episodes(self, factory=self.episode_factory, limit=30)
        if len(episodes) < 3:
            return

        deltas = []
        latest = max(e.pubDate for e in episodes)
        for index in range(len(episodes)-1):
            # Only consider pairs where both episodes have a known pubDate
            if episodes[index].pubDate != 0 and episodes[index+1].pubDate != 0:
                deltas.append(episodes[index].pubDate - episodes[index+1].pubDate)

        if len(deltas) > 1:
            stats = corestats.Stats(deltas)
            self.release_expected = min([latest+stats.stdev(), latest+(stats.min()+stats.avg())*.5])
            self.release_deviation = stats.stdev()
        else:
            self.release_expected = latest
            self.release_deviation = 0
    def request_save_dir_size(self):
        # Compute the download folder size lazily (only once per session)
        if not self.__save_dir_size_set:
            self.update_save_dir_size()
            self.__save_dir_size_set = True

    def update_save_dir_size(self):
        self.save_dir_size = util.calculate_size(self.save_dir)

    def get_title( self):
        # Precedence: user override > feed-provided title > URL fallback
        if self.override_title:
            return self.override_title
        elif not self.__title.strip():
            return self.url
        else:
            return self.__title

    def set_title( self, value):
        self.__title = value.strip()

    title = property(fget=get_title,
                     fset=set_title)
    def set_custom_title( self, custom_title):
        """Set a user-provided title override.

        Renames/merges the download folder to match the new title if a
        sane, unique folder name can be derived from it. Setting the
        custom title back to the feed title clears the override.
        """
        custom_title = custom_title.strip()

        # if the custom title is the same as we have
        if custom_title == self.override_title:
            return

        # if custom title is the same as channel title and we didn't have a custom title
        if custom_title == self.__title and self.override_title == '':
            return

        # make sure self.foldername is initialized
        self.get_save_dir()

        # rename folder if custom_title looks sane
        new_folder_name = self.find_unique_folder_name(custom_title)
        if len(new_folder_name) > 0 and new_folder_name != self.foldername:
            log('Changing foldername based on custom title: %s', custom_title, sender=self)
            new_folder = os.path.join(self.download_dir, new_folder_name)
            old_folder = os.path.join(self.download_dir, self.foldername)
            if os.path.exists(old_folder):
                if not os.path.exists(new_folder):
                    # Old folder exists, new folder does not -> simply rename
                    log('Renaming %s => %s', old_folder, new_folder, sender=self)
                    os.rename(old_folder, new_folder)
                else:
                    # Both folders exist -> move files and delete old folder
                    log('Moving files from %s to %s', old_folder, new_folder, sender=self)
                    for file in glob.glob(os.path.join(old_folder, '*')):
                        shutil.move(file, new_folder)
                    log('Removing %s', old_folder, sender=self)
                    shutil.rmtree(old_folder, ignore_errors=True)
            self.foldername = new_folder_name
            self.save()

        if custom_title != self.__title:
            self.override_title = custom_title
        else:
            self.override_title = ''
    def get_downloaded_episodes(self):
        # All episodes of this channel in the DOWNLOADED state
        return self.db.load_episodes(self, factory=self.episode_factory, state=gpodder.STATE_DOWNLOADED)

    def get_new_episodes(self, downloading=lambda e: False):
        """
        Get a list of new episodes. You can optionally specify
        "downloading" as a callback that takes an episode as
        a parameter and returns True if the episode is currently
        being downloaded or False if not.

        By default, "downloading" is implemented so that it
        reports all episodes as not downloading.
        """
        return [episode for episode in self.db.load_episodes(self, \
                factory=self.episode_factory, state=gpodder.STATE_NORMAL) if \
                episode.check_is_new(downloading=downloading)]

    def get_playlist_filename(self):
        # If the save_dir doesn't end with a slash (which it really should
        # not, if the implementation is correct, we can just append .m3u :)
        assert self.save_dir[-1] != '/'
        return self.save_dir+'.m3u'
    def update_m3u_playlist(self):
        """(Re)write this channel's M3U playlist of downloaded episodes.

        The playlist file is removed when no episodes are downloaded.
        """
        m3u_filename = self.get_playlist_filename()

        downloaded_episodes = self.get_downloaded_episodes()
        if not downloaded_episodes:
            log('No episodes - removing %s', m3u_filename, sender=self)
            util.delete_file(m3u_filename)
            return

        log('Writing playlist to %s', m3u_filename, sender=self)
        util.write_m3u_playlist(m3u_filename, \
                PodcastEpisode.sort_by_pubdate(downloaded_episodes))

    def get_episode_by_url(self, url):
        # Single-episode lookup by enclosure URL
        return self.db.load_single_episode(self, \
                factory=self.episode_factory, url=url)

    def get_episode_by_filename(self, filename):
        # Single-episode lookup by local filename
        return self.db.load_single_episode(self, \
                factory=self.episode_factory, filename=filename)

    def get_all_episodes(self):
        return self.db.load_episodes(self, factory=self.episode_factory)
    def find_unique_folder_name(self, foldername):
        """Return a sanitized folder name not used by another channel.

        Appends " (2)", " (3)", ... until the name is unique in the DB.
        """
        # Remove trailing dots to avoid errors on Windows (bug 600)
        foldername = foldername.strip().rstrip('.')

        current_try = util.sanitize_filename(foldername, \
                self.MAX_FOLDERNAME_LENGTH)
        next_try_id = 2

        while True:
            if self.db.channel_foldername_exists(current_try):
                current_try = '%s (%d)' % (foldername, next_try_id)
                next_try_id += 1
            else:
                return current_try
    def get_save_dir(self):
        """Return (and create, if needed) this channel's download folder.

        Also derives a human-readable folder name from the channel title
        when none has been set manually, and migrates/renames old folders
        (including pre-0.15.0 md5-based names) when the name changes.
        """
        urldigest = hashlib.md5(self.url).hexdigest()
        sanitizedurl = util.sanitize_filename(self.url, self.MAX_FOLDERNAME_LENGTH)
        if self.foldername is None or (self.auto_foldername and (self.foldername == urldigest or self.foldername.startswith(sanitizedurl))):
            # we must change the folder name, because it has not been set manually
            fn_template = util.sanitize_filename(self.title, self.MAX_FOLDERNAME_LENGTH)

            # if this is an empty string, try the basename
            if len(fn_template) == 0:
                log('That is one ugly feed you have here! (Report this to bugs.gpodder.org: %s)', self.url, sender=self)
                fn_template = util.sanitize_filename(os.path.basename(self.url), self.MAX_FOLDERNAME_LENGTH)

            # If the basename is also empty, use the first 6 md5 hexdigest chars of the URL
            if len(fn_template) == 0:
                log('That is one REALLY ugly feed you have here! (Report this to bugs.gpodder.org: %s)', self.url, sender=self)
                fn_template = urldigest # no need for sanitize_filename here

            # Find a unique folder name for this podcast
            wanted_foldername = self.find_unique_folder_name(fn_template)

            # if the foldername has not been set, check if the (old) md5 filename exists
            if self.foldername is None and os.path.exists(os.path.join(self.download_dir, urldigest)):
                log('Found pre-0.15.0 download folder for %s: %s', self.title, urldigest, sender=self)
                self.foldername = urldigest

            # we have a valid, new folder name in "current_try" -> use that!
            if self.foldername is not None and wanted_foldername != self.foldername:
                # there might be an old download folder crawling around - move it!
                new_folder_name = os.path.join(self.download_dir, wanted_foldername)
                old_folder_name = os.path.join(self.download_dir, self.foldername)
                if os.path.exists(old_folder_name):
                    if not os.path.exists(new_folder_name):
                        # Old folder exists, new folder does not -> simply rename
                        log('Renaming %s => %s', old_folder_name, new_folder_name, sender=self)
                        os.rename(old_folder_name, new_folder_name)
                    else:
                        # Both folders exist -> move files and delete old folder
                        log('Moving files from %s to %s', old_folder_name, new_folder_name, sender=self)
                        for file in glob.glob(os.path.join(old_folder_name, '*')):
                            shutil.move(file, new_folder_name)
                        log('Removing %s', old_folder_name, sender=self)
                        shutil.rmtree(old_folder_name, ignore_errors=True)
                log('Updating foldername of %s to "%s".', self.url, wanted_foldername, sender=self)
                self.foldername = wanted_foldername
                self.save()

        save_dir = os.path.join(self.download_dir, self.foldername)

        # Create save_dir if it does not yet exist
        if not util.make_directory( save_dir):
            log( 'Could not create save_dir: %s', save_dir, sender = self)

        return save_dir

    save_dir = property(fget=get_save_dir)
    def remove_downloaded(self):
        """Delete the channel's playlist file and download directory."""
        # Remove the playlist file if it exists
        m3u_filename = self.get_playlist_filename()
        if os.path.exists(m3u_filename):
            util.delete_file(m3u_filename)

        # Remove the download directory
        shutil.rmtree(self.save_dir, True)

    @property
    def cover_file(self):
        # Path of the channel's cover image; migrates old "cover"/".cover"
        # files to the new "folder.jpg" name on first access
        new_name = os.path.join(self.save_dir, 'folder.jpg')
        if not os.path.exists(new_name):
            old_names = ('cover', '.cover')
            for old_name in old_names:
                filename = os.path.join(self.save_dir, old_name)
                if os.path.exists(filename):
                    shutil.move(filename, new_name)
                    return new_name

        return new_name

    def delete_episode(self, episode):
        # Delete the episode's downloaded file (if any) and mark it deleted
        filename = episode.local_filename(create=False, check_only=True)
        if filename is not None:
            util.delete_file(filename)

        episode.set_state(gpodder.STATE_DELETED)
class PodcastEpisode(PodcastModelObject):
    """holds data for one object in a channel"""
    # Maximum length of generated on-disk filenames
    MAX_FILENAME_LENGTH = 200

    def _get_played(self):
        return self.is_played

    def _set_played(self, played):
        self.is_played = played

    # Alias "is_played" to "played" for DB column mapping
    played = property(fget=_get_played, fset=_set_played)

    def _get_locked(self):
        return self.is_locked

    def _set_locked(self, locked):
        self.is_locked = locked

    # Alias "is_locked" to "locked" for DB column mapping
    locked = property(fget=_get_locked, fset=_set_locked)

    def _get_channel_id(self):
        return self.channel.id

    def _set_channel_id(self, channel_id):
        # The channel is fixed at construction time; the DB value must agree
        assert self.channel.id == channel_id

    # Accessor for the "channel_id" DB column
    channel_id = property(fget=_get_channel_id, fset=_set_channel_id)
    @staticmethod
    def sort_by_pubdate(episodes, reverse=False):
        """Sort a list of PodcastEpisode objects chronologically

        Returns a iterable, sorted sequence of the episodes
        """
        key_pubdate = lambda e: e.pubDate
        return sorted(episodes, key=key_pubdate, reverse=reverse)

    def reload_from_db(self):
        """
        Re-reads all episode details for this object from the
        database and updates this object accordingly. Can be
        used to refresh existing objects when the database has
        been updated (e.g. the filename has been set after a
        download where it was not set before the download)
        """
        d = self.db.load_episode(self.id)
        self.update_from_dict(d or {})
        return self

    def has_website_link(self):
        # True if the episode's link is worth opening in a browser,
        # i.e. it differs from the download URL or is a YouTube link
        return bool(self.link) and (self.link != self.url or \
                youtube.is_video_link(self.link))
    @staticmethod
    def from_feedparser_entry(entry, channel, mimetype_prefs=''):
        """Create a new episode from a feedparser entry.

        Looks for a download URL in this order: enclosures (preferred by
        mime type), Media RSS content, plain links that resolve to media
        files, and finally MP3 links found in the entry's content.
        Returns None if no usable download URL is found.
        """
        episode = PodcastEpisode(channel)

        # Replace multi-space and newlines with single space (Maemo bug 11173)
        episode.title = re.sub('\s+', ' ', entry.get('title', ''))
        episode.link = entry.get('link', '')
        if 'content' in entry and len(entry['content']) and \
                entry['content'][0].type == 'text/html':
            # Prefer <content:encoded> HTML over the summary (bug 1144)
            episode.description = entry['content'][0].value
        else:
            episode.description = entry.get('summary', '')

        try:
            # Parse iTunes-specific podcast duration metadata
            total_time = util.parse_time(entry.get('itunes_duration', ''))
            episode.total_time = total_time
        except:
            pass

        # Fallback to subtitle if summary is not available
        if not episode.description:
            episode.description = entry.get('subtitle', '')

        episode.guid = entry.get('id', '')
        if entry.get('updated_parsed', None):
            # mktime_tz needs a 10-tuple; append a zero UTC offset
            episode.pubDate = rfc822.mktime_tz(entry.updated_parsed+(0,))

        enclosures = entry.get('enclosures', ())
        audio_available = any(e.get('type', '').startswith('audio/') \
                for e in enclosures)
        video_available = any(e.get('type', '').startswith('video/') \
                for e in enclosures)

        # Create the list of preferred mime types
        mimetype_prefs = mimetype_prefs.split(',')

        def calculate_preference_value(enclosure):
            """Calculate preference value of an enclosure

            This is based on mime types and allows users to prefer
            certain mime types over others (e.g. MP3 over AAC, ...)
            """
            mimetype = enclosure.get('type', None)
            try:
                # If the mime type is found, return its (zero-based) index
                return mimetype_prefs.index(mimetype)
            except ValueError:
                # If it is not found, assume it comes after all listed items
                return len(mimetype_prefs)

        # Enclosures
        for e in sorted(enclosures, key=calculate_preference_value):
            episode.mimetype = e.get('type', 'application/octet-stream')
            if episode.mimetype == '':
                # See Maemo bug 10036
                log('Fixing empty mimetype in ugly feed', sender=episode)
                episode.mimetype = 'application/octet-stream'

            if '/' not in episode.mimetype:
                continue

            # Skip images in feeds if audio or video is available (bug 979)
            if episode.mimetype.startswith('image/') and \
                    (audio_available or video_available):
                continue

            episode.url = util.normalize_feed_url(e.get('href', ''))
            if not episode.url:
                continue

            try:
                episode.length = int(e.length) or -1
            except:
                episode.length = -1

            return episode

        # Media RSS content
        for m in entry.get('media_content', ()):
            episode.mimetype = m.get('type', 'application/octet-stream')
            if '/' not in episode.mimetype:
                continue

            episode.url = util.normalize_feed_url(m.get('url', ''))
            if not episode.url:
                continue

            try:
                episode.length = int(m.fileSize) or -1
            except:
                episode.length = -1

            return episode

        # Brute-force detection of any links
        for l in entry.get('links', ()):
            episode.url = util.normalize_feed_url(l.get('href', ''))
            if not episode.url:
                continue

            if youtube.is_video_link(episode.url):
                return episode

            # Check if we can resolve this link to a audio/video file
            filename, extension = util.filename_from_url(episode.url)
            file_type = util.file_type_by_extension(extension)
            if file_type is None and hasattr(l, 'type'):
                extension = util.extension_from_mimetype(l.type)
                file_type = util.file_type_by_extension(extension)

            # The link points to a audio or video file - use it!
            if file_type is not None:
                return episode

        # Scan MP3 links in description text
        mp3s = re.compile(r'http://[^"]*\.mp3')
        for content in entry.get('content', ()):
            html = content.value
            for match in mp3s.finditer(html):
                episode.url = match.group(0)
                return episode

        # No downloadable media found in this entry
        return None
    def __init__(self, channel):
        """Create an empty, unsaved episode bound to "channel"."""
        self.db = channel.db
        # Used by Storage for faster saving
        self.id = None
        self.url = ''
        self.title = ''
        self.length = 0
        self.mimetype = 'application/octet-stream'
        self.guid = ''
        self.description = ''
        self.link = ''
        self.channel = channel
        self.pubDate = 0
        self.filename = None
        self.auto_filename = 1 # automatically generated filename

        self.state = gpodder.STATE_NORMAL
        self.is_played = False

        # Initialize the "is_locked" property
        self._is_locked = False
        self.is_locked = channel.channel_is_locked

        # Time attributes
        self.total_time = 0
        self.current_position = 0
        self.current_position_updated = 0

    def get_is_locked(self):
        return self._is_locked

    def set_is_locked(self, is_locked):
        # Normalize to a real bool for DB storage
        self._is_locked = bool(is_locked)

    is_locked = property(fget=get_is_locked, fset=set_is_locked)
    def save(self):
        """Persist this episode to the database (notifying user hooks)."""
        # If the file already exists on disk, reflect that in the state
        if self.state != gpodder.STATE_DOWNLOADED and self.file_exists():
            self.state = gpodder.STATE_DOWNLOADED
        if gpodder.user_hooks is not None:
            gpodder.user_hooks.on_episode_save(self)
        self.db.save_episode(self)

    def on_downloaded(self, filename):
        """Update episode state after a successful download."""
        self.state = gpodder.STATE_DOWNLOADED
        self.is_played = False
        self.length = os.path.getsize(filename)

        if not self.total_time:
            try:
                # Try to detect the track duration via GStreamer
                length = gstreamer.get_track_length(filename)
                if length is not None:
                    # GStreamer reports milliseconds; we store seconds
                    length = int(length/1000)
                    log('Detected media length: %d seconds', length, \
                            sender=self)
                    self.total_time = length
                    self.db.save_episode(self)
                    self.db.commit()
                    return
            except Exception, e:
                log('Error while detecting media length: %s', str(e), \
                        sender=self)

        self.db.save_downloaded_episode(self)
        self.db.commit()
    def set_state(self, state):
        # Change the state and persist it immediately
        self.state = state
        self.db.update_episode_state(self)

    def mark(self, state=None, is_played=None, is_locked=None):
        """Update any subset of (state, played, locked) and persist."""
        if state is not None:
            self.state = state
        if is_played is not None:
            self.is_played = is_played
        if is_locked is not None:
            self.is_locked = is_locked
        self.db.update_episode_state(self)

    @property
    def title_markup(self):
        # Pango markup: episode title with the channel title in small print
        return '%s\n<small>%s</small>' % (xml.sax.saxutils.escape(self.title),
                xml.sax.saxutils.escape(self.channel.title))
    @property
    def maemo_markup(self):
        # Pango markup for the Maemo episode list
        if self.length > 0:
            length_str = '%s; ' % self.filesize_prop
        else:
            length_str = ''
        return ('<b>%s</b>\n<small>%s'+_('released %s')+ \
                '; '+_('from %s')+'</small>') % (\
                xml.sax.saxutils.escape(re.sub('\s+', ' ', self.title)), \
                xml.sax.saxutils.escape(length_str), \
                xml.sax.saxutils.escape(self.pubdate_prop), \
                xml.sax.saxutils.escape(re.sub('\s+', ' ', self.channel.title)))

    @property
    def maemo_remove_markup(self):
        # Pango markup for the Maemo "remove episodes" dialog
        if self.is_played:
            played_string = _('played')
        else:
            played_string = _('unplayed')
        downloaded_string = self.get_age_string()
        if not downloaded_string:
            downloaded_string = _('today')
        return ('<b>%s</b>\n<small>%s; %s; '+_('downloaded %s')+ \
                '; '+_('from %s')+'</small>') % (\
                xml.sax.saxutils.escape(self.title), \
                xml.sax.saxutils.escape(self.filesize_prop), \
                xml.sax.saxutils.escape(played_string), \
                xml.sax.saxutils.escape(downloaded_string), \
                xml.sax.saxutils.escape(self.channel.title))

    def age_in_days(self):
        # Age of the downloaded file in days (based on the file on disk)
        return util.file_age_in_days(self.local_filename(create=False, \
                check_only=True))

    age_int_prop = property(fget=age_in_days)

    def get_age_string(self):
        return util.file_age_to_string(self.age_in_days())

    age_prop = property(fget=get_age_string)
    def one_line_description( self):
        # Single-line, HTML-stripped description for list views
        lines = util.remove_html_tags(self.description or '').strip().splitlines()
        if not lines or lines[0] == '':
            return _('No description available')
        else:
            return ' '.join(lines)

    def delete_from_disk(self):
        # Best-effort removal of the downloaded file; failures are logged
        try:
            self.channel.delete_episode(self)
        except:
            log('Cannot delete episode from disk: %s', self.title, traceback=True, sender=self)

    def find_unique_file_name(self, url, filename, extension):
        """Return a filename not yet used by another episode in the DB.

        Appends " (2)", " (3)", ... until the name is unique.
        """
        current_try = util.sanitize_filename(filename, self.MAX_FILENAME_LENGTH)+extension
        next_try_id = 2
        lookup_url = None

        if self.filename == current_try and current_try is not None:
            # We already have this filename - good!
            return current_try

        while self.db.episode_filename_exists(current_try):
            current_try = '%s (%d)%s' % (filename, next_try_id, extension)
            next_try_id += 1

        return current_try
    def local_filename(self, create, force_update=False, check_only=False,
            template=None):
        """Get (and possibly generate) the local saving filename

        Pass create=True if you want this function to generate a
        new filename if none exists. You only want to do this when
        planning to create/download the file after calling this function.

        Normally, you should pass create=False. This will only
        create a filename when the file already exists from a previous
        version of gPodder (where we used md5 filenames). If the file
        does not exist (and the filename also does not exist), this
        function will return None.

        If you pass force_update=True to this function, it will try to
        find a new (better) filename and move the current file if this
        is the case. This is useful if (during the download) you get
        more information about the file, e.g. the mimetype and you want
        to include this information in the file name generation process.

        If check_only=True is passed to this function, it will never try
        to rename the file, even if would be a good idea. Use this if you
        only want to check if a file exists.

        If "template" is specified, it should be a filename that is to
        be used as a template for generating the "real" filename.

        The generated filename is stored in the database for future access.
        """
        # NOTE(review): ext is encoded to a byte string here because this
        # Python 2 codebase builds filesystem paths from byte strings
        ext = self.extension(may_call_local_filename=False).encode('utf-8', 'ignore')

        # For compatibility with already-downloaded episodes, we
        # have to know md5 filenames if they are downloaded already
        urldigest = hashlib.md5(self.url).hexdigest()

        if not create and self.filename is None:
            urldigest_filename = os.path.join(self.channel.save_dir, urldigest+ext)
            if os.path.exists(urldigest_filename):
                # The file exists, so set it up in our database
                log('Recovering pre-0.15.0 file: %s', urldigest_filename, sender=self)
                self.filename = urldigest+ext
                self.auto_filename = 1
                self.save()
                return urldigest_filename
            return None

        # We only want to check if the file exists, so don't try to
        # rename the file, even if it would be reasonable. See also:
        # http://bugs.gpodder.org/attachment.cgi?id=236
        if check_only:
            if self.filename is None:
                return None
            else:
                return os.path.join(self.channel.save_dir, self.filename)

        # Generate a (new) filename when none exists yet, when the caller
        # forces an update, or when we still carry an auto-generated
        # pre-0.15.0 md5-based name that can now be improved
        if self.filename is None or force_update or (self.auto_filename and self.filename == urldigest+ext):
            # Try to find a new filename for the current file
            if template is not None:
                # If template is specified, trust the template's extension
                episode_filename, ext = os.path.splitext(template)
            else:
                episode_filename, extension_UNUSED = util.filename_from_url(self.url)
            fn_template = util.sanitize_filename(episode_filename, self.MAX_FILENAME_LENGTH)

            if 'redirect' in fn_template and template is None:
                # This looks like a redirection URL - force URL resolving!
                log('Looks like a redirection to me: %s', self.url, sender=self)
                url = util.get_real_url(self.channel.authenticate_url(self.url))
                log('Redirection resolved to: %s', url, sender=self)
                (episode_filename, extension_UNUSED) = util.filename_from_url(url)
                fn_template = util.sanitize_filename(episode_filename, self.MAX_FILENAME_LENGTH)

            # Use the video title for YouTube downloads
            for yt_url in ('http://youtube.com/', 'http://www.youtube.com/'):
                if self.url.startswith(yt_url):
                    fn_template = util.sanitize_filename(os.path.basename(self.title), self.MAX_FILENAME_LENGTH)

            # If the basename is empty, use the md5 hexdigest of the URL
            if len(fn_template) == 0 or fn_template.startswith('redirect.'):
                log('Report to bugs.gpodder.org: Podcast at %s with episode URL: %s', self.channel.url, self.url, sender=self)
                fn_template = urldigest

            # Find a unique filename for this episode
            wanted_filename = self.find_unique_file_name(self.url, fn_template, ext)

            # We populate the filename field the first time - does the old file still exist?
            if self.filename is None and os.path.exists(os.path.join(self.channel.save_dir, urldigest+ext)):
                log('Found pre-0.15.0 downloaded file: %s', urldigest, sender=self)
                self.filename = urldigest+ext

            # The old file exists, but we have decided to want a different filename
            if self.filename is not None and wanted_filename != self.filename:
                # there might be an old download folder crawling around - move it!
                new_file_name = os.path.join(self.channel.save_dir, wanted_filename)
                old_file_name = os.path.join(self.channel.save_dir, self.filename)
                if os.path.exists(old_file_name) and not os.path.exists(new_file_name):
                    log('Renaming %s => %s', old_file_name, new_file_name, sender=self)
                    os.rename(old_file_name, new_file_name)
                elif force_update and not os.path.exists(old_file_name):
                    # When we call force_update, the file might not yet exist when we
                    # call it from the downloading code before saving the file
                    log('Choosing new filename: %s', new_file_name, sender=self)
                else:
                    log('Warning: %s exists or %s does not.', new_file_name, old_file_name, sender=self)
                log('Updating filename of %s to "%s".', self.url, wanted_filename, sender=self)
            elif self.filename is None:
                log('Setting filename to "%s".', wanted_filename, sender=self)
            else:
                log('Should update filename. Stays the same (%s). Good!', \
                        wanted_filename, sender=self)
            # Persist the chosen filename for future lookups
            self.filename = wanted_filename
            self.save()
            self.db.commit()

        return os.path.join(self.channel.save_dir, self.filename)
1094 def set_mimetype(self, mimetype, commit=False):
1095 """Sets the mimetype for this episode"""
1096 self.mimetype = mimetype
1097 if commit:
1098 self.db.commit()
1100 def extension(self, may_call_local_filename=True):
1101 filename, ext = util.filename_from_url(self.url)
1102 if may_call_local_filename:
1103 filename = self.local_filename(create=False)
1104 if filename is not None:
1105 filename, ext = os.path.splitext(filename)
1106 # if we can't detect the extension from the url fallback on the mimetype
1107 if ext == '' or util.file_type_by_extension(ext) is None:
1108 ext = util.extension_from_mimetype(self.mimetype)
1109 return ext
1111 def check_is_new(self, downloading=lambda e: False):
1113 Returns True if this episode is to be considered new.
1114 "Downloading" should be a callback that gets an episode
1115 as its parameter and returns True if the episode is
1116 being downloaded at the moment.
1118 return self.state == gpodder.STATE_NORMAL and \
1119 not self.is_played and \
1120 not downloading(self)
1122 def mark_new(self):
1123 self.state = gpodder.STATE_NORMAL
1124 self.is_played = False
1125 self.db.update_episode_state(self)
1127 def mark_old(self):
1128 self.is_played = True
1129 self.db.update_episode_state(self)
1131 def file_exists(self):
1132 filename = self.local_filename(create=False, check_only=True)
1133 if filename is None:
1134 return False
1135 else:
1136 return os.path.exists(filename)
1138 def was_downloaded(self, and_exists=False):
1139 if self.state != gpodder.STATE_DOWNLOADED:
1140 return False
1141 if and_exists and not self.file_exists():
1142 return False
1143 return True
1145 def sync_filename(self, use_custom=False, custom_format=None):
1146 if use_custom:
1147 return util.object_string_formatter(custom_format,
1148 episode=self, podcast=self.channel)
1149 else:
1150 return self.title
1152 def file_type(self):
1153 # Assume all YouTube links are video files
1154 if youtube.is_video_link(self.url):
1155 return 'video'
1157 return util.file_type_by_extension(self.extension())
1159 @property
1160 def basename( self):
1161 return os.path.splitext( os.path.basename( self.url))[0]
1163 @property
1164 def published( self):
1166 Returns published date as YYYYMMDD (or 00000000 if not available)
1168 try:
1169 return datetime.datetime.fromtimestamp(self.pubDate).strftime('%Y%m%d')
1170 except:
1171 log( 'Cannot format pubDate for "%s".', self.title, sender = self)
1172 return '00000000'
1174 @property
1175 def pubtime(self):
1177 Returns published time as HHMM (or 0000 if not available)
1179 try:
1180 return datetime.datetime.fromtimestamp(self.pubDate).strftime('%H%M')
1181 except:
1182 log('Cannot format pubDate (time) for "%s".', self.title, sender=self)
1183 return '0000'
1185 def playlist_title(self):
1186 """Return a title for this episode in a playlist
1188 The title will be composed of the podcast name, the
1189 episode name and the publication date. The return
1190 value is the canonical representation of this episode
1191 in playlists (for example, M3U playlists).
1193 return '%s - %s (%s)' % (self.channel.title, \
1194 self.title, \
1195 self.cute_pubdate())
1197 def cute_pubdate(self):
1198 result = util.format_date(self.pubDate)
1199 if result is None:
1200 return '(%s)' % _('unknown')
1201 else:
1202 return result
1204 pubdate_prop = property(fget=cute_pubdate)
1206 def calculate_filesize( self):
1207 filename = self.local_filename(create=False)
1208 if filename is None:
1209 log('calculate_filesized called, but filename is None!', sender=self)
1210 try:
1211 self.length = os.path.getsize(filename)
1212 except:
1213 log( 'Could not get filesize for %s.', self.url)
1215 def get_play_info_string(self):
1216 if self.current_position > 0 and \
1217 self.total_time <= self.current_position:
1218 return '%s (%s)' % (_('Finished'), self.get_duration_string(),)
1219 if self.current_position > 0:
1220 return '%s / %s' % (self.get_position_string(), \
1221 self.get_duration_string())
1222 else:
1223 return self.get_duration_string()
1225 def get_position_string(self):
1226 return util.format_time(self.current_position)
1228 def get_duration_string(self):
1229 return util.format_time(self.total_time)
1231 def get_filesize_string(self):
1232 return util.format_filesize(self.length)
1234 filesize_prop = property(fget=get_filesize_string)
1236 def get_played_string( self):
1237 if not self.is_played:
1238 return _('Unplayed')
1240 return ''
1242 played_prop = property(fget=get_played_string)
1244 def is_duplicate(self, episode):
1245 if self.title == episode.title and self.pubDate == episode.pubDate:
1246 log('Possible duplicate detected: %s', self.title)
1247 return True
1248 return False
1250 def duplicate_id(self):
1251 return hash((self.title, self.pubDate))
1253 def update_from(self, episode):
1254 for k in ('title', 'url', 'description', 'link', 'pubDate', 'guid'):
1255 setattr(self, k, getattr(episode, k))