src/gpodder/model.py

   1 # -*- coding: utf-8 -*-
   2 #
   3 # gPodder - A media aggregator and podcast client
   4 # Copyright (c) 2005-2010 Thomas Perl and the gPodder Team
   5 #
   6 # gPodder is free software; you can redistribute it and/or modify
   7 # it under the terms of the GNU General Public License as published by
   8 # the Free Software Foundation; either version 3 of the License, or
   9 # (at your option) any later version.
  10 #
  11 # gPodder is distributed in the hope that it will be useful,
  12 # but WITHOUT ANY WARRANTY; without even the implied warranty of
  13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  14 # GNU General Public License for more details.
  15 #
  16 # You should have received a copy of the GNU General Public License
  17 # along with this program.  If not, see <http://www.gnu.org/licenses/>.
  18 #
  19
  20
  21 #
  22 #  gpodder.model - Core model classes for gPodder (2009-08-13)
  23 #  Based on libpodcasts.py (thp, 2005-10-29)
  24 #
  25
  26 import gpodder
  27 from gpodder import util
  28 from gpodder import feedcore
  29 from gpodder import youtube
  30 from gpodder import corestats
  31 from gpodder import gstreamer
  32
  33 from gpodder.liblogger import log
  34
  35 import os
  36 import re
  37 import glob
  38 import shutil
  39 import time
  40 import datetime
  41 import rfc822
  42 import hashlib
  43 import feedparser
  44 import xml.sax.saxutils
  45
# Short alias for gpodder.gettext; wraps user-visible messages in this module
_ = gpodder.gettext
  47
  48
  49 class CustomFeed(feedcore.ExceptionWithData): pass
  50
  51 class gPodderFetcher(feedcore.Fetcher):
  52     """
  53     This class extends the feedcore Fetcher with the gPodder User-Agent and the
  54     Proxy handler based on the current settings in gPodder and provides a
  55     convenience method (fetch_channel) for use by PodcastChannel objects.
  56     """
  57     custom_handlers = []
  58
  59     def __init__(self):
  60         feedcore.Fetcher.__init__(self, gpodder.user_agent)
  61
  62     def fetch_channel(self, channel):
  63         etag = channel.etag
  64         modified = feedparser._parse_date(channel.last_modified)
  65         # If we have a username or password, rebuild the url with them included
  66         # Note: using a HTTPBasicAuthHandler would be pain because we need to
  67         # know the realm. It can be done, but I think this method works, too
  68         url = channel.authenticate_url(channel.url)
  69         for handler in self.custom_handlers:
  70             custom_feed = handler.handle_url(url)
  71             if custom_feed is not None:
  72                 raise CustomFeed(custom_feed)
  73         self.fetch(url, etag, modified)
  74
  75     def _resolve_url(self, url):
  76         return youtube.get_real_channel_url(url)
  77
  78     @classmethod
  79     def register(cls, handler):
  80         cls.custom_handlers.append(handler)
  81
#    def _get_handlers(self):
#        # Add a ProxyHandler for fetching data via a proxy server
#        proxies = {'http': 'http://proxy.example.org:8080'}
#        return [urllib2.ProxyHandler(proxies)]
  86
# The "register" classmethod of gPodderFetcher is exposed here as a
# module-level name, so that external code can add custom handlers
register_custom_handler = gPodderFetcher.register
  89
  90 class PodcastModelObject(object):
  91     """
  92     A generic base class for our podcast model providing common helper
  93     and utility functions.
  94     """
  95
  96     @classmethod
  97     def create_from_dict(cls, d, *args):
  98         """
  99         Create a new object, passing "args" to the constructor
 100         and then updating the object with the values from "d".
 101         """
 102         o = cls(*args)
 103         o.update_from_dict(d)
 104         return o
 105
 106     def update_from_dict(self, d):
 107         """
 108         Updates the attributes of this object with values from the
 109         dictionary "d" by using the keys found in "d".
 110         """
 111         for k in d:
 112             if hasattr(self, k):
 113                 setattr(self, k, d[k])
 114
 115
 116 class PodcastChannel(PodcastModelObject):
 117     """holds data for a complete channel"""
 118     MAX_FOLDERNAME_LENGTH = 150
 119     SECONDS_PER_WEEK = 7*24*60*60
 120
 121     feed_fetcher = gPodderFetcher()
 122
    @classmethod
    def build_factory(cls, download_dir):
        """
        Build a factory function bound to "download_dir".

        The returned callable takes a dictionary of values and the
        database object and returns an object of this class created
        through create_from_dict(). It is handed to db.load_channels()
        (see load_from_db and load).
        """
        def factory(dict, db):
            # "db" and "download_dir" become the constructor arguments
            return cls.create_from_dict(dict, db, download_dir)
        return factory
 128
    @classmethod
    def load_from_db(cls, db, download_dir):
        """
        Return whatever db.load_channels() yields when it is given a
        factory that creates objects of this class for "download_dir".
        """
        return db.load_channels(factory=cls.build_factory(download_dir))
 132
 133     @classmethod
 134     def load(cls, db, url, create=True, authentication_tokens=None,\
 135             max_episodes=0, download_dir=None, allow_empty_feeds=False):
 136         if isinstance(url, unicode):
 137             url = url.encode('utf-8')
 138
 139         tmp = db.load_channels(factory=cls.build_factory(download_dir), url=url)
 140         if len(tmp):
 141             return tmp[0]
 142         elif create:
 143             tmp = PodcastChannel(db, download_dir)
 144             tmp.url = url
 145             if authentication_tokens is not None:
 146                 tmp.username = authentication_tokens[0]
 147                 tmp.password = authentication_tokens[1]
 148
 149             tmp.update(max_episodes)
 150             tmp.save()
 151             db.force_last_new(tmp)
 152             # Subscribing to empty feeds should yield an error (except if
 153             # the user specifically allows empty feeds in the config UI)
 154             if sum(tmp.get_statistics()) == 0 and not allow_empty_feeds:
 155                 tmp.delete()
 156                 raise Exception(_('No downloadable episodes in feed'))
 157             return tmp
 158
    def episode_factory(self, d, db__parameter_is_unused=None):
        """
        This function takes a dictionary containing key-value pairs for
        episodes and returns a new PodcastEpisode object that is connected
        to this PodcastChannel object.

        The second parameter is accepted but never used; it only keeps
        the signature in line with the (dict, db) factories created by
        build_factory.

        Returns: A new PodcastEpisode object
        """
        return PodcastEpisode.create_from_dict(d, self)
 168
 169     def _consume_custom_feed(self, custom_feed, max_episodes=0):
 170         self.title = custom_feed.get_title()
 171         self.link = custom_feed.get_link()
 172         self.description = custom_feed.get_description()
 173         self.image = custom_feed.get_image()
 174         self.pubDate = time.time()
 175         self.save()
 176
 177         guids = [episode.guid for episode in self.get_all_episodes()]
 178
 179         # Insert newly-found episodes into the database
 180         custom_feed.get_new_episodes(self, guids)
 181
 182         self.save()
 183
 184         self.db.purge(max_episodes, self.id)
 185
 186     def _consume_updated_feed(self, feed, max_episodes=0):
 187         self.parse_error = feed.get('bozo_exception', None)
 188
 189         self.title = feed.feed.get('title', self.url)
 190         self.link = feed.feed.get('link', self.link)
 191         self.description = feed.feed.get('subtitle', self.description)
 192         # Start YouTube-specific title FIX
 193         YOUTUBE_PREFIX = 'Uploads by '
 194         if self.title.startswith(YOUTUBE_PREFIX):
 195             self.title = self.title[len(YOUTUBE_PREFIX):] + ' on YouTube'
 196         # End YouTube-specific title FIX
 197
 198         try:
 199             self.pubDate = rfc822.mktime_tz(feed.feed.get('updated_parsed', None+(0,)))
 200         except:
 201             self.pubDate = time.time()
 202
 203         if hasattr(feed.feed, 'image'):
 204             for attribute in ('href', 'url'):
 205                 new_value = getattr(feed.feed.image, attribute, None)
 206                 if new_value is not None:
 207                     log('Found cover art in %s: %s', attribute, new_value)
 208                     self.image = new_value
 209
 210         if hasattr(feed.feed, 'icon'):
 211             self.image = feed.feed.icon
 212
 213         self.save()
 214
 215         # Load all episodes to update them properly.
 216         existing = self.get_all_episodes()
 217
 218         # We can limit the maximum number of entries that gPodder will parse
 219         if max_episodes > 0 and len(feed.entries) > max_episodes:
 220             entries = feed.entries[:max_episodes]
 221         else:
 222             entries = feed.entries
 223
 224         # Title + PubDate hashes for existing episodes
 225         existing_dupes = dict((e.duplicate_id(), e) for e in existing)
 226
 227         # GUID-based existing episode list
 228         existing_guids = dict((e.guid, e) for e in existing)
 229
 230         # Get most recent pubDate of all episodes
 231         last_pubdate = self.db.get_last_pubdate(self) or 0
 232
 233         # Search all entries for new episodes
 234         for entry in entries:
 235             try:
 236                 episode = PodcastEpisode.from_feedparser_entry(entry, self)
 237                 if episode is not None and not episode.title:
 238                     episode.title, ext = os.path.splitext(os.path.basename(episode.url))
 239             except Exception, e:
 240                 log('Cannot instantiate episode: %s. Skipping.', e, sender=self, traceback=True)
 241                 continue
 242
 243             if episode is None:
 244                 continue
 245
 246             # Detect (and update) existing episode based on GUIDs
 247             existing_episode = existing_guids.get(episode.guid, None)
 248             if existing_episode:
 249                 existing_episode.update_from(episode)
 250                 existing_episode.save()
 251                 continue
 252
 253             # Detect (and update) existing episode based on duplicate ID
 254             existing_episode = existing_dupes.get(episode.duplicate_id(), None)
 255             if existing_episode:
 256                 if existing_episode.is_duplicate(episode):
 257                     existing_episode.update_from(episode)
 258                     existing_episode.save()
 259                     continue
 260
 261             # Workaround for bug 340: If the episode has been
 262             # published earlier than one week before the most
 263             # recent existing episode, do not mark it as new.
 264             if episode.pubDate < last_pubdate - self.SECONDS_PER_WEEK:
 265                 log('Episode with old date: %s', episode.title, sender=self)
 266                 episode.is_played = True
 267
 268             episode.save()
 269
 270         # Remove "unreachable" episodes - episodes that have not been
 271         # downloaded and that the feed does not list as downloadable anymore
 272         if self.id is not None:
 273             seen_guids = set(e.guid for e in feed.entries if hasattr(e, 'guid'))
 274             episodes_to_purge = (e for e in existing if \
 275                     e.state != gpodder.STATE_DOWNLOADED and \
 276                     e.guid not in seen_guids and e.guid is not None)
 277             for episode in episodes_to_purge:
 278                 log('Episode removed from feed: %s (%s)', episode.title, \
 279                         episode.guid, sender=self)
 280                 self.db.delete_episode_by_guid(episode.guid, self.id)
 281
 282         # This *might* cause episodes to be skipped if there were more than
 283         # max_episodes_per_feed items added to the feed between updates.
 284         # The benefit is that it prevents old episodes from apearing as new
 285         # in certain situations (see bug #340).
 286         self.db.purge(max_episodes, self.id)
 287
    def update_channel_lock(self):
        """Hand this channel to db.update_channel_lock()."""
        self.db.update_channel_lock(self)
 290
 291     def _update_etag_modified(self, feed):
 292         self.updated_timestamp = time.time()
 293         self.calculate_publish_behaviour()
 294         self.etag = feed.headers.get('etag', self.etag)
 295         self.last_modified = feed.headers.get('last-modified', self.last_modified)
 296
 297     def query_automatic_update(self):
 298         """Query if this channel should be updated automatically
 299
 300         Returns True if the update should happen in automatic
 301         mode or False if this channel should be skipped (timeout
 302         not yet reached or release not expected right now).
 303         """
 304         updated = self.updated_timestamp
 305         expected = self.release_expected
 306
 307         now = time.time()
 308         one_day_ago = now - 60*60*24
 309         lastcheck = now - 60*10
 310
 311         return updated < one_day_ago or \
 312                 (expected < now and updated < lastcheck)
 313
 314     def update(self, max_episodes=0):
 315         try:
 316             self.feed_fetcher.fetch_channel(self)
 317         except CustomFeed, updated:
 318             custom_feed = updated.data
 319             self._consume_custom_feed(custom_feed, max_episodes)
 320             self.save()
 321         except feedcore.UpdatedFeed, updated:
 322             feed = updated.data
 323             self._consume_updated_feed(feed, max_episodes)
 324             self._update_etag_modified(feed)
 325             self.save()
 326         except feedcore.NewLocation, updated:
 327             feed = updated.data
 328             self.url = feed.href
 329             self._consume_updated_feed(feed, max_episodes)
 330             self._update_etag_modified(feed)
 331             self.save()
 332         except feedcore.NotModified, updated:
 333             feed = updated.data
 334             self._update_etag_modified(feed)
 335             self.save()
 336         except Exception, e:
 337             # "Not really" errors
 338             #feedcore.AuthenticationRequired
 339             # Temporary errors
 340             #feedcore.Offline
 341             #feedcore.BadRequest
 342             #feedcore.InternalServerError
 343             #feedcore.WifiLogin
 344             # Permanent errors
 345             #feedcore.Unsubscribe
 346             #feedcore.NotFound
 347             #feedcore.InvalidFeed
 348             #feedcore.UnknownStatusCode
 349             raise
 350
 351         gpodder.user_extensions.call('channel_updated', self)
 352         self.db.commit()
 353
    def delete(self):
        """Hand this channel to db.delete_channel()."""
        self.db.delete_channel(self)
 356
    def save(self):
        """
        Call the "channel_save" extension hook with this channel and
        then store the channel through db.save_channel().
        """
        gpodder.user_extensions.call('channel_save', self)
        self.db.save_channel(self)
 360
 361     def get_statistics(self):
 362         if self.id is None:
 363             return (0, 0, 0, 0, 0)
 364         else:
 365             return self.db.get_channel_count(int(self.id))
 366
    def authenticate_url(self, url):
        """
        Return the result of util.url_add_authentication() for "url"
        and the username and password stored on this channel.
        """
        return util.url_add_authentication(url, self.username, self.password)
 369
 370     def __init__(self, db, download_dir):
 371         self.db = db
 372         self.download_dir = download_dir
 373         self.id = None
 374         self.url = None
 375         self.title = ''
 376         self.link = ''
 377         self.description = ''
 378         self.image = None
 379         self.pubDate = 0
 380         self.parse_error = None
 381         self.foldername = None
 382         self.auto_foldername = 1 # automatically generated foldername
 383
 384         # should this channel be synced to devices? (ex: iPod)
 385         self.sync_to_devices = True
 386         # to which playlist should be synced
 387         self.device_playlist_name = 'gPodder'
 388         # if set, this overrides the channel-provided title
 389         self.override_title = ''
 390         self.username = ''
 391         self.password = ''
 392
 393         self.last_modified = None
 394         self.etag = None
 395
 396         self.save_dir_size = 0
 397         self.__save_dir_size_set = False
 398
 399         self.channel_is_locked = False
 400
 401         self.release_expected = time.time()
 402         self.release_deviation = 0
 403         self.updated_timestamp = 0
 404
 405     def calculate_publish_behaviour(self):
 406         episodes = self.db.load_episodes(self, factory=self.episode_factory, limit=30)
 407         if len(episodes) < 3:
 408             return
 409
 410         deltas = []
 411         latest = max(e.pubDate for e in episodes)
 412         for index in range(len(episodes)-1):
 413             if episodes[index].pubDate != 0 and episodes[index+1].pubDate != 0:
 414                 deltas.append(episodes[index].pubDate - episodes[index+1].pubDate)
 415
 416         if len(deltas) > 1:
 417             stats = corestats.Stats(deltas)
 418             self.release_expected = min([latest+stats.stdev(), latest+(stats.min()+stats.avg())*.5])
 419             self.release_deviation = stats.stdev()
 420         else:
 421             self.release_expected = latest
 422             self.release_deviation = 0
 423
 424     def request_save_dir_size(self):
 425         if not self.__save_dir_size_set:
 426             self.update_save_dir_size()
 427         self.__save_dir_size_set = True
 428
    def update_save_dir_size(self):
        """
        Store the result of util.calculate_size() for the save_dir of
        this channel in "save_dir_size".
        """
        self.save_dir_size = util.calculate_size(self.save_dir)
 431
 432     def get_title( self):
 433         if self.override_title:
 434             return self.override_title
 435         elif not self.__title.strip():
 436             return self.url
 437         else:
 438             return self.__title
 439
 440     def set_title( self, value):
 441         self.__title = value.strip()
 442
 443     title = property(fget=get_title,
 444                      fset=set_title)
 445
 446     def set_custom_title( self, custom_title):
 447         custom_title = custom_title.strip()
 448
 449         # if the custom title is the same as we have
 450         if custom_title == self.override_title:
 451             return
 452
 453         # if custom title is the same as channel title and we didn't have a custom title
 454         if custom_title == self.__title and self.override_title == '':
 455             return
 456
 457         # make sure self.foldername is initialized
 458         self.get_save_dir()
 459
 460         # rename folder if custom_title looks sane
 461         new_folder_name = self.find_unique_folder_name(custom_title)
 462         if len(new_folder_name) > 0 and new_folder_name != self.foldername:
 463             log('Changing foldername based on custom title: %s', custom_title, sender=self)
 464             new_folder = os.path.join(self.download_dir, new_folder_name)
 465             old_folder = os.path.join(self.download_dir, self.foldername)
 466             if os.path.exists(old_folder):
 467                 if not os.path.exists(new_folder):
 468                     # Old folder exists, new folder does not -> simply rename
 469                     log('Renaming %s => %s', old_folder, new_folder, sender=self)
 470                     os.rename(old_folder, new_folder)
 471                 else:
 472                     # Both folders exist -> move files and delete old folder
 473                     log('Moving files from %s to %s', old_folder, new_folder, sender=self)
 474                     for file in glob.glob(os.path.join(old_folder, '*')):
 475                         shutil.move(file, new_folder)
 476                     log('Removing %s', old_folder, sender=self)
 477                     shutil.rmtree(old_folder, ignore_errors=True)
 478             self.foldername = new_folder_name
 479             self.save()
 480
 481         if custom_title != self.__title:
 482             self.override_title = custom_title
 483         else:
 484             self.override_title = ''
 485
    def get_downloaded_episodes(self):
        """
        Load the episodes of this channel that are in the state
        gpodder.STATE_DOWNLOADED from the database.
        """
        return self.db.load_episodes(self, factory=self.episode_factory, state=gpodder.STATE_DOWNLOADED)
 488
 489     def get_new_episodes(self, downloading=lambda e: False):
 490         """
 491         Get a list of new episodes. You can optionally specify
 492         "downloading" as a callback that takes an episode as
 493         a parameter and returns True if the episode is currently
 494         being downloaded or False if not.
 495
 496         By default, "downloading" is implemented so that it
 497         reports all episodes as not downloading.
 498         """
 499         return [episode for episode in self.db.load_episodes(self, \
 500                 factory=self.episode_factory, state=gpodder.STATE_NORMAL) if \
 501                 episode.check_is_new(downloading=downloading)]
 502
 503     def get_playlist_filename(self):
 504         # If the save_dir doesn't end with a slash (which it really should
 505         # not, if the implementation is correct, we can just append .m3u :)
 506         assert self.save_dir[-1] != '/'
 507         return self.save_dir+'.m3u'
 508
 509     def update_m3u_playlist(self):
 510         m3u_filename = self.get_playlist_filename()
 511
 512         downloaded_episodes = self.get_downloaded_episodes()
 513         if not downloaded_episodes:
 514             log('No episodes - removing %s', m3u_filename, sender=self)
 515             util.delete_file(m3u_filename)
 516             return
 517
 518         log('Writing playlist to %s', m3u_filename, sender=self)
 519         f = open(m3u_filename, 'w')
 520         f.write('#EXTM3U\n')
 521
 522         for episode in PodcastEpisode.sort_by_pubdate(downloaded_episodes):
 523             if episode.was_downloaded(and_exists=True):
 524                 filename = episode.local_filename(create=False)
 525                 assert filename is not None
 526
 527                 if os.path.dirname(filename).startswith(os.path.dirname(m3u_filename)):
 528                     filename = filename[len(os.path.dirname(m3u_filename)+os.sep):]
 529                 f.write('#EXTINF:0,'+self.title+' - '+episode.title+' ('+episode.cute_pubdate()+')\n')
 530                 f.write(filename+'\n')
 531
 532         f.close()
 533
    def get_episode_by_url(self, url):
        """
        Look up a single episode of this channel by its URL using
        db.load_single_episode().
        """
        return self.db.load_single_episode(self, \
                factory=self.episode_factory, url=url)
 537
    def get_episode_by_filename(self, filename):
        """
        Look up a single episode of this channel by its filename
        using db.load_single_episode().
        """
        return self.db.load_single_episode(self, \
                factory=self.episode_factory, filename=filename)
 541
    def get_all_episodes(self):
        """
        Load the episodes of this channel from the database, without
        any state filter or limit.
        """
        return self.db.load_episodes(self, factory=self.episode_factory)
 544
 545     def find_unique_folder_name(self, foldername):
 546         # Remove trailing dots to avoid errors on Windows (bug 600)
 547         foldername = foldername.strip().rstrip('.')
 548
 549         current_try = util.sanitize_filename(foldername, \
 550                 self.MAX_FOLDERNAME_LENGTH)
 551         next_try_id = 2
 552
 553         while True:
 554             if self.db.channel_foldername_exists(current_try):
 555                 current_try = '%s (%d)' % (foldername, next_try_id)
 556                 next_try_id += 1
 557             else:
 558                 return current_try
 559
    def get_save_dir(self):
        """
        Return the download folder of this channel (download_dir
        joined with the folder name), creating it if necessary.

        If no folder name is set yet - or if it was generated
        automatically and still equals the md5 digest of the URL or
        starts with the sanitized URL - a new folder name is derived
        from the title, an existing old folder is renamed or merged
        into the new one and the channel is saved.
        """
        urldigest = hashlib.md5(self.url).hexdigest()
        sanitizedurl = util.sanitize_filename(self.url, self.MAX_FOLDERNAME_LENGTH)
        if self.foldername is None or (self.auto_foldername and (self.foldername == urldigest or self.foldername.startswith(sanitizedurl))):
            # we must change the folder name, because it has not been set manually
            fn_template = util.sanitize_filename(self.title, self.MAX_FOLDERNAME_LENGTH)

            # if this is an empty string, try the basename
            if len(fn_template) == 0:
                log('That is one ugly feed you have here! (Report this to bugs.gpodder.org: %s)', self.url, sender=self)
                fn_template = util.sanitize_filename(os.path.basename(self.url), self.MAX_FOLDERNAME_LENGTH)

            # If the basename is also empty, use the md5 hexdigest of the URL
            if len(fn_template) == 0:
                log('That is one REALLY ugly feed you have here! (Report this to bugs.gpodder.org: %s)', self.url, sender=self)
                fn_template = urldigest # no need for sanitize_filename here

            # Find a unique folder name for this podcast
            wanted_foldername = self.find_unique_folder_name(fn_template)

            # if the foldername has not been set, check if the (old) md5 filename exists
            if self.foldername is None and os.path.exists(os.path.join(self.download_dir, urldigest)):
                log('Found pre-0.15.0 download folder for %s: %s', self.title, urldigest, sender=self)
                self.foldername = urldigest

            # we have a valid, new folder name in "wanted_foldername" -> use that!
            if self.foldername is not None and wanted_foldername != self.foldername:
                # there might be an old download folder crawling around - move it!
                new_folder_name = os.path.join(self.download_dir, wanted_foldername)
                old_folder_name = os.path.join(self.download_dir, self.foldername)
                if os.path.exists(old_folder_name):
                    if not os.path.exists(new_folder_name):
                        # Old folder exists, new folder does not -> simply rename
                        log('Renaming %s => %s', old_folder_name, new_folder_name, sender=self)
                        os.rename(old_folder_name, new_folder_name)
                    else:
                        # Both folders exist -> move files and delete old folder
                        log('Moving files from %s to %s', old_folder_name, new_folder_name, sender=self)
                        for file in glob.glob(os.path.join(old_folder_name, '*')):
                            shutil.move(file, new_folder_name)
                        log('Removing %s', old_folder_name, sender=self)
                        shutil.rmtree(old_folder_name, ignore_errors=True)
            # Remember the new name in the database
            log('Updating foldername of %s to "%s".', self.url, wanted_foldername, sender=self)
            self.foldername = wanted_foldername
            self.save()

        save_dir = os.path.join(self.download_dir, self.foldername)

        # Create save_dir if it does not yet exist
        # (a failure is only logged; the path is returned nevertheless)
        if not util.make_directory( save_dir):
            log( 'Could not create save_dir: %s', save_dir, sender = self)

        return save_dir

    # Read-only property; note that every access runs get_save_dir()
    save_dir = property(fget=get_save_dir)
 615
 616     def remove_downloaded(self):
 617         # Remove the playlist file if it exists
 618         m3u_filename = self.get_playlist_filename()
 619         if os.path.exists(m3u_filename):
 620             util.delete_file(m3u_filename)
 621
 622         # Remove the download directory
 623         shutil.rmtree(self.save_dir, True)
 624
 625     @property
 626     def cover_file(self):
 627         new_name = os.path.join(self.save_dir, 'folder.jpg')
 628         if not os.path.exists(new_name):
 629             old_names = ('cover', '.cover')
 630             for old_name in old_names:
 631                 filename = os.path.join(self.save_dir, old_name)
 632                 if os.path.exists(filename):
 633                     shutil.move(filename, new_name)
 634                     return new_name
 635
 636         return new_name
 637
 638     def delete_episode(self, episode):
 639         filename = episode.local_filename(create=False, check_only=True)
 640         if filename is not None:
 641             util.delete_file(filename)
 642
 643         episode.set_state(gpodder.STATE_DELETED)
 644
 645
 646 class PodcastEpisode(PodcastModelObject):
 647     """holds data for one object in a channel"""
 648     MAX_FILENAME_LENGTH = 200
 649
    def _get_played(self):
        # Getter of the "played" property
        return self.is_played

    def _set_played(self, played):
        # Setter of the "played" property
        self.is_played = played

    # Alias "is_played" to "played" for DB column mapping
    played = property(fget=_get_played, fset=_set_played)
 658
    def _get_locked(self):
        # Getter of the "locked" property
        return self.is_locked

    def _set_locked(self, locked):
        # Setter of the "locked" property
        self.is_locked = locked

    # Alias "is_locked" to "locked" for DB column mapping
    locked = property(fget=_get_locked, fset=_set_locked)
 667
    def _get_channel_id(self):
        # The ID is always taken from the channel object
        return self.channel.id

    def _set_channel_id(self, channel_id):
        # Nothing is stored here: the value is only checked against
        # the ID of the channel this episode is connected to
        assert self.channel.id == channel_id

    # Accessor for the "channel_id" DB column
    channel_id = property(fget=_get_channel_id, fset=_set_channel_id)
 676
 677     @staticmethod
 678     def sort_by_pubdate(episodes, reverse=False):
 679         """Sort a list of PodcastEpisode objects chronologically
 680
 681         Returns a iterable, sorted sequence of the episodes
 682         """
 683         key_pubdate = lambda e: e.pubDate
 684         return sorted(episodes, key=key_pubdate, reverse=reverse)
 685
 686     def reload_from_db(self):
 687         """
 688         Re-reads all episode details for this object from the
 689         database and updates this object accordingly. Can be
 690         used to refresh existing objects when the database has
 691         been updated (e.g. the filename has been set after a
 692         download where it was not set before the download)
 693         """
 694         d = self.db.load_episode(self.id)
 695         self.update_from_dict(d or {})
 696         return self
 697
 698     def has_website_link(self):
 699         return bool(self.link) and (self.link != self.url or \
 700                 youtube.is_video_link(self.link))
 701
 702     @staticmethod
 703     def from_feedparser_entry(entry, channel):
 704         episode = PodcastEpisode(channel)
 705
 706         episode.title = entry.get('title', '')
 707         episode.link = entry.get('link', '')
 708         episode.description = entry.get('summary', '')
 709
 710         try:
 711             # Parse iTunes-specific podcast duration metadata
 712             total_time = util.parse_time(entry.get('itunes_duration', ''))
 713             episode.total_time = total_time
 714         except:
 715             pass
 716
        # Fallback to subtitle if summary is not available
 718         if not episode.description:
 719             episode.description = entry.get('subtitle', '')
 720
 721         episode.guid = entry.get('id', '')
 722         if entry.get('updated_parsed', None):
 723             episode.pubDate = rfc822.mktime_tz(entry.updated_parsed+(0,))
 724
 725         enclosures = entry.get('enclosures', ())
 726         audio_available = any(e.get('type', '').startswith('audio/') \
 727                 for e in enclosures)
 728         video_available = any(e.get('type', '').startswith('video/') \
 729                 for e in enclosures)
 730
 731         # Enclosures
 732         for e in enclosures:
 733             episode.mimetype = e.get('type', 'application/octet-stream')
 734             if episode.mimetype == '':
 735                 # See Maemo bug 10036
 736                 log('Fixing empty mimetype in ugly feed', sender=episode)
 737                 episode.mimetype = 'application/octet-stream'
 738
 739             if '/' not in episode.mimetype:
 740                 continue
 741
 742             # Skip images in feeds if audio or video is available (bug 979)
 743             if episode.mimetype.startswith('image/') and \
 744                     (audio_available or video_available):
 745                 continue
 746
 747             episode.url = util.normalize_feed_url(e.get('href', ''))
 748             if not episode.url:
 749                 continue
 750
 751             try:
 752                 episode.length = int(e.length) or -1
 753             except:
 754                 episode.length = -1
 755
 756             return episode
 757
 758         # Media RSS content
 759         for m in entry.get('media_content', ()):
 760             episode.mimetype = m.get('type', 'application/octet-stream')
 761             if '/' not in episode.mimetype:
 762                 continue
 763
 764             episode.url = util.normalize_feed_url(m.get('url', ''))
 765             if not episode.url:
 766                 continue
 767
 768             try:
 769                 episode.length = int(m.fileSize) or -1
 770             except:
 771                 episode.length = -1
 772
 773             return episode
 774
 775         # Brute-force detection of any links
 776         for l in entry.get('links', ()):
 777             episode.url = util.normalize_feed_url(l.get('href', ''))
 778             if not episode.url:
 779                 continue
 780
 781             if youtube.is_video_link(episode.url):
 782                 return episode
 783
 784             # Check if we can resolve this link to a audio/video file
 785             filename, extension = util.filename_from_url(episode.url)
 786             file_type = util.file_type_by_extension(extension)
 787             if file_type is None and hasattr(l, 'type'):
 788                 extension = util.extension_from_mimetype(l.type)
 789                 file_type = util.file_type_by_extension(extension)
 790
 791             # The link points to a audio or video file - use it!
 792             if file_type is not None:
 793                 return episode
 794
 795         # Scan MP3 links in description text
 796         mp3s = re.compile(r'http://[^"]*\.mp3')
 797         for content in entry.get('content', ()):
 798             html = content.value
 799             for match in mp3s.finditer(html):
 800                 episode.url = match.group(0)
 801                 return episode
 802
 803         return None
 804
 805     def __init__(self, channel):
 806         self.db = channel.db
 807         # Used by Storage for faster saving
 808         self.id = None
 809         self.url = ''
 810         self.title = ''
 811         self.length = 0
 812         self.mimetype = 'application/octet-stream'
 813         self.guid = ''
 814         self.description = ''
 815         self.link = ''
 816         self.channel = channel
 817         self.pubDate = 0
 818         self.filename = None
 819         self.auto_filename = 1 # automatically generated filename
 820
 821         self.state = gpodder.STATE_NORMAL
 822         self.is_played = False
 823
 824         # Initialize the "is_locked" property
 825         self._is_locked = False
 826         self.is_locked = channel.channel_is_locked
 827
 828         # Time attributes
 829         self.total_time = 0
 830         self.current_position = 0
 831         self.current_position_updated = 0
 832
    def get_is_locked(self):
        """Getter of the "is_locked" property (returns self._is_locked)"""
        return self._is_locked

    def set_is_locked(self, is_locked):
        """Setter of the "is_locked" property

        The value is coerced with bool() before it is stored.
        """
        self._is_locked = bool(is_locked)

    # Lock flag of the episode, backed by self._is_locked
    is_locked = property(fget=get_is_locked, fset=set_is_locked)
 840
 841     def save(self):
 842         if self.state != gpodder.STATE_DOWNLOADED and self.file_exists():
 843             self.state = gpodder.STATE_DOWNLOADED
 844         gpodder.user_extensions.call('episode_save', self)
 845         self.db.save_episode(self)
 846
    def on_downloaded(self, filename):
        """Update the episode after its file has been downloaded

        Marks the episode as downloaded and unplayed and takes the
        length from the size of "filename" on disk. If no total_time
        is known yet, the track length is detected via gstreamer.

        When a track length was detected, the episode is stored with
        db.save_episode(); in every other case (total_time already
        set, nothing detected or an error during detection) it is
        stored with db.save_downloaded_episode(). Both paths commit.
        """
        self.state = gpodder.STATE_DOWNLOADED
        self.is_played = False
        self.length = os.path.getsize(filename)

        if not self.total_time:
            try:
                length = gstreamer.get_track_length(filename)
                if length is not None:
                    # NOTE(review): presumably get_track_length() gives
                    # milliseconds, as the result is logged in seconds
                    # after dividing by 1000 - confirm
                    length = int(length/1000)
                    log('Detected media length: %d seconds', length, \
                            sender=self)
                    self.total_time = length
                    self.db.save_episode(self)
                    self.db.commit()
                    return
            except Exception, e:
                # Errors raised by the save/commit above end up here
                # as well; we then fall through to the save below
                log('Error while detecting media length: %s', str(e), \
                        sender=self)

        self.db.save_downloaded_episode(self)
        self.db.commit()
 869
    def set_state(self, state):
        """Set the state of this episode and update it in the database

        Unlike mark(), the state is assigned unconditionally (even
        if it is None).
        """
        self.state = state
        self.db.update_episode_state(self)
 873
 874     def mark(self, state=None, is_played=None, is_locked=None):
 875         if state is not None:
 876             self.state = state
 877         if is_played is not None:
 878             self.is_played = is_played
 879         if is_locked is not None:
 880             self.is_locked = is_locked
 881         self.db.update_episode_state(self)
 882
 883     @property
 884     def title_markup(self):
 885         return '%s\n<small>%s</small>' % (xml.sax.saxutils.escape(self.title),
 886                           xml.sax.saxutils.escape(self.channel.title))
 887
 888     @property
 889     def maemo_markup(self):
 890         if self.length > 0:
 891             length_str = '%s; ' % self.filesize_prop
 892         else:
 893             length_str = ''
 894         return ('<b>%s</b>\n<small>%s'+_('released %s')+ \
 895                 '; '+_('from %s')+'</small>') % (\
 896                 xml.sax.saxutils.escape(self.title), \
 897                 xml.sax.saxutils.escape(length_str), \
 898                 xml.sax.saxutils.escape(self.pubdate_prop), \
 899                 xml.sax.saxutils.escape(self.channel.title))
 900
 901     @property
 902     def maemo_remove_markup(self):
 903         if self.is_played:
 904             played_string = _('played')
 905         else:
 906             played_string = _('unplayed')
 907         downloaded_string = self.get_age_string()
 908         if not downloaded_string:
 909             downloaded_string = _('today')
 910         return ('<b>%s</b>\n<small>%s; %s; '+_('downloaded %s')+ \
 911                 '; '+_('from %s')+'</small>') % (\
 912                 xml.sax.saxutils.escape(self.title), \
 913                 xml.sax.saxutils.escape(self.filesize_prop), \
 914                 xml.sax.saxutils.escape(played_string), \
 915                 xml.sax.saxutils.escape(downloaded_string), \
 916                 xml.sax.saxutils.escape(self.channel.title))
 917
 918     def age_in_days(self):
 919         return util.file_age_in_days(self.local_filename(create=False, \
 920                 check_only=True))
 921
 922     def get_age_string(self):
 923         return util.file_age_to_string(self.age_in_days())
 924
 925     age_prop = property(fget=get_age_string)
 926
 927     def one_line_description( self):
 928         lines = util.remove_html_tags(self.description).strip().splitlines()
 929         if not lines or lines[0] == '':
 930             return _('No description available')
 931         else:
 932             return ' '.join(lines)
 933
 934     def delete_from_disk(self):
 935         try:
 936             self.channel.delete_episode(self)
 937         except:
 938             log('Cannot delete episode from disk: %s', self.title, traceback=True, sender=self)
 939
 940     def find_unique_file_name(self, url, filename, extension):
 941         current_try = util.sanitize_filename(filename, self.MAX_FILENAME_LENGTH)+extension
 942         next_try_id = 2
 943         lookup_url = None
 944
 945         if self.filename == current_try and current_try is not None:
 946             # We already have this filename - good!
 947             return current_try
 948
 949         while self.db.episode_filename_exists(current_try):
 950             current_try = '%s (%d)%s' % (filename, next_try_id, extension)
 951             next_try_id += 1
 952
 953         return current_try
 954
    def local_filename(self, create, force_update=False, check_only=False,
            template=None):
        """Get (and possibly generate) the local saving filename

        Pass create=True if you want this function to generate a
        new filename if none exists. You only want to do this when
        planning to create/download the file after calling this function.

        Normally, you should pass create=False. This will only
        create a filename when the file already exists from a previous
        version of gPodder (where we used md5 filenames). If the file
        does not exist (and the filename also does not exist), this
        function will return None.

        If you pass force_update=True to this function, it will try to
        find a new (better) filename and move the current file if this
        is the case. This is useful if (during the download) you get
        more information about the file, e.g. the mimetype and you want
        to include this information in the file name generation process.

        If check_only=True is passed to this function, it will never try
        to rename the file, even if would be a good idea. Use this if you
        only want to check if a file exists.

        If "template" is specified, it should be a filename that is to
        be used as a template for generating the "real" filename.

        The generated filename is stored in the database for future access.

        Returns the filename joined with the save_dir of the channel,
        or None if no filename is known and none is generated.
        """
        # extension() must not call back into local_filename() here,
        # as that would recurse endlessly
        ext = self.extension(may_call_local_filename=False).encode('utf-8', 'ignore')

        # For compatibility with already-downloaded episodes, we
        # have to know md5 filenames if they are downloaded already
        urldigest = hashlib.md5(self.url).hexdigest()

        if not create and self.filename is None:
            urldigest_filename = os.path.join(self.channel.save_dir, urldigest+ext)
            if os.path.exists(urldigest_filename):
                # The file exists, so set it up in our database
                log('Recovering pre-0.15.0 file: %s', urldigest_filename, sender=self)
                self.filename = urldigest+ext
                self.auto_filename = 1
                self.save()
                return urldigest_filename
            return None

        # We only want to check if the file exists, so don't try to
        # rename the file, even if it would be reasonable. See also:
        # http://bugs.gpodder.org/attachment.cgi?id=236
        if check_only:
            if self.filename is None:
                return None
            else:
                return os.path.join(self.channel.save_dir, self.filename)

        # (Re-)generate the filename if there is none yet, if the caller
        # forces it or if an automatically generated md5 filename is used
        if self.filename is None or force_update or (self.auto_filename and self.filename == urldigest+ext):
            # Try to find a new filename for the current file
            if template is not None:
                # If template is specified, trust the template's extension
                episode_filename, ext = os.path.splitext(template)
            else:
                episode_filename, extension_UNUSED = util.filename_from_url(self.url)
            fn_template = util.sanitize_filename(episode_filename, self.MAX_FILENAME_LENGTH)

            if 'redirect' in fn_template and template is None:
                # This looks like a redirection URL - force URL resolving!
                log('Looks like a redirection to me: %s', self.url, sender=self)
                url = util.get_real_url(self.channel.authenticate_url(self.url))
                log('Redirection resolved to: %s', url, sender=self)
                (episode_filename, extension_UNUSED) = util.filename_from_url(url)
                fn_template = util.sanitize_filename(episode_filename, self.MAX_FILENAME_LENGTH)

            # Use the video title for YouTube downloads
            for yt_url in ('http://youtube.com/', 'http://www.youtube.com/'):
                if self.url.startswith(yt_url):
                    fn_template = util.sanitize_filename(os.path.basename(self.title), self.MAX_FILENAME_LENGTH)

            # If the basename is empty, use the md5 hexdigest of the URL
            if len(fn_template) == 0 or fn_template.startswith('redirect.'):
                log('Report to bugs.gpodder.org: Podcast at %s with episode URL: %s', self.channel.url, self.url, sender=self)
                fn_template = urldigest

            # Find a unique filename for this episode
            wanted_filename = self.find_unique_file_name(self.url, fn_template, ext)

            # We populate the filename field the first time - does the old file still exist?
            if self.filename is None and os.path.exists(os.path.join(self.channel.save_dir, urldigest+ext)):
                log('Found pre-0.15.0 downloaded file: %s', urldigest, sender=self)
                self.filename = urldigest+ext

            # The old file exists, but we have decided to want a different filename
            if self.filename is not None and wanted_filename != self.filename:
                # there might be an old download folder crawling around - move it!
                new_file_name = os.path.join(self.channel.save_dir, wanted_filename)
                old_file_name = os.path.join(self.channel.save_dir, self.filename)
                if os.path.exists(old_file_name) and not os.path.exists(new_file_name):
                    log('Renaming %s => %s', old_file_name, new_file_name, sender=self)
                    os.rename(old_file_name, new_file_name)
                elif force_update and not os.path.exists(old_file_name):
                    # When we call force_update, the file might not yet exist when we
                    # call it from the downloading code before saving the file
                    log('Choosing new filename: %s', new_file_name, sender=self)
                else:
                    # Nothing is renamed here, but the new filename is
                    # still stored below
                    log('Warning: %s exists or %s does not.', new_file_name, old_file_name, sender=self)
                log('Updating filename of %s to "%s".', self.url, wanted_filename, sender=self)
            elif self.filename is None:
                log('Setting filename to "%s".', wanted_filename, sender=self)
            else:
                log('Should update filename. Stays the same (%s). Good!', \
                        wanted_filename, sender=self)
            # Store the (possibly unchanged) filename in the database
            self.filename = wanted_filename
            self.save()
            self.db.commit()

        return os.path.join(self.channel.save_dir, self.filename)
1070
1071     def set_mimetype(self, mimetype, commit=False):
1072         """Sets the mimetype for this episode"""
1073         self.mimetype = mimetype
1074         if commit:
1075             self.db.commit()
1076
1077     def extension(self, may_call_local_filename=True):
1078         filename, ext = util.filename_from_url(self.url)
1079         if may_call_local_filename:
1080             filename = self.local_filename(create=False)
1081             if filename is not None:
1082                 filename, ext = os.path.splitext(filename)
1083         # if we can't detect the extension from the url fallback on the mimetype
1084         if ext == '' or util.file_type_by_extension(ext) is None:
1085             ext = util.extension_from_mimetype(self.mimetype)
1086         return ext
1087
1088     def check_is_new(self, downloading=lambda e: False):
1089         """
1090         Returns True if this episode is to be considered new.
1091         "Downloading" should be a callback that gets an episode
1092         as its parameter and returns True if the episode is
1093         being downloaded at the moment.
1094         """
1095         return self.state == gpodder.STATE_NORMAL and \
1096                 not self.is_played and \
1097                 not downloading(self)
1098
1099     def mark_new(self):
1100         self.state = gpodder.STATE_NORMAL
1101         self.is_played = False
1102         self.db.update_episode_state(self)
1103
1104     def mark_old(self):
1105         self.is_played = True
1106         self.db.update_episode_state(self)
1107
1108     def file_exists(self):
1109         filename = self.local_filename(create=False, check_only=True)
1110         if filename is None:
1111             return False
1112         else:
1113             return os.path.exists(filename)
1114
1115     def was_downloaded(self, and_exists=False):
1116         if self.state != gpodder.STATE_DOWNLOADED:
1117             return False
1118         if and_exists and not self.file_exists():
1119             return False
1120         return True
1121
1122     def sync_filename(self, use_custom=False, custom_format=None):
1123         if use_custom:
1124             return util.object_string_formatter(custom_format,
1125                     episode=self, podcast=self.channel)
1126         else:
1127             return self.title
1128
1129     def file_type(self):
1130         # Assume all YouTube links are video files
1131         if youtube.is_video_link(self.url):
1132             return 'video'
1133
1134         return util.file_type_by_extension(self.extension())
1135
1136     @property
1137     def basename( self):
1138         return os.path.splitext( os.path.basename( self.url))[0]
1139
1140     @property
1141     def published( self):
1142         """
1143         Returns published date as YYYYMMDD (or 00000000 if not available)
1144         """
1145         try:
1146             return datetime.datetime.fromtimestamp(self.pubDate).strftime('%Y%m%d')
1147         except:
1148             log( 'Cannot format pubDate for "%s".', self.title, sender = self)
1149             return '00000000'
1150
1151     @property
1152     def pubtime(self):
1153         """
1154         Returns published time as HHMM (or 0000 if not available)
1155         """
1156         try:
1157             return datetime.datetime.fromtimestamp(self.pubDate).strftime('%H%M')
1158         except:
1159             log('Cannot format pubDate (time) for "%s".', self.title, sender=self)
1160             return '0000'
1161
1162     def cute_pubdate(self):
1163         result = util.format_date(self.pubDate)
1164         if result is None:
1165             return '(%s)' % _('unknown')
1166         else:
1167             return result
1168
1169     pubdate_prop = property(fget=cute_pubdate)
1170
1171     def calculate_filesize( self):
1172         filename = self.local_filename(create=False)
1173         if filename is None:
1174             log('calculate_filesized called, but filename is None!', sender=self)
1175         try:
1176             self.length = os.path.getsize(filename)
1177         except:
1178             log( 'Could not get filesize for %s.', self.url)
1179
1180     def get_play_info_string(self):
1181         if self.current_position > 0 and \
1182                 self.total_time <= self.current_position:
1183             return '%s (%s)' % (_('Finished'), self.get_duration_string(),)
1184         if self.current_position > 0:
1185             return '%s / %s' % (self.get_position_string(), \
1186                     self.get_duration_string())
1187         else:
1188             return self.get_duration_string()
1189
1190     def get_position_string(self):
1191         return util.format_time(self.current_position)
1192
1193     def get_duration_string(self):
1194         return util.format_time(self.total_time)
1195
1196     def get_filesize_string(self):
1197         return util.format_filesize(self.length)
1198
1199     filesize_prop = property(fget=get_filesize_string)
1200
1201     def get_played_string( self):
1202         if not self.is_played:
1203             return _('Unplayed')
1204
1205         return ''
1206
1207     played_prop = property(fget=get_played_string)
1208
1209     def is_duplicate(self, episode):
1210         if self.title == episode.title and self.pubDate == episode.pubDate:
1211             log('Possible duplicate detected: %s', self.title)
1212             return True
1213         return False
1214
1215     def duplicate_id(self):
1216         return hash((self.title, self.pubDate))
1217
1218     def update_from(self, episode):
1219         for k in ('title', 'url', 'description', 'link', 'pubDate', 'guid'):
1220             setattr(self, k, getattr(episode, k))
1221