Sort entry list before cutting it off (bug 1186)
[gpodder.git] / src / gpodder / model.py
blob33c8fc8a459c08a9f216a0960b0a62d2a46615e7
1 # -*- coding: utf-8 -*-
3 # gPodder - A media aggregator and podcast client
4 # Copyright (c) 2005-2010 Thomas Perl and the gPodder Team
6 # gPodder is free software; you can redistribute it and/or modify
7 # it under the terms of the GNU General Public License as published by
8 # the Free Software Foundation; either version 3 of the License, or
9 # (at your option) any later version.
11 # gPodder is distributed in the hope that it will be useful,
12 # but WITHOUT ANY WARRANTY; without even the implied warranty of
13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 # GNU General Public License for more details.
16 # You should have received a copy of the GNU General Public License
17 # along with this program. If not, see <http://www.gnu.org/licenses/>.
22 # gpodder.model - Core model classes for gPodder (2009-08-13)
23 # Based on libpodcasts.py (thp, 2005-10-29)
26 import gpodder
27 from gpodder import util
28 from gpodder import feedcore
29 from gpodder import youtube
30 from gpodder import corestats
31 from gpodder import gstreamer
33 from gpodder.liblogger import log
35 import os
36 import re
37 import glob
38 import shutil
39 import time
40 import datetime
41 import rfc822
42 import hashlib
43 import feedparser
44 import xml.sax.saxutils
46 _ = gpodder.gettext
# Exception-as-result: raised by gPodderFetcher.fetch_channel when one of
# the registered custom handlers takes over processing of a feed URL
class CustomFeed(feedcore.ExceptionWithData): pass
class gPodderFetcher(feedcore.Fetcher):
    """
    This class extends the feedcore Fetcher with the gPodder User-Agent and the
    Proxy handler based on the current settings in gPodder and provides a
    convenience method (fetch_channel) for use by PodcastChannel objects.
    """
    # Handlers registered via register(); each may claim a URL entirely
    # (e.g. video-site handlers) instead of the normal feed download path
    custom_handlers = []

    def __init__(self):
        feedcore.Fetcher.__init__(self, gpodder.user_agent)

    def fetch_channel(self, channel):
        # Fetch the feed for "channel"; the result is reported by feedcore
        # via exceptions (UpdatedFeed, NotModified, NewLocation, ...)
        etag = channel.etag
        modified = feedparser._parse_date(channel.last_modified)
        # If we have a username or password, rebuild the url with them included
        # Note: using a HTTPBasicAuthHandler would be pain because we need to
        # know the realm. It can be done, but I think this method works, too
        url = channel.authenticate_url(channel.url)
        for handler in self.custom_handlers:
            custom_feed = handler.handle_url(url)
            if custom_feed is not None:
                # A custom handler claimed this URL - short-circuit the
                # normal feed download with a CustomFeed "result exception"
                raise CustomFeed(custom_feed)
        self.fetch(url, etag, modified)

    def _resolve_url(self, url):
        # Hook called by feedcore.Fetcher to rewrite URLs before fetching
        # (maps YouTube user pages to their real feed URL)
        return youtube.get_real_channel_url(url)

    @classmethod
    def register(cls, handler):
        # Register a custom per-URL feed handler for all fetcher instances
        cls.custom_handlers.append(handler)
# Example of adding a proxy handler (kept for reference, not active):
# def _get_handlers(self):
#     # Add a ProxyHandler for fetching data via a proxy server
#     proxies = {'http': 'http://proxy.example.org:8080'}
#     return [urllib2.ProxyHandler(proxies)]

# The "register" method is exposed here for external usage
register_custom_handler = gPodderFetcher.register
class PodcastModelObject(object):
    """
    A generic base class for our podcast model providing common helper
    and utility functions.
    """

    @classmethod
    def create_from_dict(cls, d, *args):
        """
        Create a new object, passing "args" to the constructor
        and then updating the object with the values from "d".
        """
        instance = cls(*args)
        instance.update_from_dict(d)
        return instance

    def update_from_dict(self, d):
        """
        Updates the attributes of this object with values from the
        dictionary "d" by using the keys found in "d".

        Keys that do not correspond to an existing attribute are
        silently ignored.
        """
        for key, value in d.items():
            if hasattr(self, key):
                setattr(self, key, value)
116 class PodcastChannel(PodcastModelObject):
117 """holds data for a complete channel"""
118 MAX_FOLDERNAME_LENGTH = 150
119 SECONDS_PER_WEEK = 7*24*60*60
121 feed_fetcher = gPodderFetcher()
123 @classmethod
124 def build_factory(cls, download_dir):
125 def factory(dict, db):
126 return cls.create_from_dict(dict, db, download_dir)
127 return factory
129 @classmethod
130 def load_from_db(cls, db, download_dir):
131 return db.load_channels(factory=cls.build_factory(download_dir))
133 @classmethod
134 def load(cls, db, url, create=True, authentication_tokens=None,\
135 max_episodes=0, download_dir=None, allow_empty_feeds=False, \
136 mimetype_prefs=''):
137 if isinstance(url, unicode):
138 url = url.encode('utf-8')
140 tmp = db.load_channels(factory=cls.build_factory(download_dir), url=url)
141 if len(tmp):
142 return tmp[0]
143 elif create:
144 tmp = PodcastChannel(db, download_dir)
145 tmp.url = url
146 if authentication_tokens is not None:
147 tmp.username = authentication_tokens[0]
148 tmp.password = authentication_tokens[1]
150 tmp.update(max_episodes, mimetype_prefs)
151 tmp.save()
152 db.force_last_new(tmp)
153 # Subscribing to empty feeds should yield an error (except if
154 # the user specifically allows empty feeds in the config UI)
155 if sum(tmp.get_statistics()) == 0 and not allow_empty_feeds:
156 tmp.delete()
157 raise Exception(_('No downloadable episodes in feed'))
158 return tmp
160 def episode_factory(self, d, db__parameter_is_unused=None):
162 This function takes a dictionary containing key-value pairs for
163 episodes and returns a new PodcastEpisode object that is connected
164 to this PodcastChannel object.
166 Returns: A new PodcastEpisode object
168 return PodcastEpisode.create_from_dict(d, self)
170 def _consume_custom_feed(self, custom_feed, max_episodes=0):
171 self.title = custom_feed.get_title()
172 self.link = custom_feed.get_link()
173 self.description = custom_feed.get_description()
174 self.image = custom_feed.get_image()
175 self.pubDate = time.time()
176 self.save()
178 guids = [episode.guid for episode in self.get_all_episodes()]
180 # Insert newly-found episodes into the database
181 custom_feed.get_new_episodes(self, guids)
183 self.save()
185 self.db.purge(max_episodes, self.id)
187 def _consume_updated_feed(self, feed, max_episodes=0, mimetype_prefs=''):
188 self.parse_error = feed.get('bozo_exception', None)
190 # Replace multi-space and newlines with single space (Maemo bug 11173)
191 self.title = re.sub('\s+', ' ', feed.feed.get('title', self.url))
193 self.link = feed.feed.get('link', self.link)
194 self.description = feed.feed.get('subtitle', self.description)
195 # Start YouTube-specific title FIX
196 YOUTUBE_PREFIX = 'Uploads by '
197 if self.title.startswith(YOUTUBE_PREFIX):
198 self.title = self.title[len(YOUTUBE_PREFIX):] + ' on YouTube'
199 # End YouTube-specific title FIX
201 try:
202 self.pubDate = rfc822.mktime_tz(feed.feed.get('updated_parsed', None+(0,)))
203 except:
204 self.pubDate = time.time()
206 if hasattr(feed.feed, 'image'):
207 for attribute in ('href', 'url'):
208 new_value = getattr(feed.feed.image, attribute, None)
209 if new_value is not None:
210 log('Found cover art in %s: %s', attribute, new_value)
211 self.image = new_value
213 if hasattr(feed.feed, 'icon'):
214 self.image = feed.feed.icon
216 self.save()
218 # Load all episodes to update them properly.
219 existing = self.get_all_episodes()
221 # We can limit the maximum number of entries that gPodder will parse
222 if max_episodes > 0 and len(feed.entries) > max_episodes:
223 # We have to sort the entries in descending chronological order,
224 # because if the feed lists items in ascending order and has >
225 # max_episodes old episodes, new episodes will not be shown.
226 # See also: gPodder Bug 1186
227 try:
228 entries = sorted(feed.entries, \
229 key=lambda x: x.get('updated_parsed', (0,)*9), \
230 reverse=True)[:max_episodes]
231 except Exception, e:
232 log('Could not sort episodes: %s', e, sender=self, traceback=True)
233 entries = feed.entries[:max_episodes]
234 else:
235 entries = feed.entries
237 # Title + PubDate hashes for existing episodes
238 existing_dupes = dict((e.duplicate_id(), e) for e in existing)
240 # GUID-based existing episode list
241 existing_guids = dict((e.guid, e) for e in existing)
243 # Get most recent pubDate of all episodes
244 last_pubdate = self.db.get_last_pubdate(self) or 0
246 # Search all entries for new episodes
247 for entry in entries:
248 try:
249 episode = PodcastEpisode.from_feedparser_entry(entry, self, mimetype_prefs)
250 if episode is not None and not episode.title:
251 episode.title, ext = os.path.splitext(os.path.basename(episode.url))
252 except Exception, e:
253 log('Cannot instantiate episode: %s. Skipping.', e, sender=self, traceback=True)
254 continue
256 if episode is None:
257 continue
259 # Detect (and update) existing episode based on GUIDs
260 existing_episode = existing_guids.get(episode.guid, None)
261 if existing_episode:
262 existing_episode.update_from(episode)
263 existing_episode.save()
264 continue
266 # Detect (and update) existing episode based on duplicate ID
267 existing_episode = existing_dupes.get(episode.duplicate_id(), None)
268 if existing_episode:
269 if existing_episode.is_duplicate(episode):
270 existing_episode.update_from(episode)
271 existing_episode.save()
272 continue
274 # Workaround for bug 340: If the episode has been
275 # published earlier than one week before the most
276 # recent existing episode, do not mark it as new.
277 if episode.pubDate < last_pubdate - self.SECONDS_PER_WEEK:
278 log('Episode with old date: %s', episode.title, sender=self)
279 episode.is_played = True
281 episode.save()
283 # Remove "unreachable" episodes - episodes that have not been
284 # downloaded and that the feed does not list as downloadable anymore
285 if self.id is not None:
286 seen_guids = set(e.guid for e in feed.entries if hasattr(e, 'guid'))
287 episodes_to_purge = (e for e in existing if \
288 e.state != gpodder.STATE_DOWNLOADED and \
289 e.guid not in seen_guids and e.guid is not None)
290 for episode in episodes_to_purge:
291 log('Episode removed from feed: %s (%s)', episode.title, \
292 episode.guid, sender=self)
293 self.db.delete_episode_by_guid(episode.guid, self.id)
295 # This *might* cause episodes to be skipped if there were more than
296 # max_episodes_per_feed items added to the feed between updates.
297 # The benefit is that it prevents old episodes from apearing as new
298 # in certain situations (see bug #340).
299 self.db.purge(max_episodes, self.id)
301 def update_channel_lock(self):
302 self.db.update_channel_lock(self)
304 def _update_etag_modified(self, feed):
305 self.updated_timestamp = time.time()
306 self.calculate_publish_behaviour()
307 self.etag = feed.headers.get('etag', self.etag)
308 self.last_modified = feed.headers.get('last-modified', self.last_modified)
310 def query_automatic_update(self):
311 """Query if this channel should be updated automatically
313 Returns True if the update should happen in automatic
314 mode or False if this channel should be skipped (timeout
315 not yet reached or release not expected right now).
317 updated = self.updated_timestamp
318 expected = self.release_expected
320 now = time.time()
321 one_day_ago = now - 60*60*24
322 lastcheck = now - 60*10
324 return updated < one_day_ago or \
325 (expected < now and updated < lastcheck)
327 def update(self, max_episodes=0, mimetype_prefs=''):
328 try:
329 self.feed_fetcher.fetch_channel(self)
330 except CustomFeed, updated:
331 custom_feed = updated.data
332 self._consume_custom_feed(custom_feed, max_episodes)
333 self.save()
334 except feedcore.UpdatedFeed, updated:
335 feed = updated.data
336 self._consume_updated_feed(feed, max_episodes, mimetype_prefs)
337 self._update_etag_modified(feed)
338 self.save()
339 except feedcore.NewLocation, updated:
340 feed = updated.data
341 self.url = feed.href
342 self._consume_updated_feed(feed, max_episodes, mimetype_prefs)
343 self._update_etag_modified(feed)
344 self.save()
345 except feedcore.NotModified, updated:
346 feed = updated.data
347 self._update_etag_modified(feed)
348 self.save()
349 except Exception, e:
350 # "Not really" errors
351 #feedcore.AuthenticationRequired
352 # Temporary errors
353 #feedcore.Offline
354 #feedcore.BadRequest
355 #feedcore.InternalServerError
356 #feedcore.WifiLogin
357 # Permanent errors
358 #feedcore.Unsubscribe
359 #feedcore.NotFound
360 #feedcore.InvalidFeed
361 #feedcore.UnknownStatusCode
362 raise
364 if gpodder.user_hooks is not None:
365 gpodder.user_hooks.on_podcast_updated(self)
367 self.db.commit()
369 def delete(self):
370 self.db.delete_channel(self)
372 def save(self):
373 if gpodder.user_hooks is not None:
374 gpodder.user_hooks.on_podcast_save(self)
375 if self.foldername is None:
376 # get_save_dir() finds a unique value for foldername
377 self.get_save_dir()
378 self.db.save_channel(self)
380 def get_statistics(self):
381 if self.id is None:
382 return (0, 0, 0, 0, 0)
383 else:
384 return self.db.get_channel_count(int(self.id))
386 def authenticate_url(self, url):
387 return util.url_add_authentication(url, self.username, self.password)
389 def __init__(self, db, download_dir):
390 self.db = db
391 self.download_dir = download_dir
392 self.id = None
393 self.url = None
394 self.title = ''
395 self.link = ''
396 self.description = ''
397 self.image = None
398 self.pubDate = 0
399 self.parse_error = None
400 self.foldername = None
401 self.auto_foldername = 1 # automatically generated foldername
403 # should this channel be synced to devices? (ex: iPod)
404 self.sync_to_devices = True
405 # to which playlist should be synced
406 self.device_playlist_name = 'gPodder'
407 # if set, this overrides the channel-provided title
408 self.override_title = ''
409 self.username = ''
410 self.password = ''
412 self.last_modified = None
413 self.etag = None
415 self.save_dir_size = 0
416 self.__save_dir_size_set = False
418 self.channel_is_locked = False
420 self.release_expected = time.time()
421 self.release_deviation = 0
422 self.updated_timestamp = 0
423 self.feed_update_enabled = True
425 def calculate_publish_behaviour(self):
426 episodes = self.db.load_episodes(self, factory=self.episode_factory, limit=30)
427 if len(episodes) < 3:
428 return
430 deltas = []
431 latest = max(e.pubDate for e in episodes)
432 for index in range(len(episodes)-1):
433 if episodes[index].pubDate != 0 and episodes[index+1].pubDate != 0:
434 deltas.append(episodes[index].pubDate - episodes[index+1].pubDate)
436 if len(deltas) > 1:
437 stats = corestats.Stats(deltas)
438 self.release_expected = min([latest+stats.stdev(), latest+(stats.min()+stats.avg())*.5])
439 self.release_deviation = stats.stdev()
440 else:
441 self.release_expected = latest
442 self.release_deviation = 0
444 def request_save_dir_size(self):
445 if not self.__save_dir_size_set:
446 self.update_save_dir_size()
447 self.__save_dir_size_set = True
449 def update_save_dir_size(self):
450 self.save_dir_size = util.calculate_size(self.save_dir)
452 def get_title( self):
453 if self.override_title:
454 return self.override_title
455 elif not self.__title.strip():
456 return self.url
457 else:
458 return self.__title
460 def set_title( self, value):
461 self.__title = value.strip()
463 title = property(fget=get_title,
464 fset=set_title)
466 def set_custom_title( self, custom_title):
467 custom_title = custom_title.strip()
469 # if the custom title is the same as we have
470 if custom_title == self.override_title:
471 return
473 # if custom title is the same as channel title and we didn't have a custom title
474 if custom_title == self.__title and self.override_title == '':
475 return
477 # make sure self.foldername is initialized
478 self.get_save_dir()
480 # rename folder if custom_title looks sane
481 new_folder_name = self.find_unique_folder_name(custom_title)
482 if len(new_folder_name) > 0 and new_folder_name != self.foldername:
483 log('Changing foldername based on custom title: %s', custom_title, sender=self)
484 new_folder = os.path.join(self.download_dir, new_folder_name)
485 old_folder = os.path.join(self.download_dir, self.foldername)
486 if os.path.exists(old_folder):
487 if not os.path.exists(new_folder):
488 # Old folder exists, new folder does not -> simply rename
489 log('Renaming %s => %s', old_folder, new_folder, sender=self)
490 os.rename(old_folder, new_folder)
491 else:
492 # Both folders exist -> move files and delete old folder
493 log('Moving files from %s to %s', old_folder, new_folder, sender=self)
494 for file in glob.glob(os.path.join(old_folder, '*')):
495 shutil.move(file, new_folder)
496 log('Removing %s', old_folder, sender=self)
497 shutil.rmtree(old_folder, ignore_errors=True)
498 self.foldername = new_folder_name
499 self.save()
501 if custom_title != self.__title:
502 self.override_title = custom_title
503 else:
504 self.override_title = ''
506 def get_downloaded_episodes(self):
507 return self.db.load_episodes(self, factory=self.episode_factory, state=gpodder.STATE_DOWNLOADED)
509 def get_new_episodes(self, downloading=lambda e: False):
511 Get a list of new episodes. You can optionally specify
512 "downloading" as a callback that takes an episode as
513 a parameter and returns True if the episode is currently
514 being downloaded or False if not.
516 By default, "downloading" is implemented so that it
517 reports all episodes as not downloading.
519 return [episode for episode in self.db.load_episodes(self, \
520 factory=self.episode_factory, state=gpodder.STATE_NORMAL) if \
521 episode.check_is_new(downloading=downloading)]
523 def get_playlist_filename(self):
524 # If the save_dir doesn't end with a slash (which it really should
525 # not, if the implementation is correct, we can just append .m3u :)
526 assert self.save_dir[-1] != '/'
527 return self.save_dir+'.m3u'
529 def update_m3u_playlist(self):
530 m3u_filename = self.get_playlist_filename()
532 downloaded_episodes = self.get_downloaded_episodes()
533 if not downloaded_episodes:
534 log('No episodes - removing %s', m3u_filename, sender=self)
535 util.delete_file(m3u_filename)
536 return
538 log('Writing playlist to %s', m3u_filename, sender=self)
539 util.write_m3u_playlist(m3u_filename, \
540 PodcastEpisode.sort_by_pubdate(downloaded_episodes))
542 def get_episode_by_url(self, url):
543 return self.db.load_single_episode(self, \
544 factory=self.episode_factory, url=url)
546 def get_episode_by_filename(self, filename):
547 return self.db.load_single_episode(self, \
548 factory=self.episode_factory, filename=filename)
550 def get_all_episodes(self):
551 return self.db.load_episodes(self, factory=self.episode_factory)
553 def find_unique_folder_name(self, foldername):
554 # Remove trailing dots to avoid errors on Windows (bug 600)
555 foldername = foldername.strip().rstrip('.')
557 current_try = util.sanitize_filename(foldername, \
558 self.MAX_FOLDERNAME_LENGTH)
559 next_try_id = 2
561 while True:
562 if self.db.channel_foldername_exists(current_try):
563 current_try = '%s (%d)' % (foldername, next_try_id)
564 next_try_id += 1
565 else:
566 return current_try
568 def get_save_dir(self):
569 urldigest = hashlib.md5(self.url).hexdigest()
570 sanitizedurl = util.sanitize_filename(self.url, self.MAX_FOLDERNAME_LENGTH)
571 if self.foldername is None or (self.auto_foldername and (self.foldername == urldigest or self.foldername.startswith(sanitizedurl))):
572 # we must change the folder name, because it has not been set manually
573 fn_template = util.sanitize_filename(self.title, self.MAX_FOLDERNAME_LENGTH)
575 # if this is an empty string, try the basename
576 if len(fn_template) == 0:
577 log('That is one ugly feed you have here! (Report this to bugs.gpodder.org: %s)', self.url, sender=self)
578 fn_template = util.sanitize_filename(os.path.basename(self.url), self.MAX_FOLDERNAME_LENGTH)
580 # If the basename is also empty, use the first 6 md5 hexdigest chars of the URL
581 if len(fn_template) == 0:
582 log('That is one REALLY ugly feed you have here! (Report this to bugs.gpodder.org: %s)', self.url, sender=self)
583 fn_template = urldigest # no need for sanitize_filename here
585 # Find a unique folder name for this podcast
586 wanted_foldername = self.find_unique_folder_name(fn_template)
588 # if the foldername has not been set, check if the (old) md5 filename exists
589 if self.foldername is None and os.path.exists(os.path.join(self.download_dir, urldigest)):
590 log('Found pre-0.15.0 download folder for %s: %s', self.title, urldigest, sender=self)
591 self.foldername = urldigest
593 # we have a valid, new folder name in "current_try" -> use that!
594 if self.foldername is not None and wanted_foldername != self.foldername:
595 # there might be an old download folder crawling around - move it!
596 new_folder_name = os.path.join(self.download_dir, wanted_foldername)
597 old_folder_name = os.path.join(self.download_dir, self.foldername)
598 if os.path.exists(old_folder_name):
599 if not os.path.exists(new_folder_name):
600 # Old folder exists, new folder does not -> simply rename
601 log('Renaming %s => %s', old_folder_name, new_folder_name, sender=self)
602 os.rename(old_folder_name, new_folder_name)
603 else:
604 # Both folders exist -> move files and delete old folder
605 log('Moving files from %s to %s', old_folder_name, new_folder_name, sender=self)
606 for file in glob.glob(os.path.join(old_folder_name, '*')):
607 shutil.move(file, new_folder_name)
608 log('Removing %s', old_folder_name, sender=self)
609 shutil.rmtree(old_folder_name, ignore_errors=True)
610 log('Updating foldername of %s to "%s".', self.url, wanted_foldername, sender=self)
611 self.foldername = wanted_foldername
612 self.save()
614 save_dir = os.path.join(self.download_dir, self.foldername)
616 # Create save_dir if it does not yet exist
617 if not util.make_directory( save_dir):
618 log( 'Could not create save_dir: %s', save_dir, sender = self)
620 return save_dir
622 save_dir = property(fget=get_save_dir)
624 def remove_downloaded(self):
625 # Remove the playlist file if it exists
626 m3u_filename = self.get_playlist_filename()
627 if os.path.exists(m3u_filename):
628 util.delete_file(m3u_filename)
630 # Remove the download directory
631 shutil.rmtree(self.save_dir, True)
633 @property
634 def cover_file(self):
635 new_name = os.path.join(self.save_dir, 'folder.jpg')
636 if not os.path.exists(new_name):
637 old_names = ('cover', '.cover')
638 for old_name in old_names:
639 filename = os.path.join(self.save_dir, old_name)
640 if os.path.exists(filename):
641 shutil.move(filename, new_name)
642 return new_name
644 return new_name
646 def delete_episode(self, episode):
647 filename = episode.local_filename(create=False, check_only=True)
648 if filename is not None:
649 util.delete_file(filename)
651 episode.set_state(gpodder.STATE_DELETED)
654 class PodcastEpisode(PodcastModelObject):
655 """holds data for one object in a channel"""
656 MAX_FILENAME_LENGTH = 200
    def _get_played(self):
        return self.is_played

    def _set_played(self, played):
        self.is_played = played

    # Alias "is_played" to "played" for DB column mapping
    played = property(fget=_get_played, fset=_set_played)
    def _get_locked(self):
        return self.is_locked

    def _set_locked(self, locked):
        self.is_locked = locked

    # Alias "is_locked" to "locked" for DB column mapping
    locked = property(fget=_get_locked, fset=_set_locked)
    def _get_channel_id(self):
        return self.channel.id

    def _set_channel_id(self, channel_id):
        # The channel id is owned by the parent channel; the DB layer may
        # only "set" the value the episode already has
        assert self.channel.id == channel_id

    # Accessor for the "channel_id" DB column
    channel_id = property(fget=_get_channel_id, fset=_set_channel_id)
685 @staticmethod
686 def sort_by_pubdate(episodes, reverse=False):
687 """Sort a list of PodcastEpisode objects chronologically
689 Returns a iterable, sorted sequence of the episodes
691 key_pubdate = lambda e: e.pubDate
692 return sorted(episodes, key=key_pubdate, reverse=reverse)
    def reload_from_db(self):
        """
        Re-reads all episode details for this object from the
        database and updates this object accordingly. Can be
        used to refresh existing objects when the database has
        been updated (e.g. the filename has been set after a
        download where it was not set before the download)
        """
        d = self.db.load_episode(self.id)
        self.update_from_dict(d or {})
        return self
706 def has_website_link(self):
707 return bool(self.link) and (self.link != self.url or \
708 youtube.is_video_link(self.link))
    @staticmethod
    def from_feedparser_entry(entry, channel, mimetype_prefs=''):
        """Create a new episode from a feedparser entry

        Fills in title, description, guid, pubDate and then tries, in
        order: enclosures (preferring mime types listed first in
        mimetype_prefs), Media RSS content, plain links that look like
        audio/video files, and finally MP3 URLs found in the entry's
        HTML content. Returns None if no downloadable URL was found.
        """
        episode = PodcastEpisode(channel)

        # Replace multi-space and newlines with single space (Maemo bug 11173)
        episode.title = re.sub('\s+', ' ', entry.get('title', ''))
        episode.link = entry.get('link', '')
        if 'content' in entry and len(entry['content']) and \
                entry['content'][0].type == 'text/html':
            episode.description = entry['content'][0].value
        else:
            episode.description = entry.get('summary', '')

        try:
            # Parse iTunes-specific podcast duration metadata
            total_time = util.parse_time(entry.get('itunes_duration', ''))
            episode.total_time = total_time
        except:
            pass

        # Fallback to subtitle if summary is not available
        if not episode.description:
            episode.description = entry.get('subtitle', '')

        episode.guid = entry.get('id', '')
        if entry.get('updated_parsed', None):
            # mktime_tz expects a 10-tuple; append a zero UTC offset
            episode.pubDate = rfc822.mktime_tz(entry.updated_parsed+(0,))

        enclosures = entry.get('enclosures', ())
        audio_available = any(e.get('type', '').startswith('audio/') \
                for e in enclosures)
        video_available = any(e.get('type', '').startswith('video/') \
                for e in enclosures)

        # Create the list of preferred mime types
        mimetype_prefs = mimetype_prefs.split(',')

        def calculate_preference_value(enclosure):
            """Calculate preference value of an enclosure

            This is based on mime types and allows users to prefer
            certain mime types over others (e.g. MP3 over AAC, ...)
            """
            mimetype = enclosure.get('type', None)
            try:
                # If the mime type is found, return its (zero-based) index
                return mimetype_prefs.index(mimetype)
            except ValueError:
                # If it is not found, assume it comes after all listed items
                return len(mimetype_prefs)

        # Enclosures
        for e in sorted(enclosures, key=calculate_preference_value):
            episode.mimetype = e.get('type', 'application/octet-stream')
            if episode.mimetype == '':
                # See Maemo bug 10036
                log('Fixing empty mimetype in ugly feed', sender=episode)
                episode.mimetype = 'application/octet-stream'

            if '/' not in episode.mimetype:
                continue

            # Skip images in feeds if audio or video is available (bug 979)
            if episode.mimetype.startswith('image/') and \
                    (audio_available or video_available):
                continue

            episode.url = util.normalize_feed_url(e.get('href', ''))
            if not episode.url:
                continue

            try:
                episode.length = int(e.length) or -1
            except:
                episode.length = -1

            return episode

        # Media RSS content
        for m in entry.get('media_content', ()):
            episode.mimetype = m.get('type', 'application/octet-stream')
            if '/' not in episode.mimetype:
                continue

            episode.url = util.normalize_feed_url(m.get('url', ''))
            if not episode.url:
                continue

            try:
                episode.length = int(m.fileSize) or -1
            except:
                episode.length = -1

            return episode

        # Brute-force detection of any links
        for l in entry.get('links', ()):
            episode.url = util.normalize_feed_url(l.get('href', ''))
            if not episode.url:
                continue

            if youtube.is_video_link(episode.url):
                return episode

            # Check if we can resolve this link to a audio/video file
            filename, extension = util.filename_from_url(episode.url)
            file_type = util.file_type_by_extension(extension)
            if file_type is None and hasattr(l, 'type'):
                extension = util.extension_from_mimetype(l.type)
                file_type = util.file_type_by_extension(extension)

            # The link points to a audio or video file - use it!
            if file_type is not None:
                return episode

        # Scan MP3 links in description text
        mp3s = re.compile(r'http://[^"]*\.mp3')
        for content in entry.get('content', ()):
            html = content.value
            for match in mp3s.finditer(html):
                episode.url = match.group(0)
                return episode

        return None
    def __init__(self, channel):
        """Create a new, empty episode belonging to "channel"."""
        self.db = channel.db
        # Used by Storage for faster saving
        self.id = None
        self.url = ''
        self.title = ''
        self.length = 0              # enclosure/file size in bytes
        self.mimetype = 'application/octet-stream'
        self.guid = ''
        self.description = ''
        self.link = ''
        self.channel = channel
        self.pubDate = 0             # publication time (unix timestamp)
        self.filename = None
        self.auto_filename = 1 # automatically generated filename

        self.state = gpodder.STATE_NORMAL
        self.is_played = False

        # Initialize the "is_locked" property
        self._is_locked = False
        self.is_locked = channel.channel_is_locked

        # Time attributes
        self.total_time = 0              # total length in seconds
        self.current_position = 0        # last playback position in seconds
        self.current_position_updated = 0  # time the position was last saved
    def get_is_locked(self):
        return self._is_locked

    def set_is_locked(self, is_locked):
        # Normalize to a real boolean before storing
        self._is_locked = bool(is_locked)

    is_locked = property(fget=get_is_locked, fset=set_is_locked)
    def save(self):
        """Persist this episode to the database (runs save hooks)."""
        if self.state != gpodder.STATE_DOWNLOADED and self.file_exists():
            # The file is already on disk - fix up the state accordingly
            self.state = gpodder.STATE_DOWNLOADED
        if gpodder.user_hooks is not None:
            gpodder.user_hooks.on_episode_save(self)
        self.db.save_episode(self)
    def on_downloaded(self, filename):
        """Called when the download to "filename" has finished

        Updates state and file size; if the feed did not provide a
        duration, tries to detect it from the file via gstreamer.
        """
        self.state = gpodder.STATE_DOWNLOADED
        self.is_played = False
        self.length = os.path.getsize(filename)

        if not self.total_time:
            try:
                length = gstreamer.get_track_length(filename)
                if length is not None:
                    length = int(length/1000)   # milliseconds -> seconds
                    log('Detected media length: %d seconds', length, \
                            sender=self)
                    self.total_time = length
                    self.db.save_episode(self)
                    self.db.commit()
                    return
            except Exception, e:
                # Length detection is best-effort only
                log('Error while detecting media length: %s', str(e), \
                        sender=self)

        self.db.save_downloaded_episode(self)
        self.db.commit()
    def set_state(self, state):
        """Set the episode state and persist it immediately."""
        self.state = state
        self.db.update_episode_state(self)
905 def mark(self, state=None, is_played=None, is_locked=None):
906 if state is not None:
907 self.state = state
908 if is_played is not None:
909 self.is_played = is_played
910 if is_locked is not None:
911 self.is_locked = is_locked
912 self.db.update_episode_state(self)
914 @property
915 def title_markup(self):
916 return '%s\n<small>%s</small>' % (xml.sax.saxutils.escape(self.title),
917 xml.sax.saxutils.escape(self.channel.title))
    @property
    def maemo_markup(self):
        # Pango markup for the Maemo episode list: bold title plus
        # file size (if known), release date and channel name
        if self.length > 0:
            length_str = '%s; ' % self.filesize_prop
        else:
            length_str = ''
        return ('<b>%s</b>\n<small>%s'+_('released %s')+ \
                '; '+_('from %s')+'</small>') % (\
                xml.sax.saxutils.escape(re.sub('\s+', ' ', self.title)), \
                xml.sax.saxutils.escape(length_str), \
                xml.sax.saxutils.escape(self.pubdate_prop), \
                xml.sax.saxutils.escape(re.sub('\s+', ' ', self.channel.title)))
    @property
    def maemo_remove_markup(self):
        # Pango markup for the Maemo "remove episodes" list: title plus
        # size, playback status, download age and channel name
        if self.total_time and self.current_position:
            played_string = self.get_play_info_string()
        elif self.is_played:
            played_string = _('played')
        else:
            played_string = _('unplayed')
        downloaded_string = self.get_age_string()
        if not downloaded_string:
            downloaded_string = _('today')
        return ('<b>%s</b>\n<small>%s; %s; '+_('downloaded %s')+ \
                '; '+_('from %s')+'</small>') % (\
                xml.sax.saxutils.escape(self.title), \
                xml.sax.saxutils.escape(self.filesize_prop), \
                xml.sax.saxutils.escape(played_string), \
                xml.sax.saxutils.escape(downloaded_string), \
                xml.sax.saxutils.escape(self.channel.title))
    def age_in_days(self):
        # Age (in days) of the downloaded file, based on its mtime
        return util.file_age_in_days(self.local_filename(create=False, \
                check_only=True))

    age_int_prop = property(fget=age_in_days)
    def get_age_string(self):
        # Human-readable version of age_in_days()
        return util.file_age_to_string(self.age_in_days())

    age_prop = property(fget=get_age_string)
    def one_line_description( self):
        # Return the description collapsed to a single line of plain
        # text (HTML stripped), or a placeholder if none is available
        lines = util.remove_html_tags(self.description or '').strip().splitlines()
        if not lines or lines[0] == '':
            return _('No description available')
        else:
            return ' '.join(lines)
    def delete_from_disk(self):
        """Delete the downloaded file for this episode (best-effort)."""
        try:
            self.channel.delete_episode(self)
        except:
            log('Cannot delete episode from disk: %s', self.title, traceback=True, sender=self)
975 def find_unique_file_name(self, url, filename, extension):
976 current_try = util.sanitize_filename(filename, self.MAX_FILENAME_LENGTH)+extension
977 next_try_id = 2
978 lookup_url = None
980 if self.filename == current_try and current_try is not None:
981 # We already have this filename - good!
982 return current_try
984 while self.db.episode_filename_exists(current_try):
985 current_try = '%s (%d)%s' % (filename, next_try_id, extension)
986 next_try_id += 1
988 return current_try
    def local_filename(self, create, force_update=False, check_only=False,
            template=None):
        """Get (and possibly generate) the local saving filename

        Pass create=True if you want this function to generate a
        new filename if none exists. You only want to do this when
        planning to create/download the file after calling this function.

        Normally, you should pass create=False. This will only
        create a filename when the file already exists from a previous
        version of gPodder (where we used md5 filenames). If the file
        does not exist (and the filename also does not exist), this
        function will return None.

        If you pass force_update=True to this function, it will try to
        find a new (better) filename and move the current file if this
        is the case. This is useful if (during the download) you get
        more information about the file, e.g. the mimetype and you want
        to include this information in the file name generation process.

        If check_only=True is passed to this function, it will never try
        to rename the file, even if would be a good idea. Use this if you
        only want to check if a file exists.

        If "template" is specified, it should be a filename that is to
        be used as a template for generating the "real" filename.

        The generated filename is stored in the database for future access.
        """
        # Extension from URL/mimetype only - may_call_local_filename=False
        # avoids infinite recursion back into this method
        ext = self.extension(may_call_local_filename=False).encode('utf-8', 'ignore')

        # For compatibility with already-downloaded episodes, we
        # have to know md5 filenames if they are downloaded already
        urldigest = hashlib.md5(self.url).hexdigest()

        if not create and self.filename is None:
            urldigest_filename = os.path.join(self.channel.save_dir, urldigest+ext)
            if os.path.exists(urldigest_filename):
                # The file exists, so set it up in our database
                log('Recovering pre-0.15.0 file: %s', urldigest_filename, sender=self)
                self.filename = urldigest+ext
                self.auto_filename = 1
                self.save()
                return urldigest_filename
            return None

        # We only want to check if the file exists, so don't try to
        # rename the file, even if it would be reasonable. See also:
        # http://bugs.gpodder.org/attachment.cgi?id=236
        if check_only:
            if self.filename is None:
                return None
            else:
                return os.path.join(self.channel.save_dir, self.filename)

        # Regenerate only if there is no filename yet, the caller forces
        # it, or the current filename is an auto-chosen md5 digest
        if self.filename is None or force_update or (self.auto_filename and self.filename == urldigest+ext):
            # Try to find a new filename for the current file
            if template is not None:
                # If template is specified, trust the template's extension
                episode_filename, ext = os.path.splitext(template)
            else:
                episode_filename, extension_UNUSED = util.filename_from_url(self.url)
            fn_template = util.sanitize_filename(episode_filename, self.MAX_FILENAME_LENGTH)

            if 'redirect' in fn_template and template is None:
                # This looks like a redirection URL - force URL resolving!
                log('Looks like a redirection to me: %s', self.url, sender=self)
                url = util.get_real_url(self.channel.authenticate_url(self.url))
                log('Redirection resolved to: %s', url, sender=self)
                (episode_filename, extension_UNUSED) = util.filename_from_url(url)
                fn_template = util.sanitize_filename(episode_filename, self.MAX_FILENAME_LENGTH)

            # Use the video title for YouTube downloads
            for yt_url in ('http://youtube.com/', 'http://www.youtube.com/'):
                if self.url.startswith(yt_url):
                    fn_template = util.sanitize_filename(os.path.basename(self.title), self.MAX_FILENAME_LENGTH)

            # If the basename is empty, use the md5 hexdigest of the URL
            if len(fn_template) == 0 or fn_template.startswith('redirect.'):
                log('Report to bugs.gpodder.org: Podcast at %s with episode URL: %s', self.channel.url, self.url, sender=self)
                fn_template = urldigest

            # Find a unique filename for this episode
            wanted_filename = self.find_unique_file_name(self.url, fn_template, ext)

            # We populate the filename field the first time - does the old file still exist?
            if self.filename is None and os.path.exists(os.path.join(self.channel.save_dir, urldigest+ext)):
                log('Found pre-0.15.0 downloaded file: %s', urldigest, sender=self)
                self.filename = urldigest+ext

            # The old file exists, but we have decided to want a different filename
            if self.filename is not None and wanted_filename != self.filename:
                # there might be an old download folder crawling around - move it!
                new_file_name = os.path.join(self.channel.save_dir, wanted_filename)
                old_file_name = os.path.join(self.channel.save_dir, self.filename)
                if os.path.exists(old_file_name) and not os.path.exists(new_file_name):
                    log('Renaming %s => %s', old_file_name, new_file_name, sender=self)
                    os.rename(old_file_name, new_file_name)
                elif force_update and not os.path.exists(old_file_name):
                    # When we call force_update, the file might not yet exist when we
                    # call it from the downloading code before saving the file
                    log('Choosing new filename: %s', new_file_name, sender=self)
                else:
                    log('Warning: %s exists or %s does not.', new_file_name, old_file_name, sender=self)
                log('Updating filename of %s to "%s".', self.url, wanted_filename, sender=self)
            elif self.filename is None:
                log('Setting filename to "%s".', wanted_filename, sender=self)
            else:
                log('Should update filename. Stays the same (%s). Good!', \
                        wanted_filename, sender=self)
            self.filename = wanted_filename
            self.save()
            self.db.commit()

        return os.path.join(self.channel.save_dir, self.filename)
1106 def set_mimetype(self, mimetype, commit=False):
1107 """Sets the mimetype for this episode"""
1108 self.mimetype = mimetype
1109 if commit:
1110 self.db.commit()
1112 def extension(self, may_call_local_filename=True):
1113 filename, ext = util.filename_from_url(self.url)
1114 if may_call_local_filename:
1115 filename = self.local_filename(create=False)
1116 if filename is not None:
1117 filename, ext = os.path.splitext(filename)
1118 # if we can't detect the extension from the url fallback on the mimetype
1119 if ext == '' or util.file_type_by_extension(ext) is None:
1120 ext = util.extension_from_mimetype(self.mimetype)
1121 return ext
1123 def check_is_new(self, downloading=lambda e: False):
1125 Returns True if this episode is to be considered new.
1126 "Downloading" should be a callback that gets an episode
1127 as its parameter and returns True if the episode is
1128 being downloaded at the moment.
1130 return self.state == gpodder.STATE_NORMAL and \
1131 not self.is_played and \
1132 not downloading(self)
1134 def mark_new(self):
1135 self.state = gpodder.STATE_NORMAL
1136 self.is_played = False
1137 self.db.update_episode_state(self)
1139 def mark_old(self):
1140 self.is_played = True
1141 self.db.update_episode_state(self)
1143 def file_exists(self):
1144 filename = self.local_filename(create=False, check_only=True)
1145 if filename is None:
1146 return False
1147 else:
1148 return os.path.exists(filename)
1150 def was_downloaded(self, and_exists=False):
1151 if self.state != gpodder.STATE_DOWNLOADED:
1152 return False
1153 if and_exists and not self.file_exists():
1154 return False
1155 return True
1157 def sync_filename(self, use_custom=False, custom_format=None):
1158 if use_custom:
1159 return util.object_string_formatter(custom_format,
1160 episode=self, podcast=self.channel)
1161 else:
1162 return self.title
1164 def file_type(self):
1165 # Assume all YouTube links are video files
1166 if youtube.is_video_link(self.url):
1167 return 'video'
1169 return util.file_type_by_extension(self.extension())
1171 @property
1172 def basename( self):
1173 return os.path.splitext( os.path.basename( self.url))[0]
1175 @property
1176 def published( self):
1178 Returns published date as YYYYMMDD (or 00000000 if not available)
1180 try:
1181 return datetime.datetime.fromtimestamp(self.pubDate).strftime('%Y%m%d')
1182 except:
1183 log( 'Cannot format pubDate for "%s".', self.title, sender = self)
1184 return '00000000'
1186 @property
1187 def pubtime(self):
1189 Returns published time as HHMM (or 0000 if not available)
1191 try:
1192 return datetime.datetime.fromtimestamp(self.pubDate).strftime('%H%M')
1193 except:
1194 log('Cannot format pubDate (time) for "%s".', self.title, sender=self)
1195 return '0000'
1197 def playlist_title(self):
1198 """Return a title for this episode in a playlist
1200 The title will be composed of the podcast name, the
1201 episode name and the publication date. The return
1202 value is the canonical representation of this episode
1203 in playlists (for example, M3U playlists).
1205 return '%s - %s (%s)' % (self.channel.title, \
1206 self.title, \
1207 self.cute_pubdate())
1209 def cute_pubdate(self):
1210 result = util.format_date(self.pubDate)
1211 if result is None:
1212 return '(%s)' % _('unknown')
1213 else:
1214 return result
1216 pubdate_prop = property(fget=cute_pubdate)
1218 def calculate_filesize( self):
1219 filename = self.local_filename(create=False)
1220 if filename is None:
1221 log('calculate_filesized called, but filename is None!', sender=self)
1222 try:
1223 self.length = os.path.getsize(filename)
1224 except:
1225 log( 'Could not get filesize for %s.', self.url)
1227 def is_finished(self):
1228 """Return True if this episode is considered "finished playing"
1230 An episode is considered "finished" when there is a
1231 current position mark on the track, and when the
1232 current position is greater than 99 percent of the
1233 total time or inside the last 10 seconds of a track.
1235 return self.current_position > 0 and \
1236 (self.current_position + 10 >= self.total_time or \
1237 self.current_position >= self.total_time*.99)
1239 def get_play_info_string(self):
1240 if self.is_finished():
1241 return '%s (%s)' % (_('Finished'), self.get_duration_string(),)
1242 if self.current_position > 0:
1243 return '%s / %s' % (self.get_position_string(), \
1244 self.get_duration_string())
1245 else:
1246 return self.get_duration_string()
1248 def get_position_string(self):
1249 return util.format_time(self.current_position)
1251 def get_duration_string(self):
1252 return util.format_time(self.total_time)
1254 def get_filesize_string(self):
1255 return util.format_filesize(self.length)
1257 filesize_prop = property(fget=get_filesize_string)
1259 def get_played_string( self):
1260 if not self.is_played:
1261 return _('Unplayed')
1263 return ''
1265 played_prop = property(fget=get_played_string)
1267 def is_duplicate(self, episode):
1268 if self.title == episode.title and self.pubDate == episode.pubDate:
1269 log('Possible duplicate detected: %s', self.title)
1270 return True
1271 return False
1273 def duplicate_id(self):
1274 return hash((self.title, self.pubDate))
1276 def update_from(self, episode):
1277 for k in ('title', 'url', 'description', 'link', 'pubDate', 'guid'):
1278 setattr(self, k, getattr(episode, k))