src/gpodder/model.py

   1 # -*- coding: utf-8 -*-
   2 #
   3 # gPodder - A media aggregator and podcast client
   4 # Copyright (c) 2005-2010 Thomas Perl and the gPodder Team
   5 #
   6 # gPodder is free software; you can redistribute it and/or modify
   7 # it under the terms of the GNU General Public License as published by
   8 # the Free Software Foundation; either version 3 of the License, or
   9 # (at your option) any later version.
  10 #
  11 # gPodder is distributed in the hope that it will be useful,
  12 # but WITHOUT ANY WARRANTY; without even the implied warranty of
  13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  14 # GNU General Public License for more details.
  15 #
  16 # You should have received a copy of the GNU General Public License
  17 # along with this program.  If not, see <http://www.gnu.org/licenses/>.
  18 #
  19
  20
  21 #
  22 #  gpodder.model - Core model classes for gPodder (2009-08-13)
  23 #  Based on libpodcasts.py (thp, 2005-10-29)
  24 #
  25
  26 import gpodder
  27 from gpodder import util
  28 from gpodder import feedcore
  29 from gpodder import youtube
  30 from gpodder import corestats
  31
  32 from gpodder.liblogger import log
  33
  34 import os
  35 import re
  36 import glob
  37 import shutil
  38 import urllib
  39 import urlparse
  40 import time
  41 import datetime
  42 import rfc822
  43 import hashlib
  44 import feedparser
  45 import xml.sax.saxutils
  46
# Short alias for gpodder.gettext; user-visible strings are wrapped in _()
_ = gpodder.gettext
  48
  49
  50 class CustomFeed(feedcore.ExceptionWithData): pass
  51
  52 class gPodderFetcher(feedcore.Fetcher):
  53     """
  54     This class extends the feedcore Fetcher with the gPodder User-Agent and the
  55     Proxy handler based on the current settings in gPodder and provides a
  56     convenience method (fetch_channel) for use by PodcastChannel objects.
  57     """
  58     custom_handlers = []
  59
  60     def __init__(self):
  61         feedcore.Fetcher.__init__(self, gpodder.user_agent)
  62
  63     def fetch_channel(self, channel):
  64         etag = channel.etag
  65         modified = feedparser._parse_date(channel.last_modified)
  66         # If we have a username or password, rebuild the url with them included
  67         # Note: using a HTTPBasicAuthHandler would be pain because we need to
  68         # know the realm. It can be done, but I think this method works, too
  69         url = channel.authenticate_url(channel.url)
  70         for handler in self.custom_handlers:
  71             custom_feed = handler.handle_url(url)
  72             if custom_feed is not None:
  73                 raise CustomFeed(custom_feed)
  74         self.fetch(url, etag, modified)
  75
  76     def _resolve_url(self, url):
  77         return youtube.get_real_channel_url(url)
  78
  79     @classmethod
  80     def register(cls, handler):
  81         cls.custom_handlers.append(handler)
  82
  83 #    def _get_handlers(self):
  84 #        # Add a ProxyHandler for fetching data via a proxy server
  85 #        proxies = {'http': 'http://proxy.example.org:8080'}
  86 #        return[urllib2.ProxyHandler(proxies))]
  87
# The "register" method is exposed here for external usage, so that
# handlers can be added without referencing the gPodderFetcher class
register_custom_handler = gPodderFetcher.register
  90
  91 class PodcastModelObject(object):
  92     """
  93     A generic base class for our podcast model providing common helper
  94     and utility functions.
  95     """
  96
  97     @classmethod
  98     def create_from_dict(cls, d, *args):
  99         """
 100         Create a new object, passing "args" to the constructor
 101         and then updating the object with the values from "d".
 102         """
 103         o = cls(*args)
 104         o.update_from_dict(d)
 105         return o
 106
 107     def update_from_dict(self, d):
 108         """
 109         Updates the attributes of this object with values from the
 110         dictionary "d" by using the keys found in "d".
 111         """
 112         for k in d:
 113             if hasattr(self, k):
 114                 setattr(self, k, d[k])
 115
 116
class PodcastChannel(PodcastModelObject):
    """holds data for a complete channel"""
    # Length limit handed to util.sanitize_filename() for folder names
    MAX_FOLDERNAME_LENGTH = 150
    # One week in seconds; episodes dated more than this before the most
    # recent known episode are marked as played when they first appear
    SECONDS_PER_WEEK = 7*24*60*60

    # Fetcher used by update(); a class attribute shared by all channels
    feed_fetcher = gPodderFetcher()
 123
 124     @classmethod
 125     def build_factory(cls, download_dir):
 126         def factory(dict, db):
 127             return cls.create_from_dict(dict, db, download_dir)
 128         return factory
 129
 130     @classmethod
 131     def load_from_db(cls, db, download_dir):
 132         return db.load_channels(factory=cls.build_factory(download_dir))
 133
 134     @classmethod
 135     def load(cls, db, url, create=True, authentication_tokens=None,\
 136             max_episodes=0, download_dir=None, allow_empty_feeds=False):
 137         if isinstance(url, unicode):
 138             url = url.encode('utf-8')
 139
 140         tmp = db.load_channels(factory=cls.build_factory(download_dir), url=url)
 141         if len(tmp):
 142             return tmp[0]
 143         elif create:
 144             tmp = PodcastChannel(db, download_dir)
 145             tmp.url = url
 146             if authentication_tokens is not None:
 147                 tmp.username = authentication_tokens[0]
 148                 tmp.password = authentication_tokens[1]
 149
 150             tmp.update(max_episodes)
 151             tmp.save()
 152             db.force_last_new(tmp)
 153             # Subscribing to empty feeds should yield an error (except if
 154             # the user specifically allows empty feeds in the config UI)
 155             if sum(tmp.get_statistics()) == 0 and not allow_empty_feeds:
 156                 tmp.delete()
 157                 raise Exception(_('No downloadable episodes in feed'))
 158             return tmp
 159
 160     def episode_factory(self, d, db__parameter_is_unused=None):
 161         """
 162         This function takes a dictionary containing key-value pairs for
 163         episodes and returns a new PodcastEpisode object that is connected
 164         to this PodcastChannel object.
 165
 166         Returns: A new PodcastEpisode object
 167         """
 168         return PodcastEpisode.create_from_dict(d, self)
 169
 170     def _consume_custom_feed(self, custom_feed, max_episodes=0):
 171         self.title = custom_feed.get_title()
 172         self.link = custom_feed.get_link()
 173         self.description = custom_feed.get_description()
 174         self.image = custom_feed.get_image()
 175         self.pubDate = time.time()
 176         self.save()
 177
 178         guids = [episode.guid for episode in self.get_all_episodes()]
 179         self.save()
 180
 181         self.db.purge(max_episodes, self.id)
 182
 183     def _consume_updated_feed(self, feed, max_episodes=0):
 184         self.parse_error = feed.get('bozo_exception', None)
 185
 186         self.title = feed.feed.get('title', self.url)
 187         self.link = feed.feed.get('link', self.link)
 188         self.description = feed.feed.get('subtitle', self.description)
 189         # Start YouTube-specific title FIX
 190         YOUTUBE_PREFIX = 'Uploads by '
 191         if self.title.startswith(YOUTUBE_PREFIX):
 192             self.title = self.title[len(YOUTUBE_PREFIX):] + ' on YouTube'
 193         # End YouTube-specific title FIX
 194
 195         try:
 196             self.pubDate = rfc822.mktime_tz(feed.feed.get('updated_parsed', None+(0,)))
 197         except:
 198             self.pubDate = time.time()
 199
 200         if hasattr(feed.feed, 'image'):
 201             for attribute in ('href', 'url'):
 202                 new_value = getattr(feed.feed.image, attribute, None)
 203                 if new_value is not None:
 204                     log('Found cover art in %s: %s', attribute, new_value)
 205                     self.image = new_value
 206
 207         if hasattr(feed.feed, 'icon'):
 208             self.image = feed.feed.icon
 209
 210         self.save()
 211
 212         # Load all episodes to update them properly.
 213         existing = self.get_all_episodes()
 214
 215         # We can limit the maximum number of entries that gPodder will parse
 216         if max_episodes > 0 and len(feed.entries) > max_episodes:
 217             entries = feed.entries[:max_episodes]
 218         else:
 219             entries = feed.entries
 220
 221         # Title + PubDate hashes for existing episodes
 222         existing_dupes = dict((e.duplicate_id(), e) for e in existing)
 223
 224         # GUID-based existing episode list
 225         existing_guids = dict((e.guid, e) for e in existing)
 226
 227         # Get most recent pubDate of all episodes
 228         last_pubdate = self.db.get_last_pubdate(self) or 0
 229
 230         # Search all entries for new episodes
 231         for entry in entries:
 232             try:
 233                 episode = PodcastEpisode.from_feedparser_entry(entry, self)
 234                 if episode is not None and not episode.title:
 235                     episode.title, ext = os.path.splitext(os.path.basename(episode.url))
 236             except Exception, e:
 237                 log('Cannot instantiate episode: %s. Skipping.', e, sender=self, traceback=True)
 238                 continue
 239
 240             if episode is None:
 241                 continue
 242
 243             # Detect (and update) existing episode based on GUIDs
 244             existing_episode = existing_guids.get(episode.guid, None)
 245             if existing_episode:
 246                 existing_episode.update_from(episode)
 247                 existing_episode.save()
 248                 continue
 249
 250             # Detect (and update) existing episode based on duplicate ID
 251             existing_episode = existing_dupes.get(episode.duplicate_id(), None)
 252             if existing_episode:
 253                 if existing_episode.is_duplicate(episode):
 254                     existing_episode.update_from(episode)
 255                     existing_episode.save()
 256                     continue
 257
 258             # Workaround for bug 340: If the episode has been
 259             # published earlier than one week before the most
 260             # recent existing episode, do not mark it as new.
 261             if episode.pubDate < last_pubdate - self.SECONDS_PER_WEEK:
 262                 log('Episode with old date: %s', episode.title, sender=self)
 263                 episode.is_played = True
 264
 265             episode.save()
 266
 267         # Remove "unreachable" episodes - episodes that have not been
 268         # downloaded and that the feed does not list as downloadable anymore
 269         if self.id is not None:
 270             seen_guids = set(e.guid for e in feed.entries if hasattr(e, 'guid'))
 271             episodes_to_purge = (e for e in existing if \
 272                     e.state != gpodder.STATE_DOWNLOADED and \
 273                     e.guid not in seen_guids and e.guid is not None)
 274             for episode in episodes_to_purge:
 275                 log('Episode removed from feed: %s (%s)', episode.title, \
 276                         episode.guid, sender=self)
 277                 self.db.delete_episode_by_guid(episode.guid, self.id)
 278
 279         # This *might* cause episodes to be skipped if there were more than
 280         # max_episodes_per_feed items added to the feed between updates.
 281         # The benefit is that it prevents old episodes from apearing as new
 282         # in certain situations (see bug #340).
 283         self.db.purge(max_episodes, self.id)
 284
    def update_channel_lock(self):
        """Pass this channel to the database's update_channel_lock()."""
        self.db.update_channel_lock(self)
 287
 288     def _update_etag_modified(self, feed):
 289         self.updated_timestamp = time.time()
 290         self.calculate_publish_behaviour()
 291         self.etag = feed.headers.get('etag', self.etag)
 292         self.last_modified = feed.headers.get('last-modified', self.last_modified)
 293
 294     def query_automatic_update(self):
 295         """Query if this channel should be updated automatically
 296
 297         Returns True if the update should happen in automatic
 298         mode or False if this channel should be skipped (timeout
 299         not yet reached or release not expected right now).
 300         """
 301         updated = self.updated_timestamp
 302         expected = self.release_expected
 303
 304         now = time.time()
 305         one_day_ago = now - 60*60*24
 306         lastcheck = now - 60*10
 307
 308         return updated < one_day_ago or \
 309                 (expected < now and updated < lastcheck)
 310
 311     def update(self, max_episodes=0):
 312         try:
 313             self.feed_fetcher.fetch_channel(self)
 314         except CustomFeed, updated:
 315             custom_feed = updated.data
 316             self._consume_custom_feed(custom_feed, max_episodes)
 317             self.save()
 318         except feedcore.UpdatedFeed, updated:
 319             feed = updated.data
 320             self._consume_updated_feed(feed, max_episodes)
 321             self._update_etag_modified(feed)
 322             self.save()
 323         except feedcore.NewLocation, updated:
 324             feed = updated.data
 325             self.url = feed.href
 326             self._consume_updated_feed(feed, max_episodes)
 327             self._update_etag_modified(feed)
 328             self.save()
 329         except feedcore.NotModified, updated:
 330             feed = updated.data
 331             self._update_etag_modified(feed)
 332             self.save()
 333         except Exception, e:
 334             # "Not really" errors
 335             #feedcore.AuthenticationRequired
 336             # Temporary errors
 337             #feedcore.Offline
 338             #feedcore.BadRequest
 339             #feedcore.InternalServerError
 340             #feedcore.WifiLogin
 341             # Permanent errors
 342             #feedcore.Unsubscribe
 343             #feedcore.NotFound
 344             #feedcore.InvalidFeed
 345             #feedcore.UnknownStatusCode
 346             raise
 347
 348         self.db.commit()
 349
    def delete(self):
        """Pass this channel to the database's delete_channel()."""
        self.db.delete_channel(self)
 352
    def save(self):
        """Pass this channel to the database's save_channel()."""
        self.db.save_channel(self)
 355
 356     def get_statistics(self):
 357         if self.id is None:
 358             return (0, 0, 0, 0, 0)
 359         else:
 360             return self.db.get_channel_count(int(self.id))
 361
    def authenticate_url(self, url):
        """
        Returns: The result of util.url_add_authentication() for "url"
        with the username and password of this channel.
        """
        return util.url_add_authentication(url, self.username, self.password)
 364
 365     def __init__(self, db, download_dir):
 366         self.db = db
 367         self.download_dir = download_dir
 368         self.id = None
 369         self.url = None
 370         self.title = ''
 371         self.link = ''
 372         self.description = ''
 373         self.image = None
 374         self.pubDate = 0
 375         self.parse_error = None
 376         self.foldername = None
 377         self.auto_foldername = 1 # automatically generated foldername
 378
 379         # should this channel be synced to devices? (ex: iPod)
 380         self.sync_to_devices = True
 381         # to which playlist should be synced
 382         self.device_playlist_name = 'gPodder'
 383         # if set, this overrides the channel-provided title
 384         self.override_title = ''
 385         self.username = ''
 386         self.password = ''
 387
 388         self.last_modified = None
 389         self.etag = None
 390
 391         self.save_dir_size = 0
 392         self.__save_dir_size_set = False
 393
 394         self.channel_is_locked = False
 395
 396         self.release_expected = time.time()
 397         self.release_deviation = 0
 398         self.updated_timestamp = 0
 399
 400     def calculate_publish_behaviour(self):
 401         episodes = self.db.load_episodes(self, factory=self.episode_factory, limit=30)
 402         if len(episodes) < 3:
 403             return
 404
 405         deltas = []
 406         latest = max(e.pubDate for e in episodes)
 407         for index in range(len(episodes)-1):
 408             if episodes[index].pubDate != 0 and episodes[index+1].pubDate != 0:
 409                 deltas.append(episodes[index].pubDate - episodes[index+1].pubDate)
 410
 411         if len(deltas) > 1:
 412             stats = corestats.Stats(deltas)
 413             self.release_expected = min([latest+stats.stdev(), latest+(stats.min()+stats.avg())*.5])
 414             self.release_deviation = stats.stdev()
 415         else:
 416             self.release_expected = latest
 417             self.release_deviation = 0
 418
 419     def request_save_dir_size(self):
 420         if not self.__save_dir_size_set:
 421             self.update_save_dir_size()
 422         self.__save_dir_size_set = True
 423
    def update_save_dir_size(self):
        """Set save_dir_size to util.calculate_size() of the save_dir."""
        self.save_dir_size = util.calculate_size(self.save_dir)
 426
 427     def get_title( self):
 428         if self.override_title:
 429             return self.override_title
 430         elif not self.__title.strip():
 431             return self.url
 432         else:
 433             return self.__title
 434
 435     def set_title( self, value):
 436         self.__title = value.strip()
 437
 438     title = property(fget=get_title,
 439                      fset=set_title)
 440
 441     def set_custom_title( self, custom_title):
 442         custom_title = custom_title.strip()
 443
 444         # if the custom title is the same as we have
 445         if custom_title == self.override_title:
 446             return
 447
 448         # if custom title is the same as channel title and we didn't have a custom title
 449         if custom_title == self.__title and self.override_title == '':
 450             return
 451
 452         # make sure self.foldername is initialized
 453         self.get_save_dir()
 454
 455         # rename folder if custom_title looks sane
 456         new_folder_name = self.find_unique_folder_name(custom_title)
 457         if len(new_folder_name) > 0 and new_folder_name != self.foldername:
 458             log('Changing foldername based on custom title: %s', custom_title, sender=self)
 459             new_folder = os.path.join(self.download_dir, new_folder_name)
 460             old_folder = os.path.join(self.download_dir, self.foldername)
 461             if os.path.exists(old_folder):
 462                 if not os.path.exists(new_folder):
 463                     # Old folder exists, new folder does not -> simply rename
 464                     log('Renaming %s => %s', old_folder, new_folder, sender=self)
 465                     os.rename(old_folder, new_folder)
 466                 else:
 467                     # Both folders exist -> move files and delete old folder
 468                     log('Moving files from %s to %s', old_folder, new_folder, sender=self)
 469                     for file in glob.glob(os.path.join(old_folder, '*')):
 470                         shutil.move(file, new_folder)
 471                     log('Removing %s', old_folder, sender=self)
 472                     shutil.rmtree(old_folder, ignore_errors=True)
 473             self.foldername = new_folder_name
 474             self.save()
 475
 476         if custom_title != self.__title:
 477             self.override_title = custom_title
 478         else:
 479             self.override_title = ''
 480
    def get_downloaded_episodes(self):
        """
        Returns: The episodes of this channel that the database loads
        for the state gpodder.STATE_DOWNLOADED.
        """
        return self.db.load_episodes(self, factory=self.episode_factory, state=gpodder.STATE_DOWNLOADED)
 483
 484     def get_new_episodes(self, downloading=lambda e: False):
 485         """
 486         Get a list of new episodes. You can optionally specify
 487         "downloading" as a callback that takes an episode as
 488         a parameter and returns True if the episode is currently
 489         being downloaded or False if not.
 490
 491         By default, "downloading" is implemented so that it
 492         reports all episodes as not downloading.
 493         """
 494         return [episode for episode in self.db.load_episodes(self, \
 495                 factory=self.episode_factory) if \
 496                 episode.check_is_new(downloading=downloading)]
 497
 498     def get_playlist_filename(self):
 499         # If the save_dir doesn't end with a slash (which it really should
 500         # not, if the implementation is correct, we can just append .m3u :)
 501         assert self.save_dir[-1] != '/'
 502         return self.save_dir+'.m3u'
 503
 504     def update_m3u_playlist(self):
 505         m3u_filename = self.get_playlist_filename()
 506
 507         downloaded_episodes = self.get_downloaded_episodes()
 508         if not downloaded_episodes:
 509             log('No episodes - removing %s', m3u_filename, sender=self)
 510             util.delete_file(m3u_filename)
 511             return
 512
 513         log('Writing playlist to %s', m3u_filename, sender=self)
 514         f = open(m3u_filename, 'w')
 515         f.write('#EXTM3U\n')
 516
 517         for episode in PodcastEpisode.sort_by_pubdate(downloaded_episodes):
 518             if episode.was_downloaded(and_exists=True):
 519                 filename = episode.local_filename(create=False)
 520                 assert filename is not None
 521
 522                 if os.path.dirname(filename).startswith(os.path.dirname(m3u_filename)):
 523                     filename = filename[len(os.path.dirname(m3u_filename)+os.sep):]
 524                 f.write('#EXTINF:0,'+self.title+' - '+episode.title+' ('+episode.cute_pubdate()+')\n')
 525                 f.write(filename+'\n')
 526
 527         f.close()
 528
    def get_all_episodes(self):
        """
        Returns: The episodes that the database loads for this channel
        (without any further filter).
        """
        return self.db.load_episodes(self, factory=self.episode_factory)
 531
 532     def find_unique_folder_name(self, foldername):
 533         # Remove trailing dots to avoid errors on Windows (bug 600)
 534         foldername = foldername.strip().rstrip('.')
 535
 536         current_try = util.sanitize_filename(foldername, \
 537                 self.MAX_FOLDERNAME_LENGTH)
 538         next_try_id = 2
 539
 540         while True:
 541             if not os.path.exists(os.path.join(self.download_dir, current_try)):
 542                 self.db.remove_foldername_if_deleted_channel(current_try)
 543
 544             if self.db.channel_foldername_exists(current_try):
 545                 current_try = '%s (%d)' % (foldername, next_try_id)
 546                 next_try_id += 1
 547             else:
 548                 return current_try
 549
    def get_save_dir(self):
        """
        Determine (and create) the download folder of this channel.

        If no folder name has been set yet, or if the automatically
        generated name is still based on the URL (its md5 digest or its
        sanitized form), a new unique name is derived from the title.
        An existing folder under the old name is renamed or merged
        into the new one, and the channel is saved.

        Returns: The path of the folder (download_dir + foldername)
        """
        urldigest = hashlib.md5(self.url).hexdigest()
        sanitizedurl = util.sanitize_filename(self.url, self.MAX_FOLDERNAME_LENGTH)
        if self.foldername is None or (self.auto_foldername and (self.foldername == urldigest or self.foldername.startswith(sanitizedurl))):
            # we must change the folder name, because it has not been set manually
            fn_template = util.sanitize_filename(self.title, self.MAX_FOLDERNAME_LENGTH)

            # if this is an empty string, try the basename
            if len(fn_template) == 0:
                log('That is one ugly feed you have here! (Report this to bugs.gpodder.org: %s)', self.url, sender=self)
                fn_template = util.sanitize_filename(os.path.basename(self.url), self.MAX_FOLDERNAME_LENGTH)

            # If the basename is also empty, use the md5 hexdigest of the URL
            if len(fn_template) == 0:
                log('That is one REALLY ugly feed you have here! (Report this to bugs.gpodder.org: %s)', self.url, sender=self)
                fn_template = urldigest # no need for sanitize_filename here

            # Find a unique folder name for this podcast
            wanted_foldername = self.find_unique_folder_name(fn_template)

            # if the foldername has not been set, check if the (old) md5 filename exists
            if self.foldername is None and os.path.exists(os.path.join(self.download_dir, urldigest)):
                log('Found pre-0.15.0 download folder for %s: %s', self.title, urldigest, sender=self)
                self.foldername = urldigest

            # we have a valid, new folder name in "wanted_foldername" -> use that!
            if self.foldername is not None and wanted_foldername != self.foldername:
                # there might be an old download folder crawling around - move it!
                new_folder_name = os.path.join(self.download_dir, wanted_foldername)
                old_folder_name = os.path.join(self.download_dir, self.foldername)
                if os.path.exists(old_folder_name):
                    if not os.path.exists(new_folder_name):
                        # Old folder exists, new folder does not -> simply rename
                        log('Renaming %s => %s', old_folder_name, new_folder_name, sender=self)
                        os.rename(old_folder_name, new_folder_name)
                    else:
                        # Both folders exist -> move files and delete old folder
                        log('Moving files from %s to %s', old_folder_name, new_folder_name, sender=self)
                        for file in glob.glob(os.path.join(old_folder_name, '*')):
                            shutil.move(file, new_folder_name)
                        log('Removing %s', old_folder_name, sender=self)
                        shutil.rmtree(old_folder_name, ignore_errors=True)
            log('Updating foldername of %s to "%s".', self.url, wanted_foldername, sender=self)
            self.foldername = wanted_foldername
            self.save()

        save_dir = os.path.join(self.download_dir, self.foldername)

        # Create save_dir if it does not yet exist
        if not util.make_directory( save_dir):
            log( 'Could not create save_dir: %s', save_dir, sender = self)

        return save_dir

    # Read-only property; every access runs get_save_dir() again
    save_dir = property(fget=get_save_dir)
 605
    def remove_downloaded( self):
        """Remove the save_dir with its contents (errors are ignored)."""
        shutil.rmtree( self.save_dir, True)
 608
 609     @property
 610     def cover_file(self):
 611         new_name = os.path.join(self.save_dir, 'folder.jpg')
 612         if not os.path.exists(new_name):
 613             old_names = ('cover', '.cover')
 614             for old_name in old_names:
 615                 filename = os.path.join(self.save_dir, old_name)
 616                 if os.path.exists(filename):
 617                     shutil.move(filename, new_name)
 618                     return new_name
 619
 620         return new_name
 621
 622     def delete_episode(self, episode):
 623         filename = episode.local_filename(create=False, check_only=True)
 624         if filename is not None:
 625             util.delete_file(filename)
 626
 627         episode.set_state(gpodder.STATE_DELETED)
 628
 629
class PodcastEpisode(PodcastModelObject):
    """holds data for one object in a channel"""
    # NOTE(review): presumably the length limit for generated episode
    # filenames - its use is not visible in this part of the file
    MAX_FILENAME_LENGTH = 200
 633
    def _get_played(self):
        """Getter of "played": returns the is_played attribute."""
        return self.is_played

    def _set_played(self, played):
        """Setter of "played": stores the value in is_played."""
        self.is_played = played

    # Alias "is_played" to "played" for DB column mapping
    played = property(fget=_get_played, fset=_set_played)
 642
    def _get_locked(self):
        """Getter of "locked": returns the is_locked property."""
        return self.is_locked

    def _set_locked(self, locked):
        """Setter of "locked": assigns the value to is_locked."""
        self.is_locked = locked

    # Alias "is_locked" to "locked" for DB column mapping
    locked = property(fget=_get_locked, fset=_set_locked)
 651
    def _get_channel_id(self):
        """Getter of "channel_id": the id of the channel of this episode."""
        return self.channel.id

    def _set_channel_id(self, channel_id):
        """
        Setter of "channel_id": nothing is stored, the value is only
        checked (by assertion) against the id of the channel.
        """
        assert self.channel.id == channel_id

    # Accessor for the "channel_id" DB column
    channel_id = property(fget=_get_channel_id, fset=_set_channel_id)
 660
 661     @staticmethod
 662     def sort_by_pubdate(episodes, reverse=False):
 663         """Sort a list of PodcastEpisode objects chronologically
 664
 665         Returns a iterable, sorted sequence of the episodes
 666         """
 667         key_pubdate = lambda e: e.pubDate
 668         return sorted(episodes, key=key_pubdate, reverse=reverse)
 669
 670     def reload_from_db(self):
 671         """
 672         Re-reads all episode details for this object from the
 673         database and updates this object accordingly. Can be
 674         used to refresh existing objects when the database has
 675         been updated (e.g. the filename has been set after a
 676         download where it was not set before the download)
 677         """
 678         d = self.db.load_episode(self.id)
 679         self.update_from_dict(d or {})
 680         return self
 681
 682     def has_website_link(self):
 683         return bool(self.link) and (self.link != self.url)
 684
 685     @staticmethod
 686     def from_feedparser_entry(entry, channel):
 687         episode = PodcastEpisode(channel)
 688
 689         episode.title = entry.get('title', '')
 690         episode.link = entry.get('link', '')
 691         episode.description = entry.get('summary', '')
 692
 693         # Fallback to subtitle if summary is not available0
 694         if not episode.description:
 695             episode.description = entry.get('subtitle', '')
 696
 697         episode.guid = entry.get('id', '')
 698         if entry.get('updated_parsed', None):
 699             episode.pubDate = rfc822.mktime_tz(entry.updated_parsed+(0,))
 700
 701         # Enclosures
 702         for e in entry.get('enclosures', ()):
 703             episode.mimetype = e.get('type', 'application/octet-stream')
 704             if '/' not in episode.mimetype:
 705                 continue
 706
 707             episode.url = util.normalize_feed_url(e.get('href', ''))
 708             if not episode.url:
 709                 continue
 710
 711             try:
 712                 episode.length = int(e.length) or -1
 713             except:
 714                 episode.length = -1
 715
 716             return episode
 717
 718         # Media RSS content
 719         for m in entry.get('media_content', ()):
 720             episode.mimetype = m.get('type', 'application/octet-stream')
 721             if '/' not in episode.mimetype:
 722                 continue
 723
 724             episode.url = util.normalize_feed_url(m.get('url', ''))
 725             if not episode.url:
 726                 continue
 727
 728             try:
 729                 episode.length = int(m.fileSize) or -1
 730             except:
 731                 episode.length = -1
 732
 733             return episode
 734
 735         # Brute-force detection of any links
 736         for l in entry.get('links', ()):
 737             episode.url = util.normalize_feed_url(l.get('href', ''))
 738             if not episode.url:
 739                 continue
 740
 741             if youtube.is_video_link(episode.url):
 742                 return episode
 743
 744             # Check if we can resolve this link to a audio/video file
 745             filename, extension = util.filename_from_url(episode.url)
 746             file_type = util.file_type_by_extension(extension)
 747             if file_type is None and hasattr(l, 'type'):
 748                 extension = util.extension_from_mimetype(l.type)
 749                 file_type = util.file_type_by_extension(extension)
 750
 751             # The link points to a audio or video file - use it!
 752             if file_type is not None:
 753                 return episode
 754
 755         # Scan MP3 links in description text
 756         mp3s = re.compile(r'http://[^"]*\.mp3')
 757         for content in entry.get('content', ()):
 758             html = content.value
 759             for match in mp3s.finditer(html):
 760                 episode.url = match.group(0)
 761                 return episode
 762
 763         return None
 764
 765     def __init__(self, channel):
 766         self.db = channel.db
 767         # Used by Storage for faster saving
 768         self.id = None
 769         self.url = ''
 770         self.title = ''
 771         self.length = 0
 772         self.mimetype = 'application/octet-stream'
 773         self.guid = ''
 774         self.description = ''
 775         self.link = ''
 776         self.channel = channel
 777         self.pubDate = 0
 778         self.filename = None
 779         self.auto_filename = 1 # automatically generated filename
 780
 781         self.state = gpodder.STATE_NORMAL
 782         self.is_played = False
 783
 784         # Initialize the "is_locked" property
 785         self._is_locked = False
 786         self.is_locked = channel.channel_is_locked
 787
 788         # Time attributes
 789         self.total_time = 0
 790         self.current_position = 0
 791         self.current_position_updated = time.time()
 792
 793     def get_is_locked(self):
 794         return self._is_locked
 795
 796     def set_is_locked(self, is_locked):
 797         self._is_locked = bool(is_locked)
 798
 799     is_locked = property(fget=get_is_locked, fset=set_is_locked)
 800
 801     def save(self):
 802         if self.state != gpodder.STATE_DOWNLOADED and self.file_exists():
 803             self.state = gpodder.STATE_DOWNLOADED
 804         self.db.save_episode(self)
 805
 806     def on_downloaded(self, filename):
 807         self.state = gpodder.STATE_DOWNLOADED
 808         self.is_played = False
 809         self.length = os.path.getsize(filename)
 810         self.db.save_downloaded_episode(self)
 811         self.db.commit()
 812
 813     def set_state(self, state):
 814         self.state = state
 815         self.db.update_episode_state(self)
 816
 817     def mark(self, state=None, is_played=None, is_locked=None):
 818         if state is not None:
 819             self.state = state
 820         if is_played is not None:
 821             self.is_played = is_played
 822         if is_locked is not None:
 823             self.is_locked = is_locked
 824         self.db.update_episode_state(self)
 825
 826     @property
 827     def title_markup(self):
 828         return '%s\n<small>%s</small>' % (xml.sax.saxutils.escape(self.title),
 829                           xml.sax.saxutils.escape(self.channel.title))
 830
 831     @property
 832     def maemo_markup(self):
 833         if self.length > 0:
 834             length_str = '%s; ' % self.filesize_prop
 835         else:
 836             length_str = ''
 837         return ('<b>%s</b>\n<small>%s'+_('released %s')+ \
 838                 '; '+_('from %s')+'</small>') % (\
 839                 xml.sax.saxutils.escape(self.title), \
 840                 xml.sax.saxutils.escape(length_str), \
 841                 xml.sax.saxutils.escape(self.pubdate_prop), \
 842                 xml.sax.saxutils.escape(self.channel.title))
 843
 844     @property
 845     def maemo_remove_markup(self):
 846         if self.is_played:
 847             played_string = _('played')
 848         else:
 849             played_string = _('unplayed')
 850         downloaded_string = self.get_age_string()
 851         if not downloaded_string:
 852             downloaded_string = _('today')
 853         return ('<b>%s</b>\n<small>%s; %s; '+_('downloaded %s')+ \
 854                 '; '+_('from %s')+'</small>') % (\
 855                 xml.sax.saxutils.escape(self.title), \
 856                 xml.sax.saxutils.escape(self.filesize_prop), \
 857                 xml.sax.saxutils.escape(played_string), \
 858                 xml.sax.saxutils.escape(downloaded_string), \
 859                 xml.sax.saxutils.escape(self.channel.title))
 860
 861     def age_in_days(self):
 862         return util.file_age_in_days(self.local_filename(create=False, \
 863                 check_only=True))
 864
 865     def get_age_string(self):
 866         return util.file_age_to_string(self.age_in_days())
 867
 868     age_prop = property(fget=get_age_string)
 869
 870     def one_line_description( self):
 871         lines = util.remove_html_tags(self.description).strip().splitlines()
 872         if not lines or lines[0] == '':
 873             return _('No description available')
 874         else:
 875             return ' '.join(lines)
 876
 877     def delete_from_disk(self):
 878         try:
 879             self.channel.delete_episode(self)
 880         except:
 881             log('Cannot delete episode from disk: %s', self.title, traceback=True, sender=self)
 882
 883     def find_unique_file_name(self, url, filename, extension):
 884         current_try = util.sanitize_filename(filename, self.MAX_FILENAME_LENGTH)+extension
 885         next_try_id = 2
 886         lookup_url = None
 887
 888         if self.filename == current_try and current_try is not None:
 889             # We already have this filename - good!
 890             return current_try
 891
 892         while self.db.episode_filename_exists(current_try):
 893             current_try = '%s (%d)%s' % (filename, next_try_id, extension)
 894             next_try_id += 1
 895
 896         return current_try
 897
    def local_filename(self, create, force_update=False, check_only=False,
            template=None):
        """Get (and possibly generate) the local saving filename

        Pass create=True if you want this function to generate a
        new filename if none exists. You only want to do this when
        planning to create/download the file after calling this function.

        Normally, you should pass create=False. This will only
        create a filename when the file already exists from a previous
        version of gPodder (where we used md5 filenames). If the file
        does not exist (and the filename also does not exist), this
        function will return None.

        If you pass force_update=True to this function, it will try to
        find a new (better) filename and move the current file if this
        is the case. This is useful if (during the download) you get
        more information about the file, e.g. the mimetype and you want
        to include this information in the file name generation process.

        If check_only=True is passed to this function, it will never try
        to rename the file, even if it would be a good idea. Use this if
        you only want to check if a file exists.

        If "template" is specified, it should be a filename that is to
        be used as a template for generating the "real" filename.

        The generated filename is stored in the database for future access.

        Returns the filename joined with the channel's save_dir, or None
        if no filename is known and none was generated.
        """
        # Determine the extension without consulting the stored filename;
        # extension() would otherwise call back into this method
        ext = self.extension(may_call_local_filename=False).encode('utf-8', 'ignore')

        # For compatibility with already-downloaded episodes, we
        # have to know md5 filenames if they are downloaded already
        urldigest = hashlib.md5(self.url).hexdigest()

        # No filename stored and we must not create one: the only thing
        # we can still do is pick up a file saved under its md5 name
        if not create and self.filename is None:
            urldigest_filename = os.path.join(self.channel.save_dir, urldigest+ext)
            if os.path.exists(urldigest_filename):
                # The file exists, so set it up in our database
                log('Recovering pre-0.15.0 file: %s', urldigest_filename, sender=self)
                self.filename = urldigest+ext
                self.auto_filename = 1
                self.save()
                return urldigest_filename
            return None

        # We only want to check if the file exists, so don't try to
        # rename the file, even if it would be reasonable. See also:
        # http://bugs.gpodder.org/attachment.cgi?id=236
        if check_only:
            if self.filename is None:
                return None
            else:
                return os.path.join(self.channel.save_dir, self.filename)

        # (Re)generate the filename if there is none yet, if the caller
        # forces it, or if an automatically generated filename is still
        # the md5-based one
        if self.filename is None or force_update or (self.auto_filename and self.filename == urldigest+ext):
            # Try to find a new filename for the current file
            if template is not None:
                # If template is specified, trust the template's extension
                # (note: "ext" is replaced for the rest of this method)
                episode_filename, ext = os.path.splitext(template)
            else:
                episode_filename, extension_UNUSED = util.filename_from_url(self.url)
            fn_template = util.sanitize_filename(episode_filename, self.MAX_FILENAME_LENGTH)

            if 'redirect' in fn_template and template is None:
                # This looks like a redirection URL - force URL resolving!
                log('Looks like a redirection to me: %s', self.url, sender=self)
                url = util.get_real_url(self.channel.authenticate_url(self.url))
                log('Redirection resolved to: %s', url, sender=self)
                (episode_filename, extension_UNUSED) = util.filename_from_url(url)
                fn_template = util.sanitize_filename(episode_filename, self.MAX_FILENAME_LENGTH)

            # Use the video title for YouTube downloads
            for yt_url in ('http://youtube.com/', 'http://www.youtube.com/'):
                if self.url.startswith(yt_url):
                    fn_template = util.sanitize_filename(os.path.basename(self.title), self.MAX_FILENAME_LENGTH)

            # If the basename is empty, use the md5 hexdigest of the URL
            if len(fn_template) == 0 or fn_template.startswith('redirect.'):
                log('Report to bugs.gpodder.org: Podcast at %s with episode URL: %s', self.channel.url, self.url, sender=self)
                fn_template = urldigest

            # Find a unique filename for this episode
            wanted_filename = self.find_unique_file_name(self.url, fn_template, ext)

            # We populate the filename field the first time - does the old file still exist?
            if self.filename is None and os.path.exists(os.path.join(self.channel.save_dir, urldigest+ext)):
                log('Found pre-0.15.0 downloaded file: %s', urldigest, sender=self)
                self.filename = urldigest+ext

            # The old file exists, but we have decided to want a different filename
            if self.filename is not None and wanted_filename != self.filename:
                # there might be an old download folder crawling around - move it!
                new_file_name = os.path.join(self.channel.save_dir, wanted_filename)
                old_file_name = os.path.join(self.channel.save_dir, self.filename)
                if os.path.exists(old_file_name) and not os.path.exists(new_file_name):
                    log('Renaming %s => %s', old_file_name, new_file_name, sender=self)
                    os.rename(old_file_name, new_file_name)
                elif force_update and not os.path.exists(old_file_name):
                    # When we call force_update, the file might not yet exist when we
                    # call it from the downloading code before saving the file
                    log('Choosing new filename: %s', new_file_name, sender=self)
                else:
                    # Nothing is moved in this case; the new filename is
                    # still stored below
                    log('Warning: %s exists or %s does not.', new_file_name, old_file_name, sender=self)
                log('Updating filename of %s to "%s".', self.url, wanted_filename, sender=self)
            elif self.filename is None:
                log('Setting filename to "%s".', wanted_filename, sender=self)
            else:
                log('Should update filename. Stays the same (%s). Good!', \
                        wanted_filename, sender=self)
            # Remember the chosen filename and persist it right away
            self.filename = wanted_filename
            self.save()
            self.db.commit()

        return os.path.join(self.channel.save_dir, self.filename)
1013
1014     def set_mimetype(self, mimetype, commit=False):
1015         """Sets the mimetype for this episode"""
1016         self.mimetype = mimetype
1017         if commit:
1018             self.db.commit()
1019
1020     def extension(self, may_call_local_filename=True):
1021         filename, ext = util.filename_from_url(self.url)
1022         if may_call_local_filename:
1023             filename = self.local_filename(create=False)
1024             if filename is not None:
1025                 filename, ext = os.path.splitext(filename)
1026         # if we can't detect the extension from the url fallback on the mimetype
1027         if ext == '' or util.file_type_by_extension(ext) is None:
1028             ext = util.extension_from_mimetype(self.mimetype)
1029         return ext
1030
1031     def check_is_new(self, downloading=lambda e: False):
1032         """
1033         Returns True if this episode is to be considered new.
1034         "Downloading" should be a callback that gets an episode
1035         as its parameter and returns True if the episode is
1036         being downloaded at the moment.
1037         """
1038         return self.state == gpodder.STATE_NORMAL and \
1039                 not self.is_played and \
1040                 not downloading(self)
1041
1042     def mark_new(self):
1043         self.state = gpodder.STATE_NORMAL
1044         self.is_played = False
1045         self.db.update_episode_state(self)
1046
1047     def mark_old(self):
1048         self.is_played = True
1049         self.db.update_episode_state(self)
1050
1051     def file_exists(self):
1052         filename = self.local_filename(create=False, check_only=True)
1053         if filename is None:
1054             return False
1055         else:
1056             return os.path.exists(filename)
1057
1058     def was_downloaded(self, and_exists=False):
1059         if self.state != gpodder.STATE_DOWNLOADED:
1060             return False
1061         if and_exists and not self.file_exists():
1062             return False
1063         return True
1064
1065     def sync_filename(self, use_custom=False, custom_format=None):
1066         if use_custom:
1067             return util.object_string_formatter(custom_format,
1068                     episode=self, podcast=self.channel)
1069         else:
1070             return self.title
1071
1072     def file_type(self):
1073         # Assume all YouTube links are video files
1074         if youtube.is_video_link(self.url):
1075             return 'video'
1076
1077         return util.file_type_by_extension(self.extension())
1078
1079     @property
1080     def basename( self):
1081         return os.path.splitext( os.path.basename( self.url))[0]
1082
1083     @property
1084     def published( self):
1085         """
1086         Returns published date as YYYYMMDD (or 00000000 if not available)
1087         """
1088         try:
1089             return datetime.datetime.fromtimestamp(self.pubDate).strftime('%Y%m%d')
1090         except:
1091             log( 'Cannot format pubDate for "%s".', self.title, sender = self)
1092             return '00000000'
1093
1094     @property
1095     def pubtime(self):
1096         """
1097         Returns published time as HHMM (or 0000 if not available)
1098         """
1099         try:
1100             return datetime.datetime.fromtimestamp(self.pubDate).strftime('%H%M')
1101         except:
1102             log('Cannot format pubDate (time) for "%s".', self.title, sender=self)
1103             return '0000'
1104
1105     def cute_pubdate(self):
1106         result = util.format_date(self.pubDate)
1107         if result is None:
1108             return '(%s)' % _('unknown')
1109         else:
1110             return result
1111
1112     pubdate_prop = property(fget=cute_pubdate)
1113
1114     def calculate_filesize( self):
1115         filename = self.local_filename(create=False)
1116         if filename is None:
1117             log('calculate_filesized called, but filename is None!', sender=self)
1118         try:
1119             self.length = os.path.getsize(filename)
1120         except:
1121             log( 'Could not get filesize for %s.', self.url)
1122
1123     def get_filesize_string(self):
1124         return util.format_filesize(self.length)
1125
1126     filesize_prop = property(fget=get_filesize_string)
1127
1128     def get_played_string( self):
1129         if not self.is_played:
1130             return _('Unplayed')
1131
1132         return ''
1133
1134     played_prop = property(fget=get_played_string)
1135
1136     def is_duplicate(self, episode):
1137         if self.title == episode.title and self.pubDate == episode.pubDate:
1138             log('Possible duplicate detected: %s', self.title)
1139             return True
1140         return False
1141
1142     def duplicate_id(self):
1143         return hash((self.title, self.pubDate))
1144
1145     def update_from(self, episode):
1146         for k in ('title', 'url', 'description', 'link', 'pubDate', 'guid'):
1147             setattr(self, k, getattr(episode, k))
1148